# Add items for human feedback

![Pipeline](img/generative_ai_pipeline_rlhf_plus.png)

![RLHF](img/rlhf_summarization.png)

In [2]:
import psutil

# Minimum total RAM, in bytes, that this notebook expects from its instance.
MIN_NOTEBOOK_MEMORY_BYTES = 32 * 1000 * 1000 * 1000

# Snapshot of system memory as reported by psutil.
notebook_memory = psutil.virtual_memory()
print(notebook_memory)

# Always bind the flag (True or False) so that referencing it later cannot
# raise NameError when the instance is too small.
correct_instance_type = notebook_memory.total >= MIN_NOTEBOOK_MEMORY_BYTES

if not correct_instance_type:
    print('*******************************************')
    print('YOU ARE NOT USING THE CORRECT INSTANCE TYPE')
    print('PLEASE CHANGE INSTANCE TYPE TO  m5.2xlarge ')
    print('*******************************************')

svmem(total=4059807744, available=2247741440, percent=44.6, used=1569533952, free=2090758144, active=1614114816, inactive=122601472, buffers=0, cached=399515648, shared=1220608, slab=130486272)
*******************************************
YOU ARE NOT USING THE CORRECT INSTANCE TYPE
PLEASE CHANGE INSTANCE TYPE TO  m5.2xlarge 
*******************************************


In [3]:
import boto3
import sagemaker
import pandas as pd

# Region configured for the default boto3 session.
region = boto3.Session().region_name

# SageMaker session plus the execution role and default bucket used by later cells.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
bucket = sess.default_bucket()

In [4]:
import io
import json
import uuid
import time
import boto3
import botocore

# AWS SDK (boto3) clients, all created explicitly for the same region.
# NOTE(review): `sagemaker` rebinds the name of the SageMaker SDK module imported
# in the previous cell; from here on `sagemaker` is the boto3 client, not the module.
sagemaker = boto3.client("sagemaker", region_name=region)
a2i = boto3.client("sagemaker-a2i-runtime", region_name=region)
s3 = boto3.client("s3", region_name=region)

# Retrieve the `augmented_ai_flow_definition_arn` Created Previously

In [5]:
# Restore the flow definition ARN that was previously persisted with `%store`.
%store -r augmented_ai_flow_definition_arn

In [6]:
# Show the restored ARN so a missing or stale value is noticed before it is used.
print(augmented_ai_flow_definition_arn)

arn:aws:sagemaker:us-east-1:079002598131:flow-definition/fd-ranking-baeb7ab3-ea6f-418c-b63b-2b646f15c80d


In [7]:
# Sample tasks for human ranking: each entry pairs a dialogue `prompt`
# with two candidate `responses`.
items = [
    {
        "prompt": (
            "Antje:  Will you write another O'Reilly book with me, Chris?\n"
            "Chris:  Yes, let's start 3 months from now.\n"
            "Antje:  OK.\n"
        ),
        "responses": [
            "Chris and Antje will write another O'Reilly book starting in 3 months.",
            "Antje will write the book by herself right now.",
        ],
    },
    {
        "prompt": (
            "Shelbee:  The wine will arrive in 10 minutes.\n"
            "Chris:  Can you drink the wine?  I leave for the airport in 5 minutes.\n"
            "Shelbee:  OK, I will.\n"
        ),
        "responses": [
            "Chris drank the wine in 5 minutes.",
            "Shelbee drank the wine in 10 minutes.",
        ],
    },
]

In [8]:
# Names of every human loop started by this cell; later cells poll them by name.
human_loops_started = []

for task in items:
    print(f'Processing item: "{task}"')

    # A random UUID serves as the human loop name.
    loop_name = str(uuid.uuid4())
    # The task is wrapped under "taskObject" and sent as a JSON string.
    loop_input = {"InputContent": json.dumps({"taskObject": task})}

    start_loop_response = a2i.start_human_loop(
        HumanLoopName=loop_name,
        FlowDefinitionArn=augmented_ai_flow_definition_arn,
        HumanLoopInput=loop_input,
    )
    human_loops_started.append(loop_name)

    print(f"*** ==> Starting human loop with name: {loop_name}  \n")

Processing item: "{'prompt': "Antje:  Will you write another O'Reilly book with me, Chris?\nChris:  Yes, let's start 3 months from now.\nAntje:  OK.\n", 'responses': ["Chris and Antje will write another O'Reilly book starting in 3 months.", 'Antje will write the book by herself right now.']}"
*** ==> Starting human loop with name: 088017b6-3740-4059-b260-7d5b0446ed2f  

Processing item: "{'prompt': 'Shelbee:  The wine will arrive in 10 minutes.\nChris:  Can you drink the wine?  I leave for the airport in 5 minutes.\nShelbee:  OK, I will.\n', 'responses': ['Chris drank the wine in 5 minutes.', 'Shelbee drank the wine in 10 minutes.']}"
*** ==> Starting human loop with name: db20ad98-5211-48c1-8fb0-a74872ccc3b2  



In [9]:
# Persist the loop names with `%store` so they can be restored later with `%store -r`.
%store human_loops_started

Stored 'human_loops_started' (list)


# Check Status of Human Loop

In [10]:
# Query every started loop once (no waiting) and keep the responses of those
# that have already completed.
completed_human_loops = []

for loop_name in human_loops_started:
    loop_info = a2i.describe_human_loop(HumanLoopName=loop_name)

    print(f"HumanLoop Name: {loop_name}")
    loop_status = loop_info["HumanLoopStatus"]
    print(f"HumanLoop Status: {loop_status}")
    loop_output = loop_info["HumanLoopOutput"]
    print(f"HumanLoop Output Destination: {loop_output}")
    print("")

    if loop_status == "Completed":
        completed_human_loops.append(loop_info)

HumanLoop Name: 088017b6-3740-4059-b260-7d5b0446ed2f
HumanLoop Status: InProgress
HumanLoop Output Destination: {'OutputS3Uri': 's3://sagemaker-us-east-1-079002598131/ground-truth-star-rating-results/fd-ranking-baeb7ab3-ea6f-418c-b63b-2b646f15c80d/2023/04/21/21/05/37/088017b6-3740-4059-b260-7d5b0446ed2f/output.json'}

HumanLoop Name: db20ad98-5211-48c1-8fb0-a74872ccc3b2
HumanLoop Status: InProgress
HumanLoop Output Destination: {'OutputS3Uri': 's3://sagemaker-us-east-1-079002598131/ground-truth-star-rating-results/fd-ranking-baeb7ab3-ea6f-418c-b63b-2b646f15c80d/2023/04/21/21/05/37/db20ad98-5211-48c1-8fb0-a74872ccc3b2/output.json'}



# Wait For Workers to Complete Their Human Loop Tasks

Navigate to the link below and log in with the email and password that you used when you set up the Private Workforce.

In [11]:
# Restore the private workteam ARN that was previously persisted with `%store`.
%store -r augmented_ai_workteam_arn

In [12]:
# Show the restored workteam ARN; the next cell derives the workteam name from it.
print(augmented_ai_workteam_arn)

arn:aws:sagemaker:us-east-1:079002598131:workteam/private-crowd/dsoaws


In [13]:
# The workteam name is whatever follows the last "/" in the workteam ARN.
workteam_name = augmented_ai_workteam_arn.rsplit("/", 1)[-1]
print(workteam_name)

# Instructions for the human labeler, printed one line at a time.
for instruction in (
    "Navigate to the private worker portal and complete the human loop.",
    "Make sure you have invited yourself to the workteam and received the signup email.",
    "Note:  Check your spam filter if you have not received the email.",
    "",
):
    print(instruction)

# Look up the workteam's SubDomain and print it as an https URL.
workteam_description = sagemaker.describe_workteam(WorkteamName=workteam_name)
portal_subdomain = workteam_description["Workteam"]["SubDomain"]
print("https://" + portal_subdomain)

dsoaws
Navigate to the private worker portal and complete the human loop.
Make sure you have invited yourself to the workteam and received the signup email.
Note:  Check your spam filter if you have not received the email.

https://d0xvtcio9i.labeling.us-east-1.sagemaker.aws


# _YOU MUST LABEL THE DATA BY CLICKING THE LINK ABOVE BEFORE CONTINUING!!_

# Start Labeling

<img src="img/augmented-comprehend-custom-start-working.png" width="80%" align="left">

# Select Label

<img src="img/augmented-comprehend-custom-select-label.png" width="80%" align="left">

# Loop is Completed

<img src="img/augmented-comprehend-custom-finished-task.png" width="80%" align="left">

# Verify the Human Loops are Completed

In [14]:
# Re-print the worker portal link as a reminder before waiting on the loops.
# The workteam name is the text after the final "/" of the ARN.
workteam_name = augmented_ai_workteam_arn.rpartition("/")[2]
print(workteam_name)

reminder_lines = [
    "Navigate to the private worker portal and complete the human loop.",
    "Make sure you have invited yourself to the workteam and received the signup email.",
    "Note:  Check your spam filter if you have not received the email.",
    "",
]
print("\n".join(reminder_lines))

# The portal host is the workteam's SubDomain as returned by describe_workteam.
workteam_details = sagemaker.describe_workteam(WorkteamName=workteam_name)["Workteam"]
print("https://" + workteam_details["SubDomain"])

dsoaws
Navigate to the private worker portal and complete the human loop.
Make sure you have invited yourself to the workteam and received the signup email.
Note:  Check your spam filter if you have not received the email.

https://d0xvtcio9i.labeling.us-east-1.sagemaker.aws


In [24]:
import time

# Seconds to sleep between status polls.
POLL_INTERVAL_SECONDS = 10
# Statuses after which a human loop will not change any more. Polling stops on
# any of these; waiting only for "Completed" would spin forever on a loop that
# ended as "Failed" or "Stopped".
TERMINAL_STATUSES = ("Completed", "Failed", "Stopped")

# Responses of the loops that finished successfully; consumed by later cells.
completed_human_loops = []
for human_loop_name in human_loops_started:
    resp = a2i.describe_human_loop(HumanLoopName=human_loop_name)
    print(f"HumanLoop Name: {human_loop_name}")
    print(f'HumanLoop Status: {resp["HumanLoopStatus"]}')
    print(f'HumanLoop Output Destination: {resp["HumanLoopOutput"]}')
    print("")

    # Block until a worker finishes the task or the loop ends some other way.
    while resp["HumanLoopStatus"] not in TERMINAL_STATUSES:
        print("Waiting for HumanLoop to complete.")
        time.sleep(POLL_INTERVAL_SECONDS)
        resp = a2i.describe_human_loop(HumanLoopName=human_loop_name)

    if resp["HumanLoopStatus"] == "Completed":
        completed_human_loops.append(resp)
        print("Completed!")
    else:
        # Failed/Stopped loops are reported and skipped rather than collected.
        failure_reason = resp.get("FailureReason", "no failure reason reported")
        print(f'HumanLoop ended with status {resp["HumanLoopStatus"]}: {failure_reason}')
    print("")

HumanLoop Name: 088017b6-3740-4059-b260-7d5b0446ed2f
HumanLoop Status: Completed
HumanLoop Output Destination: {'OutputS3Uri': 's3://sagemaker-us-east-1-079002598131/ground-truth-star-rating-results/fd-ranking-baeb7ab3-ea6f-418c-b63b-2b646f15c80d/2023/04/21/21/05/37/088017b6-3740-4059-b260-7d5b0446ed2f/output.json'}

Completed!

HumanLoop Name: db20ad98-5211-48c1-8fb0-a74872ccc3b2
HumanLoop Status: Completed
HumanLoop Output Destination: {'OutputS3Uri': 's3://sagemaker-us-east-1-079002598131/ground-truth-star-rating-results/fd-ranking-baeb7ab3-ea6f-418c-b63b-2b646f15c80d/2023/04/21/21/05/37/db20ad98-5211-48c1-8fb0-a74872ccc3b2/output.json'}

Completed!



# View Human Labels  

Once the work is complete, Amazon SageMaker Ground Truth stores the results in the specified S3 bucket and sends a CloudWatch Event.  Here is a sample item labeled with Ground Truth in `jsonlines` format:
```
{
    "inputContent": {
        "taskObject": {
            "prompt": "Antje:  Will you write another O'Reilly book with me, Chris?\nChris:  Yes, let's start 3 months from now.\nAntje:  OK.\n",
            "responses": [
                "Chris and Antje will write another O'Reilly book starting in 3 months.",
                "Antje will write the book by herself right now."
            ]
        }
    },
    "humanAnswers": [{
        "answerContent": {
            "response_1_ranking": "1", # ranking for 1st response (1 is High)
            "response_2_ranking": "2"  # ranking for 2nd response (2 is Low)
        }
    }]
}
```

# Prepare human-labeled data for RL/PPO training
Retrieve the labeled results from Ground Truth and convert every ranking to a binary reward (0 or 1) as shown below.

TODO:  Convert this into markdown table

From this:
```
prompt                     response       ranking

Antje:  Will you write...    Chris and Antje...   1   # High
Antje:  Will you write...    Antje will....       2   # Low
```

To this:
```
prompt                     response       reward

Antje:  Will you write...    Chris and Antje...   1   # High
Antje:  Will you write...    Antje will....       0   # Low
```

To this:
```
prompt                     response                                               reward

Antje:  Will you write...    ["Chris and Antje...", "Antje will..."]     [1,0] # 0-index is rewarded
```

# _Note:  If nothing is showing up below, you need to return to finish the previous notebook by labeling the data in Ground Truth!!_

In [25]:
import re
from pprint import pprint

# Flat list of (prompt, response, ranking) tuples: two per completed human loop,
# one for each of the two ranked responses.
human_feedback_items = []

S3_URI_PREFIX = "s3://"

for resp in completed_human_loops:
    # Take bucket and key from the output URI itself rather than assuming the
    # output lives in the session's default bucket (and without treating the
    # bucket name as a regular expression).
    human_feedback_s3_uri = resp["HumanLoopOutput"]["OutputS3Uri"]
    if not human_feedback_s3_uri.startswith(S3_URI_PREFIX):
        raise ValueError(f"Unexpected human loop output URI: {human_feedback_s3_uri!r}")
    output_bucket, _, key = human_feedback_s3_uri[len(S3_URI_PREFIX):].partition("/")

    # Download the output object and parse it as JSON.
    response = s3.get_object(Bucket=output_bucket, Key=key)
    content = response["Body"].read().decode("utf-8")
    json_output = json.loads(content)

    task_object = json_output["inputContent"]['taskObject']
    prompt = task_object['prompt']
    responses = task_object['responses']

    # Only the first human answer is read.
    answer_content = json_output["humanAnswers"][0]["answerContent"]
    response_1_ranking = answer_content['response_1_ranking']
    response_2_ranking = answer_content['response_2_ranking']

    # One row per ranked response, in the same order as `responses`.
    human_feedback_items.append((prompt, responses[0], response_1_ranking))
    human_feedback_items.append((prompt, responses[1], response_2_ranking))

In [26]:
# One row per (prompt, response, ranking) tuple gathered from the human loops.
feedback_columns = ['prompt', 'response', 'ranking']
df_human_feedback_items = pd.DataFrame(
    data=human_feedback_items,
    columns=feedback_columns,
)
df_human_feedback_items.head(n=10)

Unnamed: 0,prompt,response,ranking
0,Antje: Will you write another O'Reilly book w...,Chris and Antje will write another O'Reilly bo...,1
1,Antje: Will you write another O'Reilly book w...,Antje will write the book by herself right now.,2
2,Shelbee: The wine will arrive in 10 minutes.\...,Chris drank the wine in 5 minutes.,2
3,Shelbee: The wine will arrive in 10 minutes.\...,Shelbee drank the wine in 10 minutes.,1


# Convert ranking into 0 or 1 reward

In [27]:
# Number of responses ranked per prompt. With two rankings, ranking 1 becomes
# reward "1" and ranking 2 becomes reward "0".
num_rankings = 2


def _ranking_to_reward(ranking):
    """Return abs(int(ranking) - num_rankings) as a string."""
    distance_from_lowest_rank = abs(int(ranking) - num_rankings)
    return str(distance_from_lowest_rank)


# Both columns are stored as strings; the next cell joins them with ",".
df_human_feedback_items['response'] = df_human_feedback_items['response'].apply(str)
df_human_feedback_items['reward'] = df_human_feedback_items['ranking'].apply(_ranking_to_reward)
df_human_feedback_items.head(10)

Unnamed: 0,prompt,response,ranking,reward
0,Antje: Will you write another O'Reilly book w...,Chris and Antje will write another O'Reilly bo...,1,1
1,Antje: Will you write another O'Reilly book w...,Antje will write the book by herself right now.,2,0
2,Shelbee: The wine will arrive in 10 minutes.\...,Chris drank the wine in 5 minutes.,2,0
3,Shelbee: The wine will arrive in 10 minutes.\...,Shelbee drank the wine in 10 minutes.,1,1


In [28]:
# Collapse to one row per prompt: responses and rewards are concatenated into
# comma-separated strings.
# NOTE(review): a response that itself contains "," cannot be told apart from
# the separator once joined.
per_prompt_aggregations = {
    'prompt': 'first',
    'response': ','.join,
    'reward': ','.join,
}
df_human_feedback_items_grouped_by_prompt = (
    df_human_feedback_items
    .groupby('prompt', as_index=False)
    .agg(per_prompt_aggregations)
)
df_human_feedback_items_grouped_by_prompt

Unnamed: 0,prompt,response,reward
0,Antje: Will you write another O'Reilly book w...,Chris and Antje will write another O'Reilly bo...,10
1,Shelbee: The wine will arrive in 10 minutes.\...,"Chris drank the wine in 5 minutes.,Shelbee dra...",1


In [29]:
# Build the per-prompt lists directly from the ungrouped rows instead of
# splitting the comma-joined strings: splitting on "," would break any response
# that contains a comma, and would fail if this cell were run a second time
# (the columns would already hold lists, which have no `.split`).
responses_by_prompt = (
    df_human_feedback_items
    .groupby('prompt')['response']
    .apply(list)
)
rewards_by_prompt = (
    df_human_feedback_items
    .groupby('prompt')['reward']
    .apply(lambda rewards: [int(reward) for reward in rewards])
)

# Look the lists up by prompt so each row gets the lists for its own prompt.
df_human_feedback_items_grouped_by_prompt['response'] = (
    df_human_feedback_items_grouped_by_prompt['prompt'].map(responses_by_prompt)
)
df_human_feedback_items_grouped_by_prompt['reward'] = (
    df_human_feedback_items_grouped_by_prompt['prompt'].map(rewards_by_prompt)
)
df_human_feedback_items_grouped_by_prompt

Unnamed: 0,prompt,response,reward
0,Antje: Will you write another O'Reilly book w...,[Chris and Antje will write another O'Reilly b...,"[1, 0]"
1,Shelbee: The wine will arrive in 10 minutes.\...,"[Chris drank the wine in 5 minutes., Shelbee d...","[0, 1]"


In [30]:
from datasets import Dataset

# Build a Hugging Face `Dataset` from the grouped pandas DataFrame
# (one row per prompt, with list-valued `response` and `reward` columns).
human_feedback_dataset = Dataset.from_pandas(df_human_feedback_items_grouped_by_prompt)
# Bare expression so the notebook displays the dataset summary.
human_feedback_dataset

Dataset({
    features: ['prompt', 'response', 'reward'],
    num_rows: 2
})

In [31]:
# Persist the dataset with `%store` so it can be restored later with `%store -r`.
%store human_feedback_dataset

Stored 'human_feedback_dataset' (Dataset)


# _YOU MUST LABEL THE DATA BY CLICKING THE LINK ABOVE BEFORE CONTINUING!!_

In [23]:
# NOTE(review): this entire cell is commented out, so running it does nothing.
# If re-enabled (with `%%html` as the first line), the script below clicks the
# hidden button bound to the `kernelmenu:shutdown` command.
# %%html

# <p><b>Shutting down your kernel for this notebook to release resources.</b></p>
# <button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>

# <script>
# try {
#     els = document.getElementsByClassName("sm-command-button");
#     els[0].click();
# }
# catch(err) {
#     // NoOp
# }
# </script>