## Deploy the Platypus2-70B-instruct model

In [2]:
!pip install "sagemaker>=2.175.0" --upgrade --quiet

[0m

In [5]:
# IAM role ARN handed to HuggingFaceModel as `role`.
# NOTE(review): the account-specific ARN is hard-coded here; consider reading it from
# configuration or the environment before sharing this notebook — confirm with the owner.
ROLE = "arn:aws:iam::111039177464:role/gdmlml-custom-us-west-2-cerbo-role" 

In [3]:
from sagemaker.huggingface import get_huggingface_llm_image_uri

# Resolve the container image URI for the "huggingface" LLM backend, pinned to version 0.9.3.
llm_image = get_huggingface_llm_image_uri("huggingface", version="0.9.3")

# Show which ECR image was resolved.
print(f"llm image uri: {llm_image}")



llm image uri: 763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.1-tgi0.9.3-gpu-py39-cu118-ubuntu20.04


In [7]:
import json
from sagemaker.huggingface import HuggingFaceModel

# Hosting settings reused by the deploy cell.
instance_type = "ml.g5.48xlarge"
number_of_gpu = 8
health_check_timeout = 1800

# Environment passed to the inference container; numeric settings are serialized with json.dumps.
config = dict(
    HF_MODEL_ID="garage-bAInd/Platypus2-70B-instruct",  # model_id from hf.co/models
    SM_NUM_GPUS=json.dumps(number_of_gpu),  # number of GPUs used per replica
    MAX_INPUT_LENGTH=json.dumps(2048),  # max length of input text
    MAX_TOTAL_TOKENS=json.dumps(4096),  # max length of the generation (including input text)
    MAX_BATCH_TOTAL_TOKENS=json.dumps(8192),  # limits the tokens processed in parallel during generation
)

# Optional settings, left disabled:
#   config['HUGGING_FACE_HUB_TOKEN'] = "<REPLACE WITH YOUR TOKEN>"
#   config['HF_MODEL_QUANTIZE'] = "bitsandbytes"  # enable to quantize
#   assert config['HUGGING_FACE_HUB_TOKEN'] != "<REPLACE WITH YOUR TOKEN>", "Please set your Hugging Face Hub token"

# Build the model object from the container image, the IAM role and the environment above.
llm_model = HuggingFaceModel(
    image_uri=llm_image,
    role=ROLE,
    env=config,
)

In [8]:
# Deploy model to an endpoint
# https://sagemaker.readthedocs.io/en/stable/api/inference/model.html#sagemaker.model.Model.deploy
llm = llm_model.deploy(
  initial_instance_count=1,
  instance_type=instance_type,
  container_startup_health_check_timeout=health_check_timeout, # 1800 seconds (30 minutes, not 10) to be able to load the model
)

----------------------------!

## Install and import packages

In [10]:
# Install/upgrade langchain and openai into the kernel's environment.
# NOTE(review): versions are not pinned, so a re-run may pick up different releases — consider pinning.
%pip install --upgrade --quiet langchain
%pip install --upgrade --quiet openai

[0mNote: you may need to restart the kernel to use updated packages.
[0mNote: you may need to restart the kernel to use updated packages.


In [11]:
# Remove typing_extensions and install the latest release again.
# NOTE(review): presumably a workaround for a version conflict introduced by the installs
# above — confirm; pip's output suggests restarting the kernel afterwards.
%pip uninstall --quiet typing_extensions --yes
%pip install --upgrade --quiet typing_extensions

[0mNote: you may need to restart the kernel to use updated packages.
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
spyder 5.3.3 requires pyqt5<5.16, which is not installed.
spyder 5.3.3 requires pyqtwebengine<5.16, which is not installed.
panel 0.13.1 requires bokeh<2.5.0,>=2.4.0, but you have bokeh 3.2.1 which is incompatible.
spyder 5.3.3 requires ipython<8.0.0,>=7.31.1, but you have ipython 8.14.0 which is incompatible.
spyder 5.3.3 requires pylint<3.0,>=2.5.0, but you have pylint 3.0.0a6 which is incompatible.[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [12]:
import openai
import pandas as pd
import time
# from dotenv import load_dotenv
from langchain.chat_models import ChatAnyscale, ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
import os
# from multichoice import Multichoice
from prompt_mgr import PromptMgr

## Read input dataset and create prompt

In [13]:
# Load the validation sentence pairs. Each row carries an article sentence plus a
# correct and an incorrect candidate sentence (see the preview below).
df = pd.read_json('resources/datasets/val_sentence_pairs.json')
df.head()

Unnamed: 0,article_id,article_sent,correct_sent,correct_sent_id,incorrect_sent,incorrect_sent_id
0,67c3b0a2da09ce32a83c098da1e0aff034e790d0,the abc have reported that those who receive c...,those who receive centrelink payments made up ...,84-0,the abc have reported that those who receive c...,40-0
1,7c4ae3beb354e0a9f11fa09f8fbb6613a50db5cc,five ambitious clubs are locked in a scramble ...,five ambitious clubs are locked in a scramble ...,44-1,five ambitious clubs are locked in a bid for t...,17-1
2,7e37b3e3623ae0ab6568e0e73584b16fae0cc00e,but it wasn't until last year that the 25 year...,the 25 year old from pennsylvania went on a le...,45-1,the 25 year old from pennsylvania went viral o...,4-1
3,cdbfba47929b459daf474a1a3dee37d2351729ea,seven games involving nimes were investigated ...,seven games involving nimes were investigated ...,93-2,seven games involving nimes were arrested last...,83-2
4,696ac804af5fcab8853c583f5c97e1eda6e2493a,the driver's side of the windscreen immediatel...,driver's side of the windscreen immediately sh...,61-2,driver's side were immediately shatters and fa...,42-2


In [14]:
# Prompt manager over the prompt directory; query_model binds the 'consistent' and
# 'system' entries from it.
pm = PromptMgr(src_dir = 'resources/environments/answer_first/prompts')

In [33]:
def query_model(row, sm_model_predictor, prompt_mgr, swap_answers = False):
    """Ask the model which of two summaries is consistent with an article sentence.

    Args:
        row: Mapping-like record providing 'article_sent', 'correct_sent' and
            'incorrect_sent'.
        sm_model_predictor: Object whose ``predict(payload)`` is invoked with the
            assembled payload.
        prompt_mgr: Object whose ``bind(name).render(...)`` yields the 'consistent'
            and 'system' prompt texts.
        swap_answers: When truthy, the incorrect sentence is presented as option A
            and the correct one as option B; otherwise the order is reversed.

    Returns:
        dict: ``{'output': ...}`` holding the 'generated_text' of the first item
        of the predictor's response.
    """
    # Choose which sentence is shown as option A and which as option B.
    if swap_answers:
        key_a, key_b = 'incorrect_sent', 'correct_sent'
    else:
        key_a, key_b = 'correct_sent', 'incorrect_sent'

    prompt = prompt_mgr.bind('consistent').render(
        article_sent=row['article_sent'],
        option_a=row[key_a],
        option_b=row[key_b],
    )
    system_prompt = prompt_mgr.bind('system').render()

    # Instruction/response framing sent as the single "inputs" string.
    payload = {
        "inputs": f"### Instruction:\n\n{system_prompt}\n{prompt}\n\n### Response:"
    }

    generations = sm_model_predictor.predict(payload)
    return {'output': generations[0]['generated_text']}

In [39]:
# Query the model once per dataset row with the answer options swapped
# (incorrect sentence as option A, correct sentence as option B).
# Rows come from iterrows() instead of df.loc[i] over range(len(df)), so the loop
# no longer depends on the index labels being exactly 0..n-1.
results_list = [
    query_model(row, llm, pm, swap_answers=True)
    for _, row in df.iterrows()
]

# Create a new DataFrame from the results list
results_df = pd.DataFrame(results_list)

In [40]:
results_df.loc[0]['output']

"### Instruction:\n\nYou are an assistant that helps determine the factuality of statements. Being precise, factual and following the user's instructions is important for this task. \nIt is acceptable to answer in incomplete sentences e.g. just say 'x' and not 'The correct answer is x'. \n\nDecide which of the following summary is more consistent with the article\nsentence. \n\nNote that consistency means\nall information in the summary is supported by the article.\n\nArticle Sentence: the abc have reported that those who receive centrelink payments made up half of radio rental's income last year.\nSummary A: the abc have reported that those who receive centrelink payments made up radio rental's income last year.\nSummary B: those who receive centrelink payments made up half of radio rental's income last year.\n\nAnswer (A or B):\n\n\n### Response:\nB"

In [41]:
results_df.to_json('Platypus-70b-instruct-output-swapped.json')

In [42]:
# Query the model once per dataset row with the original option order
# (correct sentence as option A, incorrect sentence as option B).
# Rows come from iterrows() instead of df.loc[i] over range(len(df)), so the loop
# no longer depends on the index labels being exactly 0..n-1.
results_list_no_swap = [
    query_model(row, llm, pm, swap_answers=False)
    for _, row in df.iterrows()
]

# Create a new DataFrame from the results list
results_df_no_swap = pd.DataFrame(results_list_no_swap)

In [43]:
results_df_no_swap.loc[0]['output']

"### Instruction:\n\nYou are an assistant that helps determine the factuality of statements. Being precise, factual and following the user's instructions is important for this task. \nIt is acceptable to answer in incomplete sentences e.g. just say 'x' and not 'The correct answer is x'. \n\nDecide which of the following summary is more consistent with the article\nsentence. \n\nNote that consistency means\nall information in the summary is supported by the article.\n\nArticle Sentence: the abc have reported that those who receive centrelink payments made up half of radio rental's income last year.\nSummary A: those who receive centrelink payments made up half of radio rental's income last year.\nSummary B: the abc have reported that those who receive centrelink payments made up radio rental's income last year.\n\nAnswer (A or B):\n\n\n### Response:\nA"

In [44]:
results_df_no_swap.to_json('Platypus-70b-instruct-output-no-swap.json')

## Postprocessing and analysis

In [45]:
df = pd.read_json('resources/datasets/val_sentence_pairs.json')
df.head()

Unnamed: 0,article_id,article_sent,correct_sent,correct_sent_id,incorrect_sent,incorrect_sent_id
0,67c3b0a2da09ce32a83c098da1e0aff034e790d0,the abc have reported that those who receive c...,those who receive centrelink payments made up ...,84-0,the abc have reported that those who receive c...,40-0
1,7c4ae3beb354e0a9f11fa09f8fbb6613a50db5cc,five ambitious clubs are locked in a scramble ...,five ambitious clubs are locked in a scramble ...,44-1,five ambitious clubs are locked in a bid for t...,17-1
2,7e37b3e3623ae0ab6568e0e73584b16fae0cc00e,but it wasn't until last year that the 25 year...,the 25 year old from pennsylvania went on a le...,45-1,the 25 year old from pennsylvania went viral o...,4-1
3,cdbfba47929b459daf474a1a3dee37d2351729ea,seven games involving nimes were investigated ...,seven games involving nimes were investigated ...,93-2,seven games involving nimes were arrested last...,83-2
4,696ac804af5fcab8853c583f5c97e1eda6e2493a,the driver's side of the windscreen immediatel...,driver's side of the windscreen immediately sh...,61-2,driver's side were immediately shatters and fa...,42-2


In [70]:
input_df = df

In [71]:
# Reload the raw model outputs that the query cells wrote to JSON
# (one file for the swapped option order, one for the original order).
Platypus_70b_instruct_swapped_df  = pd.read_json('Platypus-70b-instruct-output-swapped.json')
Platypus_70b_instruct_no_swap_df  = pd.read_json('Platypus-70b-instruct-output-no-swap.json')

In [72]:
# Give each run's 'output' column a distinct name so the two frames can sit side by side.
Platypus_70b_instruct_swapped_df = Platypus_70b_instruct_swapped_df.rename(
    columns={'output': 'Platypus_70b_instruct_swapped_output'}
)
Platypus_70b_instruct_no_swap_df = Platypus_70b_instruct_no_swap_df.rename(
    columns={'output': 'Platypus_70b_instruct_no_swap_output'}
)

In [73]:
# Place the input rows and both sets of model outputs side by side (aligned on the index).
combined_df = pd.concat(
    [
        input_df,
        Platypus_70b_instruct_swapped_df,
        Platypus_70b_instruct_no_swap_df,
    ],
    axis=1,
)

In [74]:
combined_df.head()

Unnamed: 0,article_id,article_sent,correct_sent,correct_sent_id,incorrect_sent,incorrect_sent_id,Platypus_70b_instruct_swapped_output,Platypus_70b_instruct_no_swap_output
0,67c3b0a2da09ce32a83c098da1e0aff034e790d0,the abc have reported that those who receive c...,those who receive centrelink payments made up ...,84-0,the abc have reported that those who receive c...,40-0,### Instruction:\n\nYou are an assistant that ...,### Instruction:\n\nYou are an assistant that ...
1,7c4ae3beb354e0a9f11fa09f8fbb6613a50db5cc,five ambitious clubs are locked in a scramble ...,five ambitious clubs are locked in a scramble ...,44-1,five ambitious clubs are locked in a bid for t...,17-1,### Instruction:\n\nYou are an assistant that ...,### Instruction:\n\nYou are an assistant that ...
2,7e37b3e3623ae0ab6568e0e73584b16fae0cc00e,but it wasn't until last year that the 25 year...,the 25 year old from pennsylvania went on a le...,45-1,the 25 year old from pennsylvania went viral o...,4-1,### Instruction:\n\nYou are an assistant that ...,### Instruction:\n\nYou are an assistant that ...
3,cdbfba47929b459daf474a1a3dee37d2351729ea,seven games involving nimes were investigated ...,seven games involving nimes were investigated ...,93-2,seven games involving nimes were arrested last...,83-2,### Instruction:\n\nYou are an assistant that ...,### Instruction:\n\nYou are an assistant that ...
4,696ac804af5fcab8853c583f5c97e1eda6e2493a,the driver's side of the windscreen immediatel...,driver's side of the windscreen immediately sh...,61-2,driver's side were immediately shatters and fa...,42-2,### Instruction:\n\nYou are an assistant that ...,### Instruction:\n\nYou are an assistant that ...


In [75]:
def extract_last_word(input_string):
    """Return the final whitespace-delimited token of ``input_string``.

    An empty string is returned when the input contains no tokens at all
    (empty or whitespace-only text).
    """
    tokens = input_string.split()
    return tokens[-1] if tokens else ""

In [76]:
# Reduce each raw model output to its final token, which is where the A/B answer is expected.
combined_df['Platypus_70b_instruct_swapped_output_extracted'] = (
    combined_df['Platypus_70b_instruct_swapped_output'].map(extract_last_word)
)
combined_df['Platypus_70b_instruct_no_swap_output_extracted'] = (
    combined_df['Platypus_70b_instruct_no_swap_output'].map(extract_last_word)
)

In [77]:
combined_df.head()

Unnamed: 0,article_id,article_sent,correct_sent,correct_sent_id,incorrect_sent,incorrect_sent_id,Platypus_70b_instruct_swapped_output,Platypus_70b_instruct_no_swap_output,Platypus_70b_instruct_swapped_output_extracted,Platypus_70b_instruct_no_swap_output_extracted
0,67c3b0a2da09ce32a83c098da1e0aff034e790d0,the abc have reported that those who receive c...,those who receive centrelink payments made up ...,84-0,the abc have reported that those who receive c...,40-0,### Instruction:\n\nYou are an assistant that ...,### Instruction:\n\nYou are an assistant that ...,B,A
1,7c4ae3beb354e0a9f11fa09f8fbb6613a50db5cc,five ambitious clubs are locked in a scramble ...,five ambitious clubs are locked in a scramble ...,44-1,five ambitious clubs are locked in a bid for t...,17-1,### Instruction:\n\nYou are an assistant that ...,### Instruction:\n\nYou are an assistant that ...,B,A
2,7e37b3e3623ae0ab6568e0e73584b16fae0cc00e,but it wasn't until last year that the 25 year...,the 25 year old from pennsylvania went on a le...,45-1,the 25 year old from pennsylvania went viral o...,4-1,### Instruction:\n\nYou are an assistant that ...,### Instruction:\n\nYou are an assistant that ...,B,A
3,cdbfba47929b459daf474a1a3dee37d2351729ea,seven games involving nimes were investigated ...,seven games involving nimes were investigated ...,93-2,seven games involving nimes were arrested last...,83-2,### Instruction:\n\nYou are an assistant that ...,### Instruction:\n\nYou are an assistant that ...,B,A
4,696ac804af5fcab8853c583f5c97e1eda6e2493a,the driver's side of the windscreen immediatel...,driver's side of the windscreen immediately sh...,61-2,driver's side were immediately shatters and fa...,42-2,### Instruction:\n\nYou are an assistant that ...,### Instruction:\n\nYou are an assistant that ...,B,A


In [79]:
def is_correct(row):
    """Classify a pair of extracted answers: (swapped run, no-swap run).

    In the swapped run the correct sentence is option B; in the no-swap run it is
    option A. The pair is therefore labelled:

    * ``'Y'``  - B then A: correct in both runs.
    * ``'N'``  - A then B: incorrect in both runs.
    * ``'AA'`` - A in both runs (the answer followed the position, not the content).
    * ``'BB'`` - B in both runs.
    * ``'No answer'`` - anything else.

    Args:
        row: Two-element record; a pandas Series (as passed by
            ``DataFrame.apply(..., axis=1)``) or any plain sequence.

    Returns:
        str: One of the labels above.
    """
    # Read by position explicitly. Integer keys on a label-indexed Series rely on a
    # deprecated positional fallback, so use .iloc whenever the record offers it.
    if hasattr(row, 'iloc'):
        swapped_answer, no_swap_answer = row.iloc[0], row.iloc[1]
    else:
        swapped_answer, no_swap_answer = row[0], row[1]

    if swapped_answer == 'B' and no_swap_answer == 'A':
        return 'Y'
    if swapped_answer == 'A' and no_swap_answer == 'A':
        return 'AA'
    if swapped_answer == 'B' and no_swap_answer == 'B':
        return 'BB'
    if swapped_answer == 'A' and no_swap_answer == 'B':
        return 'N'
    return 'No answer'

In [80]:
# Label every row from its (swapped, no-swap) answer pair; the column order matters
# because is_correct reads the pair by position.
combined_df['Platypus_70b_instruct_analysis_result'] = combined_df[
    [
        'Platypus_70b_instruct_swapped_output_extracted',
        'Platypus_70b_instruct_no_swap_output_extracted',
    ]
].apply(is_correct, axis=1)

In [81]:
combined_df.head()

Unnamed: 0,article_id,article_sent,correct_sent,correct_sent_id,incorrect_sent,incorrect_sent_id,Platypus_70b_instruct_swapped_output,Platypus_70b_instruct_no_swap_output,Platypus_70b_instruct_swapped_output_extracted,Platypus_70b_instruct_no_swap_output_extracted,Platypus_70b_instruct_analysis_result
0,67c3b0a2da09ce32a83c098da1e0aff034e790d0,the abc have reported that those who receive c...,those who receive centrelink payments made up ...,84-0,the abc have reported that those who receive c...,40-0,### Instruction:\n\nYou are an assistant that ...,### Instruction:\n\nYou are an assistant that ...,B,A,Y
1,7c4ae3beb354e0a9f11fa09f8fbb6613a50db5cc,five ambitious clubs are locked in a scramble ...,five ambitious clubs are locked in a scramble ...,44-1,five ambitious clubs are locked in a bid for t...,17-1,### Instruction:\n\nYou are an assistant that ...,### Instruction:\n\nYou are an assistant that ...,B,A,Y
2,7e37b3e3623ae0ab6568e0e73584b16fae0cc00e,but it wasn't until last year that the 25 year...,the 25 year old from pennsylvania went on a le...,45-1,the 25 year old from pennsylvania went viral o...,4-1,### Instruction:\n\nYou are an assistant that ...,### Instruction:\n\nYou are an assistant that ...,B,A,Y
3,cdbfba47929b459daf474a1a3dee37d2351729ea,seven games involving nimes were investigated ...,seven games involving nimes were investigated ...,93-2,seven games involving nimes were arrested last...,83-2,### Instruction:\n\nYou are an assistant that ...,### Instruction:\n\nYou are an assistant that ...,B,A,Y
4,696ac804af5fcab8853c583f5c97e1eda6e2493a,the driver's side of the windscreen immediatel...,driver's side of the windscreen immediately sh...,61-2,driver's side were immediately shatters and fa...,42-2,### Instruction:\n\nYou are an assistant that ...,### Instruction:\n\nYou are an assistant that ...,B,A,Y


In [88]:
def acc_bias(col, df=None):
    """Summarize the analysis labels in column ``col``.

    Shares are computed over all rows of the frame. Labels that never occur
    ('Y', 'N', 'AA' or 'BB') count as 0 instead of raising ``KeyError``.

    Args:
        col: Name of the column holding the labels 'Y', 'N', 'AA', 'BB'
            (other values are ignored apart from counting towards the row total).
        df: Frame to analyse. Defaults to the notebook-level ``combined_df``.

    Returns:
        tuple: ``(acc, in_acc, aa_ratio, bb_ratio, bias, towards)`` where
        ``acc``, ``in_acc`` and ``bias`` are fractions in [0, 1] (shares of 'Y',
        of 'N', and |AA - BB|), ``aa_ratio`` and ``bb_ratio`` are percentages,
        and ``towards`` is 'A' when AA is strictly more frequent than BB, else 'B'.
    """
    frame = combined_df if df is None else df

    # Fraction of rows per label, with absent labels filled in as 0.
    shares = (frame[col].value_counts() / len(frame)).reindex(
        ['Y', 'N', 'AA', 'BB'], fill_value=0
    )

    acc = shares['Y']
    in_acc = shares['N']
    aa_share = shares['AA']
    bb_share = shares['BB']

    bias = abs(aa_share - bb_share)
    aa_ratio = aa_share * 100
    bb_ratio = bb_share * 100
    towards = 'A' if aa_share > bb_share else 'B'

    return acc, in_acc, aa_ratio, bb_ratio, bias, towards

In [87]:
# Print an accuracy / position-bias summary line per model prefix.
# acc_bias returns acc, in_acc and bias as fractions (scaled to percent here), while
# aa_ratio and bb_ratio already come back as percentages.
# NOTE(review): the ValueError recorded under this cell was produced when it ran as In [87],
# before acc_bias was (re)defined as In [88]; it does not match the 6-value return of the
# definition in this notebook. Re-run after a kernel restart to refresh the output.
for m in ['Platypus_70b_instruct',]:
    acc, in_acc, aa_ratio, bb_ratio, bias, towards = acc_bias(m+'_analysis_result')
    acc = acc*100
    in_acc = in_acc*100
    bias = bias*100
    print(f'{m}:   \tAccuracy: {acc:.1f}%     \tInaccuracy: {in_acc:.1f}%     \tAA: {aa_ratio:.1f}%\tBB: {bb_ratio:.1f}%\tBias: {bias:.1f}% towards {towards}')

ValueError: too many values to unpack (expected 6)

## Cleanup

In [69]:
# Tear down what the deploy cell created: first the model, then the endpoint,
# via the object returned by llm_model.deploy().
# NOTE(review): presumably required to stop paying for the hosting instance — confirm.
llm.delete_model()
llm.delete_endpoint()