In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the data file and the results file used
# later in this notebook are both addressed under /content/drive/MyDrive.
drive.mount('/content/drive', force_remount=False)

Mounted at /content/drive


In [None]:
!pip install transformers -q
!pip install -q -U bitsandbytes>=0.44.0
!pip install rouge-score -q
!pip install accelerate -q
!pip install openai -q

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import bitsandbytes
import pandas as pd
import numpy as np
from rouge_score import rouge_scorer
import re
import huggingface_hub
from google.colab import userdata
from openai import OpenAI


# ---- Secrets (read from the Colab secrets store) ----
HF_WRITE_KEY = userdata.get('HF_WRITE_KEY')
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')

# ---- Run configuration ----
# Excel workbook holding the cleaned API-Bank rows.
PATH_TO_DATA = '/content/drive/MyDrive/cleaned_api_bank_data.xlsx'
# Model identifier passed to the chat-completions call.
# Alternative kept from the original comment: "microsoft/phi-2".
MODEL = "gpt-3.5-turbo-0125"
# Not referenced by any cell visible in this notebook section.
LOAD_IN_4_BIT = True
# Number of test rows to sample; None evaluates every test row.
SAMPLE = None

# ---- Service clients ----
# Authenticate with the Hugging Face Hub, then build the OpenAI client used
# for all completions.
huggingface_hub.login(token=HF_WRITE_KEY)
client = OpenAI(api_key=OPENAI_API_KEY)




#### Load in data

In [None]:
from tqdm.auto import tqdm
# Registers DataFrame.progress_apply, which the answer-generation step relies on.
tqdm.pandas()

# Read the full dataset (all splits) from the Excel workbook.
api_bank_df = pd.read_excel(PATH_TO_DATA)
# Note: this is not the cell's last expression, so the counts are not displayed.
api_bank_df.split.value_counts()

# Take an explicit copy of the test split. Without it `test_results` is a
# boolean-mask slice of `api_bank_df`, and every later column assignment on it
# raises pandas' SettingWithCopyWarning.
test_results = api_bank_df[api_bank_df['split']=='test'].copy()
if SAMPLE:
  # Evaluate only a random subset of SAMPLE rows.
  test_results = test_results.sample(SAMPLE)
else:
  print("No sampling!")

No sampling!


In [None]:
# Preview the prompt pieces and the reference completion of the first row.
first_example = api_bank_df.iloc[0]
print(f"INSTRUCTION: {first_example.instruction}\n\n INPUT: {first_example.input}\n\n EXPECTED COMPLETION: {first_example.completion}")

INSTRUCTION: 
Generate an API request in the format of [ApiName(key1='value1', key2='value2', ...)] based on the previous dialogue context.
The current time is 2039-03-09 18:56:09 Wednesday.
Input: 
User: User's utterence
AI: AI's response

Expected output:
API-Request: [ApiName(key1='value1', key2='value2', ...)]

API descriptions:


 INPUT: {"apiCode": "Get_All_Sessions", "description": "Get the list of all available yoga and meditation sessions.", "parameters": {}, "response": {"data": {"description": "List of available sessions.", "type": "list", "items": {"type": "object", "properties": {"session_name": {"description": "Name of the session.", "type": "string"}, "session_date": {"description": "Date of the session.", "type": "string", "format": "formatted"}, "session_time": {"description": "Time of the session.", "type": "string", "format": "formatted"}, "session_instructor": {"description": "Name of the session instructor.", "type": "string"}, "session_description": {"description"

In [None]:

def generate_completion(input_str, instruction,client=client):
    """Generate one completion with the OpenAI chat-completions endpoint.

    Args:
        input_str: Text sent as the ``system`` message.
        instruction: Text sent as the ``user`` message.
        client: OpenAI client used for the request; defaults to the
            module-level ``client``.

    Returns:
        The content of the first returned choice as a string. If the API
        returns no content (``None``), an empty string is returned so that
        downstream truthiness checks treat the answer as missing instead of
        seeing the literal text ``"None"``.
    """

    completion = client.chat.completions.create(
      model=MODEL,
      messages=[
        {"role": "system", "content": input_str},
        {"role": "user", "content": instruction}
      ],
      # temperature=0 requests the least random sampling for repeatable evals
      temperature=0
    )

    content = completion.choices[0].message.content
    # str(None) would produce the truthy string "None"; map a missing body to "".
    return "" if content is None else str(content)


  # api_bank_df.head(1)

def generate_model_answers(test_results, model_name):
    """Generate a completion for every row and return the annotated frame.

    Args:
        test_results: DataFrame with ``input`` and ``instruction`` columns.
        model_name: Label stored in the ``model`` column of every row. It is
            only a label; the request itself is made by ``generate_completion``.

    Returns:
        A new DataFrame with two added columns: ``model_answer`` (the generated
        text) and ``model`` (``model_name``). The frame passed in is left
        untouched, so callers must use the return value.

    Note:
        Uses ``DataFrame.progress_apply``, which is only available after
        ``tqdm.pandas()`` has been called.
    """

    # Work on a private copy: assigning columns directly on the argument
    # mutates the caller's data and, when that argument is a slice of another
    # frame, triggers SettingWithCopyWarning.
    results = test_results.copy()

    results['model_answer'] = results.progress_apply(
        lambda row: generate_completion(row['input'], row['instruction']),
        axis=1,
    )

    # A per-row model label lets results from several models be concatenated
    # together in long format.
    results['model'] = model_name

    return results

# Snapshot of the test rows taken before any model columns are added.
test_results_copy = test_results.copy(deep=True)
# Query the model for every test row and label the rows with the model name.
test_results = generate_model_answers(test_results, MODEL)

  0%|          | 0/997 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_results['model_answer'] = test_results.progress_apply(lambda x: generate_completion(x['input'], x['instruction'],), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_results['model'] = model_name


In [None]:
# Light cleanup of the reference columns: cast to str and trim surrounding whitespace
for text_column in ('completion', 'function_call', 'function_name'):
  test_results[text_column] = test_results[text_column].astype(str).str.strip()

# ROUGE-1 scorer with stemming enabled, used as the default scorer for the evaluation
scorer = rouge_scorer.RougeScorer(['rouge1'], use_stemmer=True)

def evaluate_test_results(df,rouge_scorer=scorer):
    """Score every row of a results frame and return an annotated copy.

    Reads the ``completion``, ``function_call``, ``function_name`` and
    ``model_answer`` columns of ``df`` and adds, in this order:
    ``contained_correct_call``, ``exact_match``, ``contains_function_name``,
    ``answer_included_list``, ``answer_list``, ``rouge_1_score``,
    ``rouge_1_precision_list``, ``rouge_1_recall_list`` and ``rouge_1_f1_list``.
    ``df`` itself is not modified.

    Args:
        df: DataFrame holding the reference columns and the model answers.
        rouge_scorer: Object exposing ``score(target=..., prediction=...)``
            whose result has a ``'rouge1'`` entry with ``precision``,
            ``recall`` and ``fmeasure`` attributes.

    Returns:
        A copy of ``df`` with the metric columns appended.
    """
    bracket_pattern = re.compile(r'\[.*\]')
    scored = df.copy()

    def has_expected_completion(row):
        # ACCURACY: the reference completion appears somewhere in the answer.
        return row['completion'] in row['model_answer']

    def is_exact_match(row):
        # Verbatim match against the reference call; None for an empty answer.
        if not row['model_answer']:
            return None
        return row['function_call'] == row['model_answer']

    def mentions_function_name(row):
        # The correct function name occurs in the answer; None for an empty answer.
        if not row['model_answer']:
            return None
        return row['function_name'] in row['model_answer']

    def has_bracketed_list(row):
        # Parseability: the answer contains a "[...]" span; None for an empty answer.
        if not row['model_answer']:
            return None
        return bool(bracket_pattern.search(row['model_answer']))

    def extract_bracketed_list(row):
        # The matched "[...]" text, or '' when no such span was found.
        if not row['answer_included_list']:
            return ''
        return bracket_pattern.search(row['model_answer']).group(0)

    def rouge_against_reference(row):
        # ROUGE-1 of the extracted list against the reference completion.
        return rouge_scorer.score(target=row['completion'], prediction=row['answer_list'])

    # Order matters: later helpers read columns written by earlier ones.
    row_metrics = (
        ('contained_correct_call', has_expected_completion),
        ('exact_match', is_exact_match),
        ('contains_function_name', mentions_function_name),
        ('answer_included_list', has_bracketed_list),
        ('answer_list', extract_bracketed_list),
        ('rouge_1_score', rouge_against_reference),
    )
    for column_name, row_fn in row_metrics:
        scored[column_name] = scored.apply(row_fn, axis=1)

    # Unpack the individual ROUGE-1 components into their own columns.
    rouge_fields = (
        ('rouge_1_precision_list', 'precision'),
        ('rouge_1_recall_list', 'recall'),
        ('rouge_1_f1_list', 'fmeasure'),
    )
    for column_name, field in rouge_fields:
        scored[column_name] = scored['rouge_1_score'].apply(
            lambda score, field=field: getattr(score['rouge1'], field)
        )

    return scored

evaluated_results = evaluate_test_results(test_results)

# Column-wise mean of each metric, displayed as the cell output
metric_columns = [
    'contained_correct_call',
    'exact_match',
    'answer_included_list',
    'contains_function_name',
    'rouge_1_precision_list',
    'rouge_1_recall_list',
    'rouge_1_f1_list',
]
evaluated_results[metric_columns].mean()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_results['completion']=test_results['completion'].astype(str).str.strip()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_results['function_call']=test_results['function_call'].astype(str).str.strip()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_results['function_name']=test_results[

contained_correct_call    0.283852
exact_match               0.000000
answer_included_list      0.815446
contains_function_name    0.646941
rouge_1_precision_list    0.482745
rouge_1_recall_list       0.517158
rouge_1_f1_list           0.490196
dtype: float64

In [None]:
# Results workbook on the mounted Drive, named after the last path segment of MODEL;
# hyphens in the final path are replaced with underscores.
model_slug = MODEL.rsplit('/', 1)[-1]
OUTPUT_PATH = f"/content/drive/MyDrive/api_bank_results_{model_slug}.xlsx".replace("-","_")

# Persist the per-row evaluation (including the DataFrame index) to Excel.
evaluated_results.to_excel(OUTPUT_PATH)

In [None]:
from google.colab import runtime

# Final step of the notebook: release the Colab runtime once all cells above
# have finished, so the session does not keep holding resources.
runtime.unassign()