## Load models

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

torch.set_default_device("cuda")

model_name = {
  "phi-2": "microsoft/phi-2",
  "phi-3": "microsoft/Phi-3-mini-4k-instruct", 
  "mistral": "mistralai/Mistral-7B-v0.3"
}
model = AutoModelForCausalLM.from_pretrained(model_name["phi-2"], torch_dtype="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name["phi-2"], trust_remote_code=True)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.24s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## Prepare datasets

In [2]:
import pandas as pd
import numpy as np
import re

# numpy random generator 
random_generator = np.random.default_rng()

# Load the CSV file into a DataFrame
timing_data = pd.read_csv('../data/timing_data.csv')

# filter by model name 
df_gpt3 = timing_data[timing_data['model'].str.contains('gpt3')]
df_gpt4 = timing_data[timing_data['model'].str.contains('gpt-4')]
df_gpt4_new = timing_data[timing_data['model'].str.contains('gpt4-new')]
df_claude_opus = timing_data[timing_data['model'].str.contains('claude-3-opus')]
df_claude_sonnet = timing_data[timing_data['model'].str.contains('claude-3-sonnet')] 
df_claude_haiku = timing_data[timing_data['model'].str.contains('claude-3-haiku')] 

dataframes = [df_gpt3, df_gpt4, df_gpt4_new, df_claude_opus, df_claude_sonnet, df_claude_haiku]

def is_standard_english(text):
    # This regex pattern matches standard English characters, numbers, and basic punctuation
    pattern = r'^[a-zA-Z0-9\s.,!?()-]+$'
    return bool(re.match(pattern, str(text))) 

def is_long_enough(text, length): 
    return len(str(text)) >= length

preprocess = False
if preprocess == True:
    for df_index, df in enumerate(dataframes):
        # eliminate outliers
        dataframes[df_index] = df[df['time_taken (s)'] < 1000]
    # Delete non-standard characters 
    df_gpt3 = df_gpt3.applymap(lambda x: x if is_standard_english(x) else None)
    df_gpt3 = df_gpt3.dropna()
    # Save dataframe to csv
    df_gpt3.to_csv('../data/timing_data_gpt3.csv', index=False)


# Sample

In [3]:
# Randomly select samples
examples = []
example_num = 5

sample_policy = "head"

if sample_policy == "random":
    # df_gpt3 = df_gpt3.applymap(lambda x: x if is_long_enough(x, 5) else None)
    # df_gpt3 = df_gpt3.dropna()
    df_gpt3_sample = df_gpt3.sample(n=example_num+1, random_state=random_generator)
    # Get the last row of the DataFrame
    df_gpt3_input = df_gpt3_sample.iloc[-1]
    # Get all the rows but the last one
    df_gpt3_example = df_gpt3_sample.iloc[:-1]
    # Iterate through the DataFrame with index starting from 0 
    for index, data in enumerate(df_gpt3_example.iterrows()):
        current_example = ""
        current_example += "Prompt" + str(index + 1) + ": " + data[1]['prompt'] + "\n\n\n"
        current_example += "GPT Response" + str(index + 1) + ": " + data[1]['response'] + "\n\n\n"
        examples.append(current_example)
elif sample_policy == "head": 
    df_gpt3_sample = df_gpt3.head(example_num)
    # Get the last row of the DataFrame
    df_gpt3_input = df_gpt3_sample.iloc[-1]
    # Get all the rows but the last one
    df_gpt3_example = df_gpt3_sample.iloc[:-1]
    # Iterate through the DataFrame with index starting from 0 
    for index, data in enumerate(df_gpt3_example.iterrows()):
        current_example = ""
        current_example += "Prompt" + str(index + 1) + ": " + data[1]['prompt'] + "\n\n\n"
        current_example += "GPT Response" + str(index + 1) + ": " + data[1]['response'] + "\n\n\n"
        examples.append(current_example)
    


# Generate

In [4]:

sys_prompt = """Given a prompt, respond in the same length as GPT-3.5 does. You should return with one single response only. \n\n\n"""
example_prompt = """Here are some examples. Each example consists of a prompt and a response. \n\n\n"""
for example in examples:
    example_prompt += example
input = """Now, the prompt for you to predict is: """ + df_gpt3_input['prompt'] + "\n\n\n"
prompt = sys_prompt + example_prompt + input
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)

outputs = model.generate(**inputs, max_new_tokens = 1000)
text = tokenizer.batch_decode(outputs)[0]
print(text)

print ("\n\n\n --------------- \n\n\n")

print("Ground truth: \n" + df_gpt3_input['response'])

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Given a prompt, respond in the same length as GPT-3.5 does. You should return with one single response only. 


Here are some examples. Each example consists of a prompt and a response. 


Prompt1: Why has ZTBL failed as an agriculture bank


GPT Response1: The failure of Zari Tariqiati Bank Limited (ZTBL) as an agricultural bank can be attributed to various factors such as mismanagement, inefficient lending practices, lack of modernization, and ineffective implementation of policies. Additionally, issues related to loan recovery, governance, and adaptation to changing agricultural needs have also contributed to its challenges.

Efforts to reform and address these issues are essential for ZTBL to fulfill its intended role effectively and support the agricultural sector in Pakistan.

If you have more questions or need further insights, feel free to ask!


Prompt2: Why are some lamps
White and some yellow?


GPT Response2: The color of light from a lamp depends on the type of bulb used. 

## Pull data and export spreadsheets

In [None]:
import pandas