## Load models

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

torch.set_default_device("cuda")

model_name = {
  "phi-2": "microsoft/phi-2",
  "phi-3": "microsoft/Phi-3-mini-4k-instruct"
}
model = AutoModelForCausalLM.from_pretrained(model_name["phi-3"], torch_dtype="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name["phi-3"], trust_remote_code=True)

  from .autonotebook import tqdm as notebook_tqdm


## Prepare examples

In [26]:
import pandas as pd

# Load the CSV file into a DataFrame
timing_data = pd.read_csv('../data/timing_data.csv')

# filter by model name 
df_gpt3 = timing_data[timing_data['model'].str.contains('gpt3')]
df_gpt4 = timing_data[timing_data['model'].str.contains('gpt-4')]
df_gpt4_new = timing_data[timing_data['model'].str.contains('gpt4-new')]
df_claude_opus = timing_data[timing_data['model'].str.contains('claude-3-opus')]
df_claude_sonnet = timing_data[timing_data['model'].str.contains('claude-3-sonnet')] 
df_claude_haiku = timing_data[timing_data['model'].str.contains('claude-3-haiku')] 

dataframes = [df_gpt3, df_gpt4, df_gpt4_new, df_claude_opus, df_claude_sonnet, df_claude_haiku]

for df_index, df in enumerate(dataframes):
    # eliminate outliers
    dataframes[df_index] = df[df['time_taken (s)'] < 1000]

# Randomly select samples
examples = []
example_num = 1
df_gpt3_sample = df_gpt3.sample(n=example_num)
# Iterate through the DataFrame with index starting from 0 
for index, data in enumerate(df_gpt3_sample.iterrows()):
    current_example = ""
    current_example += "\nExample " + str(index + 1) + ": "
    current_example += "Prompt: " + data[1]['prompt']
    current_example += "Response: " + data[1]['response']
    examples.append(current_example)


## Generate

In [27]:
sys_prompt = """Predict the response of GPT-3.5 given a prompt. Output should be in the same length and same style. """
example_prompt = """Here are some examples. """
for example in examples:
    example_prompt += example
input = """
Now, the prompt for you to predict is: For international students coming to the US, which city is better overall: Los Angeles or Boston?"""
prompt = sys_prompt + example_prompt + input
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)

outputs = model.generate(**inputs, max_length = 4000)
text = tokenizer.batch_decode(outputs)[0]
print(text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Predict the response of GPT-3.5 given a prompt. Output should be in the same length and same style. Here are some examples. 
Example 1: Prompt: The query is: If I get mugged and the mugger uses my credit card, will I be liable for it? and the original response is: If your credit card is used fraudulently after being stolen during a mugging, the good news is that you are generally protected from liability for unauthorized transactions. According to most credit card companies' policies and federal regulations, as long as you promptly report the theft or loss of your card, your liability for any unauthorized charges is limited.

It's crucial to report the theft or loss of your credit card to the card issuer as soon as possible to minimize any potential liability. Once you report the card as stolen or lost, the issuer should deactivate the card to prevent further unauthorized use and investigate the fraudulent charges.

Remember, it's always a good idea to check with your specific credit c

## Pull data and export spreadsheets

In [None]:
import pandas