In [1]:
glb_conf = {
    "path_to_dataset": "../data/timing_data.csv",
    "dataset_format": "json", 
    "std_english_only": False,
    "substitute_emoji": True, 
    "eliminate_outliers": True, 
    "overwrite_after_preprocess": False, 
    "sample_count": 3, 
    "sample_policy": "head", 
    "max_new_tokens": 500, 
    "test_count": 10, 
}

## Load models

In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

with torch.no_grad():
  torch.set_default_device("cuda")

  model_name = {
    "phi-2": "microsoft/phi-2",
    "phi-3": "microsoft/Phi-3-mini-4k-instruct", 
    "mistral": "mistralai/Mistral-7B-v0.3"
  }
  model = AutoModelForCausalLM.from_pretrained(model_name["phi-2"], torch_dtype="auto", trust_remote_code=True)
  model = model.eval()
  tokenizer = AutoTokenizer.from_pretrained(model_name["phi-2"], trust_remote_code=True)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.20s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## Prepare datasets

In [3]:
import pandas as pd
import numpy as np
import re
import emoji

with torch.no_grad():
    # numpy random generator 
    random_generator = np.random.default_rng()

    # Load the CSV file into a DataFrame
    timing_data = pd.read_csv(glb_conf["path_to_dataset"])

    # filter by model name 
    df_gpt3 = timing_data[timing_data['model'].str.contains('gpt3')]
    df_gpt4 = timing_data[timing_data['model'].str.contains('gpt-4')]
    df_gpt4_new = timing_data[timing_data['model'].str.contains('gpt4-new')]
    df_claude_opus = timing_data[timing_data['model'].str.contains('claude-3-opus')]
    df_claude_sonnet = timing_data[timing_data['model'].str.contains('claude-3-sonnet')] 
    df_claude_haiku = timing_data[timing_data['model'].str.contains('claude-3-haiku')] 

    dataframes = [df_gpt3, df_gpt4, df_gpt4_new, df_claude_opus, df_claude_sonnet, df_claude_haiku]

    def is_standard_english(text):
        # This regex pattern matches standard English characters, numbers, and basic punctuation
        pattern = r'^[a-zA-Z0-9\s.,!?()-]+$'
        return bool(re.match(pattern, str(text))) 

    def is_long_enough(text, length): 
        return len(str(text)) >= length

    if glb_conf["substitute_emoji"]: 
        # Substitute emoji characters with their names
        for df_index, df in enumerate(dataframes):
            dataframes[df_index]['prompt'] = dataframes[df_index]['prompt'].apply(lambda x: emoji.demojize(x))
            dataframes[df_index]['response'] = dataframes[df_index]['response'].apply(lambda x: emoji.demojize(x))
            
    if glb_conf["eliminate_outliers"]: 
        for df_index, df in enumerate(dataframes):
            # eliminate outliers
            dataframes[df_index] = df[df['time_taken (s)'] < 1000] 
            
    if glb_conf["std_english_only"]: 
        for df_index, df in enumerate(dataframes):
            # Delete non-standard characters 
            dataframes[df_index] = df.applymap(lambda x: x if is_standard_english(x) else None)
            dataframes[df_index] = df.dropna()
            
    if glb_conf["overwrite_after_preprocess"]: 
        # Save dataframe to csv
        df_gpt3.to_csv(glb_conf["path_to_dataset"], index=False) 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataframes[df_index]['prompt'] = dataframes[df_index]['prompt'].apply(lambda x: emoji.demojize(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataframes[df_index]['response'] = dataframes[df_index]['response'].apply(lambda x: emoji.demojize(x))


# Sample

In [4]:
# Randomly select samples
def sample(): 
    examples = []
    example_num = glb_conf["sample_count"]

    sample_policy = glb_conf["sample_policy"]

    if sample_policy == "random":
        # df_gpt3 = df_gpt3.applymap(lambda x: x if is_long_enough(x, 5) else None)
        # df_gpt3 = df_gpt3.dropna()
        df_gpt3_sample = df_gpt3.sample(n=example_num+1, random_state=random_generator)
        # Get the last row of the DataFrame
        df_gpt3_input = df_gpt3_sample.iloc[-1]
        # Get all the rows but the last one
        df_gpt3_example = df_gpt3_sample.iloc[:-1]
        # Iterate through the DataFrame with index starting from 0 
        for index, data in enumerate(df_gpt3_example.iterrows()):
            current_example = ""
            current_example += "Prompt" + str(index + 1) + ": " + data[1]['prompt'] + "\n\n\n"
            current_example += "GPT Response" + str(index + 1) + ": " + data[1]['response'] + "\n\n\n"
            examples.append(current_example)
    elif sample_policy == "head": 
        df_gpt3_sample = df_gpt3.head(example_num)
        # Get the last row of the DataFrame
        df_gpt3_input = df_gpt3_sample.iloc[-1]
        # Get all the rows but the last one
        df_gpt3_example = df_gpt3_sample.iloc[:-1]
        # Iterate through the DataFrame with index starting from 0 
        for index, data in enumerate(df_gpt3_example.iterrows()):
            current_example = ""
            current_example += "Prompt" + str(index + 1) + ": " + data[1]['prompt'] + "\n\n\n"
            current_example += "GPT Response" + str(index + 1) + ": " + data[1]['response'] + "\n\n\n"
            examples.append(current_example)
            
    return examples, df_gpt3_input 
    


# Generate

In [5]:

import tqdm 

result_df = pd.DataFrame(columns=['prompt', 'response', 'ground_truth', 'resp_len', 'gt_len', 'diff'])

sys_prompt = """Given a prompt, respond in the same length as GPT-3.5 does. You should return with one single response only. \n\n\n"""
example_prompt = """Here are some examples. Each example consists of a prompt and a response. \n\n\n"""

with torch.no_grad():
    for i in tqdm.tqdm(range(glb_conf["test_count"])): 
        [examples, df_gpt3_input] = sample() 
        for example in examples:
            # example_prompt += example
            example_prompt += examples[0]
        input = """Now, the prompt for you to predict is: """ + df_gpt3_input['prompt'] + "\n\n\n"
        prompt = sys_prompt + example_prompt + input
        inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)

        max_new_tokens = glb_conf["max_new_tokens"]
        outputs = model.generate(**inputs, max_new_tokens = max_new_tokens)
        text = tokenizer.batch_decode(outputs)[0]
        
        print(text) 
        print ("\n\n\n --------------- \n\n\n")
        print("Ground truth: \n" + df_gpt3_input['response'])
        
        # append to the result dataframe
        new_record = pd.DataFrame.from_records([{
            'prompt':df_gpt3_input['prompt'], 
            'response': text, 
            'ground_truth': df_gpt3_input['response'], 
            'resp_len': len(text), 
            'gt_len': len(df_gpt3_input['response']), 
            'diff': len(text) - len(df_gpt3_input['response'])
        }])
        result_df = pd.concat([result_df, new_record])
    
# save result   
result_df.to_csv("../data/result.csv", index=False) 

  0%|          | 0/10 [00:00<?, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
 10%|█         | 1/10 [00:16<02:28, 16.47s/it]Token indices sequence length is longer than the specified maximum sequence length for this model (2221 > 2048). Running this sequence through the model will result in indexing errors
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Given a prompt, respond in the same length as GPT-3.5 does. You should return with one single response only. 


Here are some examples. Each example consists of a prompt and a response. 


Prompt1: The query is: I'm really intrigued by the idea of visiting Uetliberg Mountain for panoramic views. Can you recommend any specific hiking trails or viewpoints that I shouldn't miss while I'm there? and the original response is: Uetliberg Mountain is a fantastic destination for stunning panoramic views of Zurich and the surrounding areas! Here are a couple of trails and viewpoints I highly recommend checking out: :snow-capped_mountain:

1. Uetliberg Panorama Trail: This easy, well-marked trail leads from the Uetliberg train station to the summit, offering breathtaking views along the way. It's about a 30-minute walk, perfect for a leisurely stroll. Don't miss the observation tower at the top for an even higher vantage point! :sunrise_over_mountains:

2. Planet Trail: For a unique experience, f

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (2048). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.
 20%|██        | 2/10 [00:18<01:03,  7.97s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Given a prompt, respond in the same length as GPT-3.5 does. You should return with one single response only. 


Here are some examples. Each example consists of a prompt and a response. 


Prompt1: The query is: I'm really intrigued by the idea of visiting Uetliberg Mountain for panoramic views. Can you recommend any specific hiking trails or viewpoints that I shouldn't miss while I'm there? and the original response is: Uetliberg Mountain is a fantastic destination for stunning panoramic views of Zurich and the surrounding areas! Here are a couple of trails and viewpoints I highly recommend checking out: :snow-capped_mountain:

1. Uetliberg Panorama Trail: This easy, well-marked trail leads from the Uetliberg train station to the summit, offering breathtaking views along the way. It's about a 30-minute walk, perfect for a leisurely stroll. Don't miss the observation tower at the top for an even higher vantage point! :sunrise_over_mountains:

2. Planet Trail: For a unique experience, f

 30%|███       | 3/10 [00:21<00:40,  5.76s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Given a prompt, respond in the same length as GPT-3.5 does. You should return with one single response only. 


Here are some examples. Each example consists of a prompt and a response. 


Prompt1: The query is: I'm really intrigued by the idea of visiting Uetliberg Mountain for panoramic views. Can you recommend any specific hiking trails or viewpoints that I shouldn't miss while I'm there? and the original response is: Uetliberg Mountain is a fantastic destination for stunning panoramic views of Zurich and the surrounding areas! Here are a couple of trails and viewpoints I highly recommend checking out: :snow-capped_mountain:

1. Uetliberg Panorama Trail: This easy, well-marked trail leads from the Uetliberg train station to the summit, offering breathtaking views along the way. It's about a 30-minute walk, perfect for a leisurely stroll. Don't miss the observation tower at the top for an even higher vantage point! :sunrise_over_mountains:

2. Planet Trail: For a unique experience, f

 40%|████      | 4/10 [00:26<00:31,  5.29s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Given a prompt, respond in the same length as GPT-3.5 does. You should return with one single response only. 


Here are some examples. Each example consists of a prompt and a response. 


Prompt1: The query is: I'm really intrigued by the idea of visiting Uetliberg Mountain for panoramic views. Can you recommend any specific hiking trails or viewpoints that I shouldn't miss while I'm there? and the original response is: Uetliberg Mountain is a fantastic destination for stunning panoramic views of Zurich and the surrounding areas! Here are a couple of trails and viewpoints I highly recommend checking out: :snow-capped_mountain:

1. Uetliberg Panorama Trail: This easy, well-marked trail leads from the Uetliberg train station to the summit, offering breathtaking views along the way. It's about a 30-minute walk, perfect for a leisurely stroll. Don't miss the observation tower at the top for an even higher vantage point! :sunrise_over_mountains:

2. Planet Trail: For a unique experience, f

 50%|█████     | 5/10 [00:31<00:27,  5.47s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Given a prompt, respond in the same length as GPT-3.5 does. You should return with one single response only. 


Here are some examples. Each example consists of a prompt and a response. 


Prompt1: The query is: I'm really intrigued by the idea of visiting Uetliberg Mountain for panoramic views. Can you recommend any specific hiking trails or viewpoints that I shouldn't miss while I'm there? and the original response is: Uetliberg Mountain is a fantastic destination for stunning panoramic views of Zurich and the surrounding areas! Here are a couple of trails and viewpoints I highly recommend checking out: :snow-capped_mountain:

1. Uetliberg Panorama Trail: This easy, well-marked trail leads from the Uetliberg train station to the summit, offering breathtaking views along the way. It's about a 30-minute walk, perfect for a leisurely stroll. Don't miss the observation tower at the top for an even higher vantage point! :sunrise_over_mountains:

2. Planet Trail: For a unique experience, f

 60%|██████    | 6/10 [00:45<00:33,  8.29s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Given a prompt, respond in the same length as GPT-3.5 does. You should return with one single response only. 


Here are some examples. Each example consists of a prompt and a response. 


Prompt1: The query is: I'm really intrigued by the idea of visiting Uetliberg Mountain for panoramic views. Can you recommend any specific hiking trails or viewpoints that I shouldn't miss while I'm there? and the original response is: Uetliberg Mountain is a fantastic destination for stunning panoramic views of Zurich and the surrounding areas! Here are a couple of trails and viewpoints I highly recommend checking out: :snow-capped_mountain:

1. Uetliberg Panorama Trail: This easy, well-marked trail leads from the Uetliberg train station to the summit, offering breathtaking views along the way. It's about a 30-minute walk, perfect for a leisurely stroll. Don't miss the observation tower at the top for an even higher vantage point! :sunrise_over_mountains:

2. Planet Trail: For a unique experience, f

 70%|███████   | 7/10 [01:49<01:18, 26.29s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Given a prompt, respond in the same length as GPT-3.5 does. You should return with one single response only. 


Here are some examples. Each example consists of a prompt and a response. 


Prompt1: The query is: I'm really intrigued by the idea of visiting Uetliberg Mountain for panoramic views. Can you recommend any specific hiking trails or viewpoints that I shouldn't miss while I'm there? and the original response is: Uetliberg Mountain is a fantastic destination for stunning panoramic views of Zurich and the surrounding areas! Here are a couple of trails and viewpoints I highly recommend checking out: :snow-capped_mountain:

1. Uetliberg Panorama Trail: This easy, well-marked trail leads from the Uetliberg train station to the summit, offering breathtaking views along the way. It's about a 30-minute walk, perfect for a leisurely stroll. Don't miss the observation tower at the top for an even higher vantage point! :sunrise_over_mountains:

2. Planet Trail: For a unique experience, f

 70%|███████   | 7/10 [01:59<00:51, 17.13s/it]


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.65 GiB. GPU 

## Pull data and export spreadsheets

In [None]:
import pandas