## Imports

In [None]:
from model_loader import HuggingfaceModel
from simpleqa_eval import SimpleQAEvalMod
import pandas as pd
import numpy as np
from datasets import load_dataset
from openai import OpenAI
import joblib


## Constructing the SQuAD 2.0 Dataset

In [None]:


def _build_base_prompt(context, question):
    """Return the zero-shot prompt for a single context/question pair."""
    return (
        "Answer the following question as briefly as possible.\n"
        f"Context: {context}\nQuestion: {question}\nAnswer:"
    )


# Fetch SQuAD 2.0 and materialise both splits as pandas DataFrames.
squad_dataset = load_dataset("squad_v2")
train_df = pd.DataFrame(squad_dataset['train'])
validation_df = pd.DataFrame(squad_dataset['validation'])

# Both splits get the same two derived columns, in the same order:
#   base_prompt   - zero-shot prompt built from the row's context and question
#   is_impossible - True when the row carries no reference answer text
for split_df in (train_df, validation_df):
    split_df["base_prompt"] = [
        _build_base_prompt(context, question)
        for context, question in zip(split_df['context'], split_df['question'])
    ]
    split_df['is_impossible'] = split_df['answers'].map(
        lambda answer: len(answer['text']) == 0
    )

# Partition each split into answerable and unanswerable questions.
train_impossible_mask = train_df['is_impossible']
possible_samples_train = train_df[~train_impossible_mask]
impossible_samples_train = train_df[train_impossible_mask]

validation_impossible_mask = validation_df['is_impossible']
possible_samples_test = validation_df[~validation_impossible_mask]
impossible_samples_test = validation_df[validation_impossible_mask]

# Draw 500 rows of each kind from the validation split (fixed seed) and tag
# every row with the partition it was drawn from.
sampled_possible = possible_samples_test.sample(n=500, random_state=42).assign(type='possible')
sampled_impossible = impossible_samples_test.sample(n=500, random_state=42).assign(type='impossible')

# Stack the two samples, then shuffle (fixed seed) so both kinds are interleaved.
combined_samples = pd.concat([sampled_possible, sampled_impossible], ignore_index=True)
combined_samples = combined_samples.sample(frac=1, random_state=42).reset_index(drop=True)

# Helpers that build a few-shot prompt dynamically
def _format_few_shot_example(number, example):
    """Render one solved training example for the few-shot prompt.

    Args:
        number: Ordinal shown in the "Example N:" header.
        example: Row-like object with 'context', 'question' and 'answers'
            entries, where example['answers']['text'] is a sequence of
            reference answers (possibly empty).

    Returns:
        The example as a string ending in "Answer: <first reference answer>",
        or "Answer: " when the example has no reference answer.
    """
    answer_texts = example['answers']['text']
    answer = answer_texts[0] if answer_texts else ''
    return (
        f"Example {number}: Answer the following question as briefly as possible.\n"
        f"Context: {example['context']}\n"
        f"Question: {example['question']}\n"
        f"Answer: {answer}"
    )


def generate_few_shot_prompt(row, random_state=None):
    """Build a two-shot prompt for ``row``.

    One example is drawn from ``possible_samples_train`` and one from
    ``impossible_samples_train``; both are placed before the row's own
    context and question.

    Args:
        row: Row-like object with 'context' and 'question' entries.
        random_state: Forwarded to ``DataFrame.sample`` for both draws. The
            default ``None`` keeps the previous behaviour (pandas falls back
            to NumPy's global RNG). Pass a ``numpy.random.Generator`` or
            ``RandomState`` to get reproducible yet varying examples across
            calls; an int seed yields the same two examples on every call.

    Returns:
        The few-shot prompt string, ending in "Answer:".
    """
    # Randomly sample one example from the possible and impossible train sets
    example_possible = possible_samples_train.sample(n=1, random_state=random_state).iloc[0]
    example_impossible = impossible_samples_train.sample(n=1, random_state=random_state).iloc[0]

    # Worked examples first, then the question the model must actually answer
    return (
        f"{_format_few_shot_example(1, example_possible)}\n\n"
        f"{_format_few_shot_example(2, example_impossible)}\n\n"
        f"Answer the following question as briefly as possible.\n"
        f"Context: {row['context']}\n"
        f"Question: {row['question']}\n"
        f"Answer:"
    )

# Apply the function to create the "few_shot_prompt" column.
# generate_few_shot_prompt draws its examples with DataFrame.sample(), which
# uses NumPy's global RNG when no random_state is supplied. Every other
# sampling step in this notebook is pinned to seed 42, so seed the global RNG
# for this step as well; otherwise the prompts change on every run.
# The previous RNG state is restored afterwards so nothing else is affected.
_saved_rng_state = np.random.get_state()
np.random.seed(42)
try:
    combined_samples['few_shot_prompt'] = combined_samples.apply(generate_few_shot_prompt, axis=1)
finally:
    np.random.set_state(_saved_rng_state)

# Display a sample of the new column
combined_samples[['few_shot_prompt']].head()


## Instantiate Objects

In [None]:
# Hugging Face model wrapper; generation is capped at 200 new tokens and
# "./" is passed as the cache directory.
model_name = "meta-llama/Llama-2-7b-chat-hf"
cache_dir = "./"
hf_model = HuggingfaceModel(model_name, cache_dir, max_new_tokens=200)

# SimpleQA-style grader that uses gpt-4o through the OpenAI client.
# NOTE(review): GRADER_TEMPLATE is not defined or imported in the visible part
# of this notebook - confirm it is provided by an earlier cell.
client = OpenAI()  # key taken away
simpleqa_evaluator = SimpleQAEvalMod(
    client=client,
    model='gpt-4o',
    grader_template=GRADER_TEMPLATE,
)

# Pre-trained probes: the first file is used for the second-last token (SLT),
# the second for the last token before generation (TBG).
# NOTE(review): joblib.load unpickles arbitrary objects - only load files from
# a trusted source.
model_dict_slt_ent, model_dict_tbg_ent = (
    joblib.load(probe_path)
    for probe_path in ('model_dict_slt_ent.pkl', 'model_dict_tbg_ent.pkl')
)

### End-to-end flow

Below, for each row of the generated SQuAD dataset, we run the prompts through the model, get the answer and hidden states, and then run the hidden states for the TBG and SLT tokens through the loaded SEP probes to get entropy probabilities and predictions. Results are saved to a separate CSV file after every chunk of 20 rows.

In [None]:

def _probe_hidden_states(hidden_states, prefix):
    """Run the SLT and TBG probes on every layer of one generation's hidden states.

    Args:
        hidden_states: Second value returned by ``hf_model.predict(...,
            return_latent=True)``. Element 1 is used as the second-last-token
            embedding and element -1 as the last-token-before-generation
            embedding. Assumes each is indexable as [layer][0] and that
            ``.shape[0]`` is the number of layers - TODO confirm against
            HuggingfaceModel.predict.
        prefix: Column-name prefix ("base" or "fs").

    Returns:
        Dict holding, for each layer in order, the keys
        ``{prefix}_slt_layer_{n}_prob``, ``{prefix}_slt_layer_{n}_pred``,
        ``{prefix}_tbg_layer_{n}_prob`` and ``{prefix}_tbg_layer_{n}_pred``.
    """
    sec_last_token_embedding = hidden_states[1]
    last_tok_bef_gen_embedding = hidden_states[-1]

    results = {}
    for layer_num in range(sec_last_token_embedding.shape[0]):
        for token_tag, probes, embedding in (
            ("slt", model_dict_slt_ent, sec_last_token_embedding),
            ("tbg", model_dict_tbg_ent, last_tok_bef_gen_embedding),
        ):
            probe = probes[layer_num]
            # Single-sample batch, built once and shared by both probe calls.
            features = [np.asarray(embedding[layer_num][0])]
            # Column 1 of predict_proba; presumably the high-entropy class.
            results[f"{prefix}_{token_tag}_layer_{layer_num}_prob"] = probe.predict_proba(features)[0][1]
            results[f"{prefix}_{token_tag}_layer_{layer_num}_pred"] = probe.predict(features)[0]
    return results


def _evaluate_prompt(row, prompt_column, prefix, real_answer, first_line_only):
    """Generate, grade and probe the answer for one prompt of one row.

    Args:
        row: Row of ``combined_samples``.
        prompt_column: Column holding the prompt to send to the model.
        prefix: Column-name prefix for the results ("base" or "fs").
        real_answer: Reference answer handed to the grader.
        first_line_only: When True, keep only the first line of the
            generated continuation (used for the few-shot prompt).

    Returns:
        Dict with ``{prefix}_answer``, ``simple_qa_{prefix}_result`` and the
        per-layer probe outputs, in that order.
    """
    prompt = row[prompt_column]
    output_text, hidden_states = hf_model.predict(prompt, temperature=1.0, return_latent=True)

    # The returned text is sliced at len(prompt) to drop the echoed prompt.
    answer = output_text[len(prompt):]
    if first_line_only:
        answer = answer.split("\n")[0]
    answer = answer.strip()

    results = {f"{prefix}_answer": answer}
    # The grader always receives the base prompt as the question, including
    # when the few-shot answer is being graded.
    results[f"simple_qa_{prefix}_result"] = simpleqa_evaluator.grade_sample(
        question=row['base_prompt'], target=real_answer, predicted_answer=answer
    )
    results.update(_probe_hidden_states(hidden_states, prefix))
    return results


# Process rows in chunks of 20; every chunk is written to its own CSV file so
# finished work is on disk if a later chunk fails.
chunk_size = 20
for start_index in range(0, len(combined_samples), chunk_size):
    end_index = min(start_index + chunk_size, len(combined_samples))
    chunk = combined_samples.iloc[start_index:end_index]

    # Initialize lists for this chunk
    base_columns = []
    fs_columns = []

    for index, row in chunk.iterrows():
        print(index)
        answer_texts = row['answers']['text']
        real_answer = answer_texts[0] if answer_texts else ''
        if not real_answer:
            # Unanswerable question: grade against an explicit "no answer" target.
            real_answer = "The answer is not found in the context."

        # Base prompt first, then few-shot prompt (same order as before, so
        # the sequence of model and grader calls is unchanged).
        base_columns.append(
            _evaluate_prompt(row, 'base_prompt', 'base', real_answer, first_line_only=False)
        )
        fs_columns.append(
            _evaluate_prompt(row, 'few_shot_prompt', 'fs', real_answer, first_line_only=True)
        )

    # Convert lists of dictionaries into DataFrames for this chunk
    base_df = pd.DataFrame(base_columns)
    fs_df = pd.DataFrame(fs_columns)

    # Concatenate the new columns with the current chunk; the chunk index is
    # reset so it lines up with the 0-based result frames.
    chunk_with_predictions = pd.concat([chunk.reset_index(drop=True), base_df, fs_df], axis=1)

    # Save the chunk with an identifier (inclusive first and last row positions)
    chunk_identifier = f"{start_index}_{end_index - 1}"
    filename = f"processed_chunk_{chunk_identifier}.csv"
    chunk_with_predictions.to_csv(filename, index=False)
    print(f"Saved chunk: {filename}")
