### **Import libraries**
`lab_v2` is a library containing methods that are often used throughout our GPT experiments.  
This library can be found at the following url:

In [111]:
from typing import List
from collections import Counter

import pandas
from lab_v2.gpt_eval.eval import Eval
from lab_v2.io.pandas import PandasIO
from lab_v2.statistics.entropy import ShannonEntropy
from lab_v2.xlsx_creation.xlsx_creation import XlsxWorkbook

### **Constants**
Specify a few constants to make the notebook easier to configure in the future.

In [112]:
# These must absolutely be changed from test to test
QUESTION_SET_FILE_PATH = 'data/question-set/last_letters.jsonl'
QUESTION_SET_INDEX_NAME = 'iIndex'
QUESTION_SET_ANSWER_NAME = 'answer'

RESPONSE_FILE_PATH = 'data/responses/last_letters/sample_0.jsonl'
RESPONSE_INDEX_NAME = 'question_id'
RESPONSE_SAMPLE_NAME = 'choices'

EXTRACT_RESPONSE = lambda response: response['message']['content']
ANSWER_EXTRACTION = Eval.extract_last_letters
COMPARE_ANSWERS = lambda x, y: x == y

OUT_FILE_PATH = 'out/Last Letters.xlsx'
NUM_SAMPLES = 10


# QUESTION_SET_FILE_PATH = 'data/question-set/csqa.jsonl'
# QUESTION_SET_INDEX_NAME = 'id'
# QUESTION_SET_ANSWER_NAME = 'answerKey'

# RESPONSE_FILE_PATH = 'data/responses/csqa/sample_0.jsonl'
# RESPONSE_INDEX_NAME = 'question_id'
# RESPONSE_SAMPLE_NAME = 'choices'

# EXTRACT_RESPONSE = lambda response: response['message']['content']
# ANSWER_EXTRACTION = Eval.extract_csqa
# COMPARE_ANSWERS = lambda x, y: x.lower() == y.lower()

# OUT_FILE_PATH = 'out/CSQA.xlsx'
# NUM_SAMPLES = 10

# These are here just to make it easier
RESPONSE_ANSWERS_NAME = 'answers'

MAJORITY_ANSWER_NAME = 'majority_answer'
MAJORITY_CORRECT_NAME = 'majority_correct'

ENTROPY_COLUMN = 'shannon_entropy'

### **File I/O**
Specify a few functions that make it easier to read from files using pandas

In [113]:
question_set = PandasIO.read_file(QUESTION_SET_FILE_PATH)
question_set = question_set[[QUESTION_SET_INDEX_NAME, QUESTION_SET_ANSWER_NAME]]
display(len(question_set))
question_set.head(1)

9741

Unnamed: 0,id,answerKey
0,075e483d21c29a511267ef62bedc0461,A


In [114]:
responses = PandasIO.read_file(RESPONSE_FILE_PATH)
display(len(responses))
responses.head(1)

9741

Unnamed: 0,question_id,id,object,created,model,usage,choices,question,n,temperature
0,ac0351d8649fb60af40c5638061a2e21,chatcmpl-7K6z3XYWOvPh7edXDkJhKGmQwlKek,chat.completion,1685027761,gpt-3.5-turbo,"{'prompt_tokens': 75, 'completion_tokens': 140...","[{'message': {'role': 'assistant', 'content': ...","Answer A, B, C or D. At the end, say 'the answ...",20,0.7


In [115]:
joined = responses.set_index(RESPONSE_INDEX_NAME).join(question_set.set_index(QUESTION_SET_INDEX_NAME))
display(len(joined))
joined.head(1)

9741

Unnamed: 0_level_0,id,object,created,model,usage,choices,question,n,temperature,answerKey
question_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ac0351d8649fb60af40c5638061a2e21,chatcmpl-7K6z3XYWOvPh7edXDkJhKGmQwlKek,chat.completion,1685027761,gpt-3.5-turbo,"{'prompt_tokens': 75, 'completion_tokens': 140...","[{'message': {'role': 'assistant', 'content': ...","Answer A, B, C or D. At the end, say 'the answ...",20,0.7,D


In [116]:
def extract_answers(row, column):
    answers = []
    for index, element in enumerate(row[column]): 
        if index > NUM_SAMPLES: break
        response = EXTRACT_RESPONSE(element)
        answers.append(ANSWER_EXTRACTION(response))
    row[RESPONSE_ANSWERS_NAME] = answers
    return row

joined = joined.apply(lambda row: extract_answers(row, RESPONSE_SAMPLE_NAME), axis=1)
joined.head(1)

Unnamed: 0_level_0,id,object,created,model,usage,choices,question,n,temperature,answerKey,answers
question_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
ac0351d8649fb60af40c5638061a2e21,chatcmpl-7K6z3XYWOvPh7edXDkJhKGmQwlKek,chat.completion,1685027761,gpt-3.5-turbo,"{'prompt_tokens': 75, 'completion_tokens': 140...","[{'message': {'role': 'assistant', 'content': ...","Answer A, B, C or D. At the end, say 'the answ...",20,0.7,D,"[d, d, d, d, d, d, d, d, d, d, d]"


In [117]:
def get_majority(answers: List[str]):
    counter = Counter(answers)
    return counter.most_common()[0][0]
joined[MAJORITY_ANSWER_NAME] = joined[RESPONSE_ANSWERS_NAME].apply(lambda row : get_majority(row))
joined[MAJORITY_CORRECT_NAME] = joined.apply(lambda row : COMPARE_ANSWERS(row[MAJORITY_ANSWER_NAME], row[QUESTION_SET_ANSWER_NAME]), axis=1)
joined.head(1)

Unnamed: 0_level_0,id,object,created,model,usage,choices,question,n,temperature,answerKey,answers,majority_answer,majority_correct
question_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
ac0351d8649fb60af40c5638061a2e21,chatcmpl-7K6z3XYWOvPh7edXDkJhKGmQwlKek,chat.completion,1685027761,gpt-3.5-turbo,"{'prompt_tokens': 75, 'completion_tokens': 140...","[{'message': {'role': 'assistant', 'content': ...","Answer A, B, C or D. At the end, say 'the answ...",20,0.7,D,"[d, d, d, d, d, d, d, d, d, d, d]",d,True


In [118]:
joined[ENTROPY_COLUMN] = joined[RESPONSE_ANSWERS_NAME].apply(lambda row : ShannonEntropy.base(row))
joined.head(1)

Unnamed: 0_level_0,id,object,created,model,usage,choices,question,n,temperature,answerKey,answers,majority_answer,majority_correct,shannon_entropy
question_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
ac0351d8649fb60af40c5638061a2e21,chatcmpl-7K6z3XYWOvPh7edXDkJhKGmQwlKek,chat.completion,1685027761,gpt-3.5-turbo,"{'prompt_tokens': 75, 'completion_tokens': 140...","[{'message': {'role': 'assistant', 'content': ...","Answer A, B, C or D. At the end, say 'the answ...",20,0.7,D,"[d, d, d, d, d, d, d, d, d, d, d]",d,True,0.0


In [119]:
def generate_geq(dataframe: pandas.DataFrame, entropy_column: str, correct_column: str):
    ret = []
    values = dataframe[entropy_column].sort_values().unique().tolist()
    for value in values:
        filtered_entropy = dataframe[dataframe[entropy_column] >= value]
        filtered_wrong = filtered_entropy[~filtered_entropy[correct_column]]
        
        if len(filtered_entropy) == 0: break
        ret.append({
            'entropy': value, 
            'support': len(filtered_entropy), 
            'amount_wrong': len(filtered_wrong), 
            'probability_of_failure': len(filtered_wrong) / len(filtered_entropy)
        })

    return pandas.DataFrame(ret)

geq = generate_geq(joined, ENTROPY_COLUMN, MAJORITY_CORRECT_NAME)
geq.head(1)

Unnamed: 0,entropy,support,amount_wrong,probability_of_failure
0,0.0,9741,4294,0.440817


In [120]:
def generate_leq(dataframe: pandas.DataFrame, entropy_column: str, correct_column: str):
    ret = []
    values = dataframe[entropy_column].sort_values().unique().tolist()
    for value in values:
        filtered_entropy = dataframe[dataframe[entropy_column] <= value]
        filtered_wrong = filtered_entropy[~filtered_entropy[correct_column]]
        
        if len(filtered_entropy) == 0: break
        ret.append({
            'entropy': value, 
            'support': len(filtered_entropy), 
            'amount_wrong': len(filtered_wrong), 
            'probability_of_failure': len(filtered_wrong) / len(filtered_entropy)
        })

    return pandas.DataFrame(ret)

leq = generate_leq(joined, ENTROPY_COLUMN, MAJORITY_CORRECT_NAME)
leq.head(1)

In [None]:
workbook = XlsxWorkbook(OUT_FILE_PATH)
worksheet = workbook.add_sheet('Entropy_GEQ', geq)
worksheet.add_scatter_chart('Entropy vs Probability of failure (GEQ)', 'entropy', 'probability_of_failure', 'H1')

worksheet = workbook.add_sheet('Entropy_LEQ', leq)
worksheet.add_scatter_chart('Entropy vs Probability of failure (LEQ)', 'entropy', 'probability_of_failure', 'H1')
workbook.close()