### **Import libraries**
`lab_v2` is a library containing methods that are often used throughout our GPT experiments.  
This library can be found at the following URL: *(TODO: the link is missing here — add it)*

In [1]:
from typing import List
from collections import Counter

import pandas
from lab_v2.gpt_eval.eval import Eval
from lab_v2.io.pandas import PandasIO
from lab_v2.statistics.entropy import ShannonEntropy
from lab_v2.xlsx_creation.xlsx_creation import XlsxWorkbook

### **Constants**
Specify a few constants to make the notebook easier to configure in the future.

In [2]:
# Per-experiment settings: these must absolutely be changed from test to test.
# Question-set file, and the names of its id column (used as the join key)
# and of the column holding the expected answer.
QUESTION_SET_FILE_PATH = 'data/question-set/last_letters.jsonl'
QUESTION_SET_INDEX_NAME = 'iIndex'
QUESTION_SET_ANSWER_NAME = 'answer'

# Response file, and the names of its question-id column (used as the join key)
# and of the column that is iterated to get the individual sampled responses.
RESPONSE_FILE_PATH = 'data/responses/last_letters/sample_0.jsonl'
RESPONSE_INDEX_NAME = 'question_id'
RESPONSE_SAMPLE_NAME = 'choices'

# Pulls the message text out of one element of the sampled-responses column.
EXTRACT_RESPONSE = lambda response: response['message']['content']
# Applied to each extracted message text to obtain its answer
# (the exact extraction rules live in `lab_v2`).
ANSWER_EXTRACTION = Eval.extract_last_letters
# Decides whether the majority answer matches the expected answer.
COMPARE_ANSWERS = lambda x, y: x == y

# Names of the columns this notebook adds to the joined frame;
# they are constants only for convenience.
# List of answers extracted from the sampled responses of a question.
RESPONSE_ANSWERS_NAME = 'answers'

# Most frequent extracted answer, and whether it matches the expected answer.
MAJORITY_ANSWER_NAME = 'majority_answer'
MAJORITY_CORRECT_NAME = 'majority_correct'

# Value returned by ShannonEntropy.base for the list of extracted answers.
ENTROPY_COLUMN = 'shannon_entropy'



### **File I/O**
Load the question set and the model responses with pandas (via `PandasIO`), then join them on the question index so every response row carries its expected answer.

In [3]:
# Read the question set and keep only the id and expected-answer columns.
question_set = PandasIO.read_file(QUESTION_SET_FILE_PATH)[
    [QUESTION_SET_INDEX_NAME, QUESTION_SET_ANSWER_NAME]
]
# Show the number of questions, then a one-row preview.
display(question_set.shape[0])
question_set.head(1)

3000

Unnamed: 0,iIndex,answer
0,0,rany


In [4]:
# Read the raw responses file.
responses = PandasIO.read_file(RESPONSE_FILE_PATH)
# Show the number of response rows, then a one-row preview.
display(responses.shape[0])
responses.head(1)

3000

Unnamed: 0,question_id,id,object,created,model,choices,usage,question,n,temperature
0,31,chatcmpl-7XlFtWDh2tBIfBANVqU5WPrpi3cj6,chat.completion,1688280829,gpt-3.5-turbo,"[{'index': 0, 'message': {'role': 'assistant',...","{'prompt_tokens': 55, 'completion_tokens': 149...","At the end, say 'the answer is [put the concat...",20,0.7


In [5]:
# Attach the expected answer to every response row by joining on the
# question id (left join: every response row is kept).
joined = (
    responses
    .set_index(RESPONSE_INDEX_NAME)
    .join(question_set.set_index(QUESTION_SET_INDEX_NAME), how='left')
)
# Show the number of joined rows, then a one-row preview.
display(joined.shape[0])
joined.head(1)

3000

Unnamed: 0_level_0,id,object,created,model,choices,usage,question,n,temperature,answer
question_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
31,chatcmpl-7XlFtWDh2tBIfBANVqU5WPrpi3cj6,chat.completion,1688280829,gpt-3.5-turbo,"[{'index': 0, 'message': {'role': 'assistant',...","{'prompt_tokens': 55, 'completion_tokens': 149...","At the end, say 'the answer is [put the concat...",20,0.7,yral


In [6]:
def extract_answers(row, column):
    """Return a copy of ``row`` extended with the answers of its sampled responses.

    Each element of ``row[column]`` is passed through ``EXTRACT_RESPONSE`` to
    get the message text, and that text through ``ANSWER_EXTRACTION`` to get
    the answer. The resulting list is stored under ``RESPONSE_ANSWERS_NAME``.

    Args:
        row: One row of the joined frame (as passed by ``DataFrame.apply``
            with ``axis=1``).
        column: Name of the entry in ``row`` holding the iterable of
            sampled responses.

    Returns:
        A copy of ``row`` with one extra entry, ``RESPONSE_ANSWERS_NAME``,
        containing the list of extracted answers in sample order.
    """
    answers = [
        ANSWER_EXTRACTION(EXTRACT_RESPONSE(element))
        for element in row[column]
    ]
    # pandas does not support functions that mutate the object handed to
    # ``DataFrame.apply``; extend a copy instead of the passed-in row.
    extended = row.copy()
    extended[RESPONSE_ANSWERS_NAME] = answers
    return extended

# Add the extracted-answers column to every row of the joined frame.
joined = joined.apply(
    extract_answers,
    axis=1,
    args=(RESPONSE_SAMPLE_NAME,),
)
joined.head(1)

Unnamed: 0_level_0,id,object,created,model,choices,usage,question,n,temperature,answer,answers
question_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
31,chatcmpl-7XlFtWDh2tBIfBANVqU5WPrpi3cj6,chat.completion,1688280829,gpt-3.5-turbo,"[{'index': 0, 'message': {'role': 'assistant',...","{'prompt_tokens': 55, 'completion_tokens': 149...","At the end, say 'the answer is [put the concat...",20,0.7,yral,"[yrela, yagl, dyal, yrela, yagl, yagl, yal, ya..."


In [7]:
def get_majority(answers: List[str]):
    """Return the most frequent answer in ``answers``.

    When several answers share the highest count, the one that appears
    first in ``answers`` wins. An empty list raises ``IndexError``.
    """
    tally = Counter(answers)
    top_answer, _votes = tally.most_common(1)[0]
    return top_answer
# Majority vote over the extracted answers of each question.
joined[MAJORITY_ANSWER_NAME] = joined[RESPONSE_ANSWERS_NAME].apply(get_majority)
# Flag whether the majority answer matches the expected answer.
joined[MAJORITY_CORRECT_NAME] = joined.apply(
    lambda row: COMPARE_ANSWERS(
        row[MAJORITY_ANSWER_NAME],
        row[QUESTION_SET_ANSWER_NAME],
    ),
    axis=1,
)
joined.head(1)

Unnamed: 0_level_0,id,object,created,model,choices,usage,question,n,temperature,answer,answers,majority_answer,majority_correct
question_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
31,chatcmpl-7XlFtWDh2tBIfBANVqU5WPrpi3cj6,chat.completion,1688280829,gpt-3.5-turbo,"[{'index': 0, 'message': {'role': 'assistant',...","{'prompt_tokens': 55, 'completion_tokens': 149...","At the end, say 'the answer is [put the concat...",20,0.7,yral,"[yrela, yagl, dyal, yrela, yagl, yagl, yal, ya...",yagl,False


In [8]:
# Entropy of each question's list of extracted answers.
joined[ENTROPY_COLUMN] = joined[RESPONSE_ANSWERS_NAME].apply(ShannonEntropy.base)
joined.head(1)

Unnamed: 0_level_0,id,object,created,model,choices,usage,question,n,temperature,answer,answers,majority_answer,majority_correct,shannon_entropy
question_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
31,chatcmpl-7XlFtWDh2tBIfBANVqU5WPrpi3cj6,chat.completion,1688280829,gpt-3.5-turbo,"[{'index': 0, 'message': {'role': 'assistant',...","{'prompt_tokens': 55, 'completion_tokens': 149...","At the end, say 'the answer is [put the concat...",20,0.7,yral,"[yrela, yagl, dyal, yrela, yagl, yagl, yal, ya...",yagl,False,2.121699


In [9]:
def generate_geq(dataframe: pandas.DataFrame, entropy_column: str, correct_column: str) -> pandas.DataFrame:
    """Tabulate the failure rate of all rows at or above each entropy value.

    For every distinct value ``v`` of ``entropy_column`` (ascending), the
    rows with entropy ``>= v`` are selected and summarised.

    Args:
        dataframe: Frame containing the two columns named below.
        entropy_column: Name of the numeric column to threshold on.
        correct_column: Name of the boolean column; ``True`` marks a row
            as correct, so its negation marks a row as wrong.

    Returns:
        A frame with one row per threshold and the columns ``entropy``,
        ``support`` (rows at or above the threshold), ``amount_wrong``
        (of those, rows not marked correct) and ``probability_of_failure``
        (``amount_wrong / support``). The columns are present even when no
        threshold produces a row (e.g. empty input).
    """
    columns = ['entropy', 'support', 'amount_wrong', 'probability_of_failure']

    entropies = dataframe[entropy_column]
    # Loop-invariant: which rows are wrong does not depend on the threshold.
    is_wrong = ~dataframe[correct_column]

    records = []
    for threshold in entropies.sort_values().unique().tolist():
        at_or_above = entropies >= threshold
        support = int(at_or_above.sum())
        # A NaN threshold matches no row; NaN sorts last, so nothing follows it.
        if support == 0:
            break
        amount_wrong = int((at_or_above & is_wrong).sum())
        records.append({
            'entropy': threshold,
            'support': support,
            'amount_wrong': amount_wrong,
            'probability_of_failure': amount_wrong / support,
        })

    # Passing ``columns`` keeps the schema stable when ``records`` is empty.
    return pandas.DataFrame(records, columns=columns)

# Build the threshold table from the joined frame and display it.
geq = generate_geq(
    dataframe=joined,
    entropy_column=ENTROPY_COLUMN,
    correct_column=MAJORITY_CORRECT_NAME,
)
geq

Unnamed: 0,entropy,support,amount_wrong,probability_of_failure
0,0.000000,3000,1308,0.436000
1,0.198515,2997,1308,0.436436
2,0.325083,2990,1308,0.437458
3,0.394398,2988,1308,0.437751
4,0.422709,2978,1308,0.439221
...,...,...,...,...
516,2.830940,121,102,0.842975
517,2.857103,98,84,0.857143
518,2.926418,51,46,0.901961
519,2.926418,16,16,1.000000


In [10]:
# Export the threshold table to an Excel workbook with a scatter chart.
# NOTE(review): presumably the 'out/' directory must already exist — confirm
# whether XlsxWorkbook creates it.
workbook = XlsxWorkbook('out/test.xlsx')
# One sheet holding the `geq` table.
worksheet = workbook.add_sheet('Entropy_GEQ', geq)
# Chart of 'entropy' against 'probability_of_failure'; 'H1' is presumably the
# cell where the chart is anchored — TODO confirm against lab_v2.
worksheet.add_scatter_chart('Entropy vs Probability of failure', 'entropy', 'probability_of_failure', 'H1')
workbook.close()