### **Import libraries**
`lab_v2` is a library containing methods that are often used throughout our GPT experiments.  
This library can be found at the following url:

In [76]:
from typing import List
from collections import Counter
from enum import Enum

import pandas

from lab_v2 import gpt_eval
from lab_v2 import io
from lab_v2 import stats
from lab_v2.xlsx_creation import XlsxWorkbook

### **Constants**
Specify a few constants to make the notebook easier to configure in the future.

In [77]:
class Dataset(Enum):
    LAST_LETTERS = 0
    CSQA = 1

DATASET = Dataset.LAST_LETTERS
QUESTION_SET_FILE_PATH = 'data/question-set/last_letters.jsonl'
RESPONSE_FILE_PATH = 'data/responses/last_letters/sample_0.jsonl'
OUT_FILE_PATH = 'out/Last Letters.xlsx'
NUM_SAMPLES = 10

# DATASET = Dataset.CSQA
# QUESTION_SET_FILE_PATH = 'data/question-set/csqa.jsonl'
# RESPONSE_FILE_PATH = 'data/responses/csqa/sample_0.jsonl'
# OUT_FILE_PATH = 'out/CSQA.xlsx'
# NUM_SAMPLES = 10

if DATASET == Dataset.LAST_LETTERS:
    QUESTION_SET_INDEX_NAME = 'iIndex'
    QUESTION_SET_ANSWER_NAME = 'answer'
    RESPONSE_INDEX_NAME = 'question_id'
    RESPONSE_SAMPLE_NAME = 'choices'
    EXTRACT_RESPONSE = lambda response: response['message']['content']
    ANSWER_EXTRACTION = gpt_eval.extract_last_letters
    COMPARE_ANSWERS = lambda x, y: x == y
if DATASET == Dataset.CSQA:
    QUESTION_SET_INDEX_NAME = 'id'
    QUESTION_SET_ANSWER_NAME = 'answerKey'
    RESPONSE_INDEX_NAME = 'question_id'
    RESPONSE_SAMPLE_NAME = 'choices'
    EXTRACT_RESPONSE = lambda response: response['message']['content']
    ANSWER_EXTRACTION = gpt_eval.extract_csqa
    COMPARE_ANSWERS = lambda x, y: x.lower() == y.lower()



RESPONSE_ANSWERS_NAME = 'answers'
MAJORITY_ANSWER_NAME = 'majority_answer'
MAJORITY_CORRECT_NAME = 'majority_correct'
ENTROPY_COLUMN = 'shannon_entropy'
GINI_IMPURITY_COLUMN = 'gini_impurity'

### **File I/O**
Specify a few functions that make it easier to read from files using pandas

In [78]:
question_set = io.read_file(QUESTION_SET_FILE_PATH)
question_set = question_set[[QUESTION_SET_INDEX_NAME, QUESTION_SET_ANSWER_NAME]]
display(len(question_set))
question_set.head(1)

3000

Unnamed: 0,iIndex,answer
0,0,rany


In [79]:
responses = io.read_file(RESPONSE_FILE_PATH)
display(len(responses))
responses.head(1)

3000

Unnamed: 0,question_id,id,object,created,model,choices,usage,question,n,temperature
0,31,chatcmpl-7XlFtWDh2tBIfBANVqU5WPrpi3cj6,chat.completion,1688280829,gpt-3.5-turbo,"[{'index': 0, 'message': {'role': 'assistant',...","{'prompt_tokens': 55, 'completion_tokens': 149...","At the end, say 'the answer is [put the concat...",20,0.7


In [80]:
joined = responses.set_index(RESPONSE_INDEX_NAME).join(question_set.set_index(QUESTION_SET_INDEX_NAME))
display(len(joined))
joined.head(1)

3000

Unnamed: 0_level_0,id,object,created,model,choices,usage,question,n,temperature,answer
question_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
31,chatcmpl-7XlFtWDh2tBIfBANVqU5WPrpi3cj6,chat.completion,1688280829,gpt-3.5-turbo,"[{'index': 0, 'message': {'role': 'assistant',...","{'prompt_tokens': 55, 'completion_tokens': 149...","At the end, say 'the answer is [put the concat...",20,0.7,yral


In [81]:
def extract_answers(row, column):
    answers = []
    for index, element in enumerate(row[column]): 
        if index > NUM_SAMPLES: break
        response = EXTRACT_RESPONSE(element)
        answers.append(ANSWER_EXTRACTION(response))
    row[RESPONSE_ANSWERS_NAME] = answers
    return row

joined = joined.apply(lambda row: extract_answers(row, RESPONSE_SAMPLE_NAME), axis=1)
joined.head(1)

Unnamed: 0_level_0,id,object,created,model,choices,usage,question,n,temperature,answer,answers
question_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
31,chatcmpl-7XlFtWDh2tBIfBANVqU5WPrpi3cj6,chat.completion,1688280829,gpt-3.5-turbo,"[{'index': 0, 'message': {'role': 'assistant',...","{'prompt_tokens': 55, 'completion_tokens': 149...","At the end, say 'the answer is [put the concat...",20,0.7,yral,"[yrela, yagl, dyal, yrela, yagl, yagl, yal, ya..."


In [82]:
def get_majority(answers: List[str]):
    counter = Counter(answers)
    return counter.most_common()[0][0]
joined[MAJORITY_ANSWER_NAME] = joined[RESPONSE_ANSWERS_NAME].apply(lambda row : get_majority(row))
joined[MAJORITY_CORRECT_NAME] = joined.apply(lambda row : COMPARE_ANSWERS(row[MAJORITY_ANSWER_NAME], row[QUESTION_SET_ANSWER_NAME]), axis=1)
joined.head(1)

Unnamed: 0_level_0,id,object,created,model,choices,usage,question,n,temperature,answer,answers,majority_answer,majority_correct
question_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
31,chatcmpl-7XlFtWDh2tBIfBANVqU5WPrpi3cj6,chat.completion,1688280829,gpt-3.5-turbo,"[{'index': 0, 'message': {'role': 'assistant',...","{'prompt_tokens': 55, 'completion_tokens': 149...","At the end, say 'the answer is [put the concat...",20,0.7,yral,"[yrela, yagl, dyal, yrela, yagl, yagl, yal, ya...",yagl,False


In [83]:
joined[ENTROPY_COLUMN] = joined[RESPONSE_ANSWERS_NAME].apply(lambda row : stats.shannon_entropy(row))
joined[GINI_IMPURITY_COLUMN] = joined[RESPONSE_ANSWERS_NAME].apply(lambda row : stats.gini_impurity(row))
joined.head(1)

Unnamed: 0_level_0,id,object,created,model,choices,usage,question,n,temperature,answer,answers,majority_answer,majority_correct,shannon_entropy,gini_impurity
question_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
31,chatcmpl-7XlFtWDh2tBIfBANVqU5WPrpi3cj6,chat.completion,1688280829,gpt-3.5-turbo,"[{'index': 0, 'message': {'role': 'assistant',...","{'prompt_tokens': 55, 'completion_tokens': 149...","At the end, say 'the answer is [put the concat...",20,0.7,yral,"[yrela, yagl, dyal, yrela, yagl, yagl, yal, ya...",yagl,False,1.672625,0.793388


In [84]:
def generate_chart(dataframe: pandas.DataFrame, x_axis: str, y_axis: str, filter):
    ret = []
    values = dataframe[x_axis].sort_values().unique().tolist()
    for value in values:
        filtered_entropy = dataframe[filter(dataframe[x_axis], value)]
        filtered_wrong = filtered_entropy[~filtered_entropy[y_axis]]
        
        if len(filtered_entropy) == 0: break
        ret.append({
            x_axis: value, 
            'support': len(filtered_entropy), 
            'amount_wrong': len(filtered_wrong), 
            'probability_of_failure': len(filtered_wrong) / len(filtered_entropy)
        })

    return pandas.DataFrame(ret)

def generate_geq(dataframe: pandas.DataFrame, x_axis: str, y_axis: str):
    return generate_chart(dataframe, x_axis, y_axis, lambda dat, y: dat >= y)

def generate_leq(dataframe: pandas.DataFrame, x_axis: str, y_axis: str):
    return generate_chart(dataframe, x_axis, y_axis, lambda dat, y: dat <= y)

In [85]:
entropy_geq = generate_geq(joined, ENTROPY_COLUMN, MAJORITY_CORRECT_NAME)
gini_impurity_geq = generate_geq(joined, GINI_IMPURITY_COLUMN, MAJORITY_CORRECT_NAME)

display(entropy_geq.head(1))
display(gini_impurity_geq.head(1))

Unnamed: 0,shannon_entropy,support,amount_wrong,probability_of_failure
0,0.0,3000,1473,0.491


Unnamed: 0,gini_impurity,support,amount_wrong,probability_of_failure
0,0.0,3000,1473,0.491


In [86]:
entropy_leq = generate_leq(joined, ENTROPY_COLUMN, MAJORITY_CORRECT_NAME)
gini_impurity_leq = generate_leq(joined, GINI_IMPURITY_COLUMN, MAJORITY_CORRECT_NAME)
display(entropy_leq.head(1))
display(gini_impurity_leq.head(1))

Unnamed: 0,shannon_entropy,support,amount_wrong,probability_of_failure
0,0.0,10,0,0.0


Unnamed: 0,gini_impurity,support,amount_wrong,probability_of_failure
0,0.0,10,0,0.0


In [87]:
workbook = XlsxWorkbook(OUT_FILE_PATH)
entropy_geq_worksheet = workbook.add_sheet('Entropy_GEQ', entropy_geq)
entropy_geq_worksheet.add_scatter_chart('Entropy vs Probability of failure (GEQ)', ENTROPY_COLUMN, 'probability_of_failure', 'H1')

entropy_leq_worksheet = workbook.add_sheet('Entropy_LEQ', entropy_leq)
entropy_leq_worksheet.add_scatter_chart('Entropy vs Probability of failure (LEQ)', ENTROPY_COLUMN, 'probability_of_failure', 'H1')

entropy_leq_worksheet = workbook.add_sheet('Gini_Impurity_LEQ', gini_impurity_geq)
entropy_leq_worksheet.add_scatter_chart('Gini Impurity vs Probability of failure (LEQ)', GINI_IMPURITY_COLUMN, 'probability_of_failure', 'H1')

entropy_leq_worksheet = workbook.add_sheet('Gini_Impurity_GEQ', gini_impurity_geq)
entropy_leq_worksheet.add_scatter_chart('Gini Impurity vs Probability of failure (GEQ)', GINI_IMPURITY_COLUMN, 'probability_of_failure', 'H1')

workbook.close()