In [5]:
import hashlib
from itertools import islice
from typing import Dict, List

import pandas as pd
import random
import csv

from tqdm import tqdm

from chatgpt import count_tokens
from hashing import short_hash
from prompt_1 import generate_prompt_1, classify_validation_set_1
from prompt_2 import classify_validation_set_2, generate_prompt_2

DEV_FILE_PATH = "dev_off.csv"
NUM_PROMPT_EXAMPLES = 20
NUM_CLASSIFICATION_EXAMPLES = 30


def load_dataset(file_path: str) -> List[Dict]:
    """Read the annotated CSV at ``file_path`` into a list of row dicts.

    Each row returned by ``csv.DictReader`` is extended in place with two
    derived keys:

    * ``'label'`` -- ``True`` when the ``'Annotation'`` column is exactly
      ``'t'``, ``False`` for any other value.
    * ``'hash'`` -- ``short_hash`` of the ``'sentence'`` column; other code in
      this notebook uses it to tell rows apart.

    Args:
        file_path: Path to a CSV file whose header row contains at least the
            ``Annotation`` and ``sentence`` columns.

    Returns:
        One dict per data row, in file order.

    Raises:
        KeyError: If the file has no ``Annotation`` or ``sentence`` column.
    """
    print(f"Loading data from {file_path}")
    # newline='' is what the csv module expects: without it, universal-newline
    # translation rewrites line breaks embedded in quoted fields, which would
    # change the sentence text and therefore its hash.
    with open(file_path, 'r', newline='') as f:
        rows = list(csv.DictReader(f))
    print(f"Loaded {len(rows)} rows")

    for row in rows:
        row['label'] = row['Annotation'] == 't'
        row['hash'] = short_hash(row['sentence'])
    print(f"Hashed {len(rows)} rows")

    return rows


def get_data_folds(data: List[Dict]):
    """Yield prompt-tuning / validation splits of ``data``, one per fold.

    Rows are partitioned by their ``'label'`` value (expected to be a bool).
    Fold ``i`` takes the ``i``-th consecutive slice of ``NUM_PROMPT_EXAMPLES``
    positive rows and the slice at the same positions of the negative rows as
    prompt examples; every remaining row goes into that fold's validation set,
    which is shuffled.

    The number of folds is derived from the positive rows only, so when there
    are fewer negatives than positives the negative slice of later folds can
    be shorter than ``NUM_PROMPT_EXAMPLES`` (or empty).

    Shuffling uses a private ``random.Random(42)``. The order is the same as
    seeding the global generator with 42 would give, but iterating the folds
    no longer resets the process-wide random state.

    Args:
        data: Rows carrying a ``'label'`` key.

    Yields:
        Dicts with the keys ``"positives"``, ``"negatives"`` (prompt examples)
        and ``"validation"`` (shuffled remainder).
    """
    print("Splitting data into folds")

    # Local generator: deterministic, and leaves the `random` module's global
    # state alone for any other code that relies on it.
    rng = random.Random(42)

    positive_examples = [row for row in data if row['label']]
    negative_examples = [row for row in data if not row['label']]

    num_folds = len(positive_examples) // NUM_PROMPT_EXAMPLES
    for fold_i in range(num_folds):
        fold_start = fold_i * NUM_PROMPT_EXAMPLES
        fold_end = fold_start + NUM_PROMPT_EXAMPLES

        prompt_tuning_positive = positive_examples[fold_start:fold_end]
        other_positive = positive_examples[:fold_start] + positive_examples[fold_end:]

        # Same index window as the positives; slicing silently truncates if
        # there are not enough negatives.
        prompt_tuning_negative = negative_examples[fold_start:fold_end]
        other_negative = negative_examples[:fold_start] + negative_examples[fold_end:]

        validation_set = other_positive + other_negative
        rng.shuffle(validation_set)

        yield {
            "positives": prompt_tuning_positive,
            "negatives": prompt_tuning_negative,
            "validation": validation_set
        }


def chunker(seq, size):
    """Return a generator over consecutive slices of ``seq`` of length ``size``.

    The final slice is shorter when ``len(seq)`` is not a multiple of ``size``.
    The start offsets are computed when ``chunker`` is called; the slices
    themselves are produced lazily.
    """
    offsets = range(0, len(seq), size)

    def _slices():
        for start in offsets:
            yield seq[start:start + size]

    return _slices()




In [None]:

# spaCy is not imported by any other visible cell, so import it here to keep
# this cell runnable after a kernel restart.
import spacy
from spacy import displacy

# Example sentences whose dependency parses are rendered below.
s1 = "I was so certain that I saw you ."
s2 = "I was so happy that I was freed ."
s3 = "I was so happy that I cried ."
s4 = "I was certain I saw you ."
s5 = "I was happy I was freed ."
s6 = "A further limit on the playing ability of working class teams was that working class players had to be careful how hard they played ."
s7 = "The Wobblies were careful that the strike demands reflected only the immediate needs of the workers , rather than long range goals of the IWW ."

ss = [s1, s2, s3, s4, s5, s6, s7]

# Load the pre-trained language model
nlp = spacy.load("en_core_web_sm")

for s in ss:
    # Process the sentence with spaCy
    doc = nlp(s)
    # Visualize the dependency parse tree
    displacy.render(doc, style="dep")

# Token-level dump for the first sentence.
doc = nlp(s1)
for token in doc:
    # token.head is the syntactic parent; token.idx (printed here before) is
    # only the token's character offset in the text.
    print(f"Token: {token.text}, POS: {token.pos_}, Head: {token.head.text}, Dep: {token.dep_}")

In [6]:
data = load_dataset(DEV_FILE_PATH)
# get_data_folds is a generator function. Materialise its result so `folds`
# can be iterated more than once; a bare generator is exhausted after a single
# pass, which is easy to trip over when cells are re-run or several consumers
# read the same name.
folds = list(get_data_folds(data))

# Maps prompt number -> (prompt generator, validation-set classifier).
prompt_nums_to_functions = {
    1: (generate_prompt_1, classify_validation_set_1),
    2: (generate_prompt_2, classify_validation_set_2),
}

def _cost_terms(confusion_matrix, gpt_cost):
    """Turn an Actual x Predicted crosstab into (human_cost_multiplier, fixed_gpt_cost)."""
    labels = [False, True]
    unexpected = (set(confusion_matrix.index) | set(confusion_matrix.columns)) - set(labels)
    if unexpected:
        raise ValueError(f"Expected boolean labels/predictions, got {sorted(map(repr, unexpected))}")

    # A class that never occurred so far is simply absent from the crosstab;
    # reindex fills those cells with 0 instead of letting the lookup KeyError.
    counts = confusion_matrix.reindex(index=labels, columns=labels, fill_value=0)
    # Rows are actual labels, columns are predictions, both ordered False, True.
    (tn, fp), (fn, tp) = counts.to_numpy().tolist()

    if tp == 0:
        # No true positives yet: any "per true positive" cost is unbounded.
        return (float("inf"), float("inf"))

    human_cost_multiplier = (tp + fp) / tp
    # NOTE(review): formula kept unchanged. gpt_cost is the cumulative cost of
    # all calls, so dividing by tp twice may not be the intended
    # normalisation -- confirm.
    fixed_gpt_cost = (tp + tn + fp + fn) / tp / tp * gpt_cost
    return (human_cost_multiplier, fixed_gpt_cost)


def get_cost_for_prompt(prompt_num):
    """Classify the first fold's validation set with one prompt and cost it.

    The validation set is processed in chunks of
    ``NUM_CLASSIFICATION_EXAMPLES`` rows. For each chunk the prompt generator
    registered under ``prompt_num`` in ``prompt_nums_to_functions`` is called
    with the fold's positive and negative examples plus the chunk, and the
    matching classifier is called with that prompt and the chunk. The
    classifier is expected to return ``(classified_rows, cost)`` where every
    classified row carries ``'hash'``, ``'label'`` and ``'prediction'``.

    Rows whose hash came back classified are removed from the pending set. If
    a call classifies nothing, the pending set is rotated by one row and the
    call is retried (there is no cap on retries). After every chunk the
    running confusion matrix is printed.

    Every call draws its own first fold from ``get_data_folds(data)``, so each
    prompt is scored on the same data regardless of call order.

    Args:
        prompt_num: Key into ``prompt_nums_to_functions``.

    Returns:
        ``(human_cost_multiplier, fixed_gpt_cost)`` computed from the final
        confusion matrix; both are ``inf`` when there are no true positives.

    Raises:
        KeyError: If ``prompt_num`` is not registered.
        ValueError: If there is no fold, the fold's validation set is empty,
            or labels/predictions are not booleans.
    """
    generate_prompt, classify_validation_set = prompt_nums_to_functions[prompt_num]

    # Use a fresh fold iterator per call: sharing one generator between calls
    # would hand each prompt a different fold (or none once it is exhausted).
    fold = next(iter(get_data_folds(data)), None)
    if fold is None:
        raise ValueError("get_data_folds produced no folds; not enough positive examples")

    validation_set = list(fold["validation"])
    if not validation_set:
        raise ValueError("The fold's validation set is empty; nothing to classify")

    classified_set = []
    gpt_cost = 0
    human_cost_multiplier = fixed_gpt_cost = None
    while validation_set:
        chunk = validation_set[:NUM_CLASSIFICATION_EXAMPLES]
        initial_prompt = generate_prompt(fold["positives"], fold["negatives"], chunk)
        classified_samples, gpt_cost_chunk = classify_validation_set(initial_prompt, chunk)
        # Failed calls are billed too, so their cost is always accumulated.
        gpt_cost += gpt_cost_chunk
        if len(classified_samples) == 0:
            print("No examples classified, rotating validation set")
            validation_set = validation_set[1:] + [validation_set[0]]
        else:
            # Remove classified examples from validation set
            classified_set += classified_samples
            classified_hashes = {row['hash'] for row in classified_set}
            validation_set = [row for row in validation_set if row['hash'] not in classified_hashes]
            print(f"{len(validation_set)}/{len(fold['validation'])} examples remaining")

        if classified_set:
            # Running confusion matrix (compare "label" and "prediction" columns)
            df = pd.DataFrame(classified_set)
            confusion_matrix = pd.crosstab(df['label'], df['prediction'], rownames=['Actual'], colnames=['Predicted'])
            print(confusion_matrix)
            human_cost_multiplier, fixed_gpt_cost = _cost_terms(confusion_matrix, gpt_cost)

    return (human_cost_multiplier, fixed_gpt_cost)
            
# Score each prompt variant and keep its (human_cost_multiplier, fixed_gpt_cost)
# pair, keyed by prompt number.
prompt_costs_dict = {}
for prompt_num in (1, 2):
    print(f"Prompt {prompt_num}")
    prompt_costs = get_cost_for_prompt(prompt_num)
    prompt_costs_dict[prompt_num] = prompt_costs
    print("\n")



Loading data from dev_off.csv
Loaded 500 rows
Hashed 500 rows
Prompt 1
Splitting data into folds
Generating prompt 1
No examples classified, rotating validation set
Generating prompt 1
430/460 examples remaining
Predicted  False  True 
Actual                 
False          2     19
True           1      8
Generating prompt 1
400/460 examples remaining
Predicted  False  True 
Actual                 
False         12     31
True           2     15
Generating prompt 1
370/460 examples remaining
Predicted  False  True 
Actual                 
False         25     41
True           5     19
Generating prompt 1
340/460 examples remaining
Predicted  False  True 
Actual                 
False         27     62
True           5     26
Generating prompt 1
310/460 examples remaining
Predicted  False  True 
Actual                 
False         30     80
True           8     32
Generating prompt 1
280/460 examples remaining
Predicted  False  True 
Actual                 
False         36     97
T

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Each prompt's cost model is a straight line in the human cost per annotation:
#   total cost per true positive = human_cost_multiplier * x + fixed_gpt_cost
# with (human_cost_multiplier, fixed_gpt_cost) taken from prompt_costs_dict.

# Generate x values
x = np.linspace(-10, 10, 100)

# Calculate y values for the first line
slope_1, intercept_1 = prompt_costs_dict[1]
y1 = slope_1 * x + intercept_1

# Calculate y values for the second line
slope_2, intercept_2 = prompt_costs_dict[2]
y2 = slope_2 * x + intercept_2

# Create the plot
fig, ax = plt.subplots()
ax.plot(x, y1, label='prompt 1')
ax.plot(x, y2, label='prompt 2')

# Add labels and a legend
ax.set_xlabel('human cost per annotation')
ax.set_ylabel('total cost per true positive')
ax.legend()

# Display the plot
plt.show()
