In [1]:
from openai import OpenAI
from config import *
import jsonlines
import numpy as np
from tqdm import tqdm, trange
from sklearn.manifold import TSNE
import pandas as pd
import matplotlib.pyplot as plt
# batched using threads
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from concurrent.futures import ThreadPoolExecutor
# sklearn confusion matrix
from sklearn.metrics import confusion_matrix

In [2]:
def read_jsonl(file_path):
    """Read every record of a JSON Lines file into a list.

    Args:
        file_path: Path handed to ``jsonlines.open``.

    Returns:
        A list with one element per record yielded by the reader, in file order.
    """
    with jsonlines.open(file_path) as reader:
        records = list(reader)
    return records

def read_npz(file_path):
    """Load the array stored under the key ``'arr_0'`` in a ``.npz`` archive.

    ``np.load`` on an ``.npz`` file returns a lazily-loading archive object that
    keeps the underlying file open; the context manager closes it once the
    array has been read into memory.

    Args:
        file_path: Path to the ``.npz`` archive.

    Returns:
        The ``'arr_0'`` array from the archive.

    Raises:
        KeyError: If the archive has no ``'arr_0'`` entry.
    """
    with np.load(file_path) as archive:
        # Indexing materialises the array, so it stays valid after the archive closes.
        return archive['arr_0']

def get_datasets(meta, topic, model='Mistral-7B-Instruct-v0.2'):
    """Load the train/test records and embeddings for one topic and model.

    Files are read from ``datasets/<family>/<topic>/`` where ``<family>`` is
    ``mmlu`` for ``meta == 'mmlu'`` and ``anthropic-eval`` for
    ``meta == 'anthropic'``. The JSONL records are passed through
    ``get_embedding_strs`` before being returned.

    Args:
        meta: Dataset family, ``'mmlu'`` or ``'anthropic'``.
        topic: Sub-directory name of the topic.
        model: Model name used as the prefix of the JSONL file names.

    Returns:
        Tuple ``(train_ds, test_ds, train_emb, test_emb)``.

    Raises:
        ValueError: If ``meta`` is not one of the supported families.
    """
    # Resolve the family directory first so an unknown `meta` fails before any I/O.
    if meta == 'mmlu':
        family_dir = 'mmlu'
    elif meta == 'anthropic':
        family_dir = 'anthropic-eval'
    else:
        raise ValueError(f"Unknown meta: {meta}")

    topic_dir = f'datasets/{family_dir}/{topic}'

    raw_train = read_jsonl(f'{topic_dir}/{model}-train.jsonl')
    raw_test = read_jsonl(f'{topic_dir}/{model}-test.jsonl')
    train_emb = read_npz(f'{topic_dir}/embedding-train.npz')
    test_emb = read_npz(f'{topic_dir}/embedding-test.npz')

    train_ds = get_embedding_strs(raw_train, model, meta)
    test_ds = get_embedding_strs(raw_test, model, meta)
    return train_ds, test_ds, train_emb, test_emb


# Zero-based choice index -> answer letter. Only indices 0-3 are mapped; the
# original note says every other value corresponds to 'N', but no such
# fallback is implemented here, so a fifth choice raises KeyError.
no_to_letter = dict(enumerate('ABCD'))

def format_single_entry_mmlu(entry, model):
    """Turn one MMLU record into a prompt string plus a correctness flag.

    Args:
        entry: Record providing ``'question'``, ``'choices'``, ``'answer'`` and,
            under the key ``model``, a mapping with that model's ``'answer'``.
        model: Key under which the model's answer is stored in ``entry``.

    Returns:
        ``{'question': <prompt text>, 'correctness': <bool>}`` where the flag is
        whether the ground-truth answer equals the model's answer.
    """
    question = entry['question']
    options = entry['choices']
    truth = entry['answer']
    predicted = entry[model]['answer']

    lettered = []
    for idx, option in enumerate(options):
        lettered.append(f'{no_to_letter[idx]}: {option}')
    options_text = '\n'.join(lettered)

    prompt = f'Question: {question}\nChoices:\n{options_text}'
    return {'question': prompt, 'correctness': truth == predicted}

def format_single_entry_anthropic(entry):
    """Return the question text of one Anthropic-eval record.

    Unlike ``format_single_entry_mmlu`` this returns the bare ``'question'``
    value rather than a dict, so the result carries no ``'correctness'`` field.

    Args:
        entry: Record providing a ``'question'`` key.

    Returns:
        The value stored under ``entry['question']``.
    """
    return entry['question']

def get_embedding_strs(ds, model, meta):
    """Format every record of a dataset with the formatter for its family.

    Args:
        ds: Iterable of raw records.
        model: Model name; used by the MMLU formatter to look up the model's
            answer inside each record. The Anthropic formatter does not use it.
        meta: Dataset family, ``'mmlu'`` or ``'anthropic'``.

    Returns:
        A list with one formatted item per record: dicts with ``'question'``
        and ``'correctness'`` for MMLU, plain question values for Anthropic.

    Raises:
        ValueError: If ``meta`` is not one of the supported families.
    """
    if meta == 'mmlu':
        return [format_single_entry_mmlu(entry, model) for entry in ds]
    elif meta == 'anthropic':
        # format_single_entry_anthropic takes only the entry; passing `model`
        # as a second positional argument raised TypeError.
        return [format_single_entry_anthropic(entry) for entry in ds]
    else:
        raise ValueError(f"Unknown meta: {meta}")
    
def preprocess_ds(meta, topic, base='Mistral-7B-Instruct-v0.2'):
    """Return the formatted train and test records for one topic.

    ``get_datasets`` returns four values (formatted train/test records and the
    two embedding arrays) and already runs the records through
    ``get_embedding_strs``, so the records are taken as-is and the embeddings
    are dropped.

    Args:
        meta: Dataset family, forwarded to ``get_datasets``.
        topic: Topic name, forwarded to ``get_datasets``.
        base: Model name, forwarded to ``get_datasets``.

    Returns:
        Tuple ``(train_strs, test_strs)`` of formatted records.
    """
    # Previously this unpacked the 4-tuple into two names (ValueError) and then
    # called get_embedding_strs without its `meta` argument on records that had
    # already been formatted.
    train_strs, test_strs, _train_emb, _test_emb = get_datasets(meta, topic, base)
    return train_strs, test_strs


# Regression and plotting

In [3]:
def logistic_reg(train_emb, train_label):
    """Fit a logistic-regression classifier on the training embeddings.

    Args:
        train_emb: Training feature matrix.
        train_label: Training labels.

    Returns:
        The value returned by ``LogisticRegression.fit`` for an estimator
        built with ``random_state=0`` and ``max_iter=1000``.
    """
    estimator = LogisticRegression(random_state=0, max_iter=1000)
    fitted = estimator.fit(train_emb, train_label)
    return fitted

def get_accuracy(clf, test_emb, test_label):
    """Score a fitted classifier on held-out data.

    Args:
        clf: Object exposing ``predict``.
        test_emb: Features passed to ``clf.predict``.
        test_label: Reference labels.

    Returns:
        ``accuracy_score(test_label, predictions)``.
    """
    return accuracy_score(test_label, clf.predict(test_emb))

def get_confusion_matrix(clf, test_emb, test_label):
    """Build the confusion matrix of a fitted classifier on held-out data.

    Args:
        clf: Object exposing ``predict``.
        test_emb: Features passed to ``clf.predict``.
        test_label: Reference labels.

    Returns:
        ``confusion_matrix(test_label, predictions)``.
    """
    return confusion_matrix(test_label, clf.predict(test_emb))

def evaluate_reg(train_emb, train_label, test_emb, test_label):
    """Train logistic regression on the train split and score it on the test split.

    Args:
        train_emb: Training features.
        train_label: Training labels.
        test_emb: Test features.
        test_label: Test labels.

    Returns:
        The accuracy reported by ``get_accuracy`` for the fitted classifier.
    """
    fitted_clf = logistic_reg(train_emb, train_label)
    test_accuracy = get_accuracy(fitted_clf, test_emb, test_label)
    return test_accuracy

def knn(train_emb, train_label, test_emb, test_label, k=5):
    """Fit a k-nearest-neighbours classifier and report its test accuracy.

    Args:
        train_emb: Training features.
        train_label: Training labels.
        test_emb: Test features.
        test_label: Test labels.
        k: Number of neighbours passed as ``n_neighbors``.

    Returns:
        The accuracy reported by ``get_accuracy`` for the fitted classifier.
    """
    neighbour_clf = KNeighborsClassifier(n_neighbors=k)
    neighbour_clf.fit(train_emb, train_label)
    return get_accuracy(neighbour_clf, test_emb, test_label)


def get_oracle_acc(test_label):
    """Accuracy of the better of the two constant predictors.

    With labels encoded as 0/1, the mean is the share of 1s; always predicting
    the more frequent label scores ``max(mean, 1 - mean)``.

    Args:
        test_label: Labels whose mean is taken with ``np.mean``.

    Returns:
        The larger of the mean and one minus the mean.
    """
    positive_share = np.mean(test_label)
    negative_share = 1 - positive_share
    return max(positive_share, negative_share)

def plot_tsne(embeddings, labels):
    """Project embeddings to 2-D with t-SNE and scatter them by correctness.

    Points labelled 0 are drawn as 'Incorrect' and points labelled 1 as
    'Correct'; other label values are not drawn.

    Args:
        embeddings: Feature matrix passed to ``TSNE.fit_transform``.
        labels: One 0/1 label per row of ``embeddings``; any sequence is
            accepted and converted to a NumPy array.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # A plain list would make `labels == class_id` a single bool instead of a
    # per-point mask, silently plotting nothing.
    labels = np.asarray(labels)

    tsne = TSNE(n_components=2, random_state=0)
    X_2d = tsne.fit_transform(embeddings)

    plt.figure(figsize=(6, 5))
    # Class id -> (legend name, colour). Iterating over the classes themselves
    # (rather than over range(len(labels))) keeps both classes even when there
    # are fewer than two samples.
    class_styles = [('Incorrect', 'tab:blue'), ('Correct', 'tab:orange')]
    for class_id, (class_name, color) in enumerate(class_styles):
        mask = labels == class_id
        plt.scatter(X_2d[mask, 0], X_2d[mask, 1], c=color, label=class_name)
    plt.legend()
    plt.show()

def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """Print a short status line and plot a confusion matrix with cell values.

    Args:
        cm: Square confusion matrix (rows: true class, columns: predicted class).
        classes: Tick labels, one per class, in matrix order.
        normalize: If True, each row is divided by its sum before plotting.
        title: Title of the plot.
        cmap: Colormap passed to ``matshow``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    cm = np.asarray(cm)

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Rows without any true samples stay at 0 instead of becoming NaN.
        cm = np.divide(
            cm.astype('float'),
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    fig, ax = plt.subplots()
    cax = ax.matshow(cm, cmap=cmap)
    fig.colorbar(cax)

    # Pin tick positions before labelling them; labelling auto-placed ticks
    # depends on where matplotlib happens to put them.
    tick_positions = np.arange(len(classes))
    ax.set_xticks(tick_positions)
    ax.set_yticks(tick_positions)
    ax.set_xticklabels(classes)
    ax.set_yticklabels(classes)

    # Write the value of every cell at its centre.
    for (i, j), val in np.ndenumerate(cm):
        cell_text = f'{val:.2f}' if normalize else f'{val}'
        ax.text(j, i, cell_text, ha='center', va='center')

    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    ax.set_title(title)
    plt.show()

# Setup

In [4]:
# Dataset family to evaluate; selects the directory layout used by get_datasets.
meta = 'mmlu'

# Topics evaluated by the results loop.
topics = [
    'machine_learning',
    'high_school_physics',
    'high_school_world_history',
    'high_school_chemistry',
    'college_biology',
    'high_school_mathematics',
]

# Default topic for ad-hoc cells ('high_school_physics'); the results loop
# rebinds `topic` while iterating.
topic = topics[1]

# Models whose answers are evaluated; names match the JSONL file prefixes.
models = [
    'Mistral-7B-Instruct-v0.2',
    'Mixtral-8x7B-Instruct-v0.1',
    'Llama-2-13b-chat-hf',
    'Llama-2-70b-chat-hf',
    'gpt-3.5-turbo-1106',
]

# Logistic regression evaluation with oracle baseline

## Constant-oracle accuracies (best constant predictor)

In [6]:
# Evaluate every (topic, model) pair and collect one result row per pair.
# Rows are gathered in a list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the frame on every
# iteration.
result_columns = ['topic', 'model', 'oracle_acc', 'model_acc', 'logistic_reg_acc', 'knn_accuracy']
records = []
for topic in tqdm(topics):
    for model in models:
        train_ds, test_ds, train_emb, test_emb = get_datasets(meta, topic, model)
        # 1 where the model answered correctly, 0 otherwise.
        train_labels = np.array([entry['correctness'] for entry in train_ds], dtype=np.int32)
        test_labels = np.array([entry['correctness'] for entry in test_ds], dtype=np.int32)

        oracle_acc = get_oracle_acc(test_labels)  # best constant predictor
        model_acc = np.mean(test_labels)  # share of test questions the model got right
        logistic_reg_acc = evaluate_reg(train_emb, train_labels, test_emb, test_labels)
        knn_acc = knn(train_emb, train_labels, test_emb, test_labels)
        records.append({
            'topic': topic,
            'model': model,
            'oracle_acc': oracle_acc,
            'model_acc': model_acc,
            'knn_accuracy': knn_acc,
            'logistic_reg_acc': logistic_reg_acc,
        })

# Explicit columns keep the column order the appended frame had (knn_accuracy
# last) and keep the header present even when no rows were produced.
df = pd.DataFrame(records, columns=result_columns)

In [7]:
# Write the per-(topic, model) results table to results.csv (relative path),
# omitting the DataFrame index column.
df.to_csv('results.csv', index=False)

In [6]:
# plot_tsne(test_emb, test_labels)

In [7]:
# plot_confusion_matrix(get_confusion_matrix(logistic_reg(train_emb, train_labels), test_emb, test_labels), ['Incorrect', 'Correct'], normalize=False)