# Logistic Combination Model

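Example notebook for the BrainGPT project: it combines the confidence-weighted judgments of human participants and Llama-2 chat models on GPT-4 generated test cases with a logistic regression, and reports the cross-validated accuracy of every classifier and every team of classifiers.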

## Load libraries

In [1]:
import numpy as np
import pandas as pd

from itertools import combinations
from sklearn.linear_model import LogisticRegression

## Prepare human-machine data

In [2]:
# Root of the data directory; every input path below is built relative to it
root_path = '../data/'

# GPT-4 generated test cases, relative to root_path
abstracts_fpath = 'testcases/BrainBench_GPT-4_v0.1.csv'

# Machine classifiers to analyze (names of the result folders under
# machine/model_results/)
selected_LLMs = [
    'meta-llama--Llama-2-7b-chat-hf',
    'meta-llama--Llama-2-13b-chat-hf',
    'meta-llama--Llama-2-70b-chat-hf',
]

In [3]:
# Read human participants data
online_study = pd.read_csv(f"{root_path}human/data/participant_data.csv")

# Select GPT-4 generated abstracts (rows whose journal_section starts with 'machine').
# .copy() makes the filtered frame independent of the one read from disk, so
# overwriting 'abstract_id' below does not raise SettingWithCopyWarning.
abstract_idx = online_study['journal_section'].str.startswith('machine')
online_study = online_study[abstract_idx].copy()

# Extract DOI links from all test cases assessed by human participants.
# The file is read as a single 'DOI;abstract_id;abstract' column; only the
# first two ';'-separated fields are kept.
doi = pd.read_csv(f"{root_path}human/abstract_id_doi.csv")
doi = doi['DOI;abstract_id;abstract'].str.split(';', expand=True)[[0,1]]
doi.columns = ['doi', 'abstract_id']

# Extract DOI links from GPT-4 generated test cases
# (abstracts_fpath is the config constant for this file; same path as before)
gpt4_doi = pd.read_csv(f"{root_path}{abstracts_fpath}")

# Reorder human participants data based on the order of GPT-4 generated abstracts:
# gpt4_order lists the abstract ids in test-case order, and an ordered
# categorical with those categories makes sort_values follow that order.
gpt4_order = gpt4_doi.merge(doi, on='doi')['abstract_id'].astype(float)
online_study['abstract_id'] = pd.Categorical(online_study['abstract_id'], categories=gpt4_order, ordered=True)
online_study = online_study.sort_values('abstract_id')

In [4]:
# Position of each response's abstract within gpt4_order; this is the key
# used to join the per-test-case arrays (labels, model outputs) below
abstract_positions = np.array([
    np.where(gpt4_order == abstract)[0][0]
    for abstract in online_study['abstract_id']
])

# Classification and confidence frames start out with the join key only
classification = pd.DataFrame({'abstract_id': abstract_positions})
confidence = classification.copy()

# Attach the ground truth label of every test case
order_labels = np.load(f"{root_path}machine/model_results/{selected_LLMs[0]}/llm_abstracts/labels.npy")
true_label_frame = pd.DataFrame(order_labels, columns=['true labels'])
classification = classification.merge(true_label_frame, left_on='abstract_id', right_index=True)

# Human classification: a correct response equals the true label,
# an incorrect one is the flipped label
was_correct = online_study['correct'].values == 1
truth = classification['true labels'].values
classification['Human'] = np.where(was_correct, truth, 1 - truth)

# Human confidence, signed by the chosen option (maps 0/1 to -1/+1)
chosen_sign = (classification['Human'].values - 0.5) * 2
confidence['Human'] = online_study['confidence'].values * chosen_sign

In [5]:
# Text surrounding the model size in the result-folder names
name_prefix = 'meta-llama--Llama-2-'
name_suffix = '-chat-hf'

for llm_id in selected_LLMs:

    # Read PPL scores of machine classifiers
    # (presumably one row per test case and one column per option A/B — confirm)
    machine_PPL = np.load(f"{root_path}machine/model_results/{llm_id}/llm_abstracts/PPL_A_and_B.npy")

    # Short column name, e.g. '7B'. The prefix/suffix are removed explicitly:
    # str.lstrip/rstrip treat their argument as a *set of characters*, so they
    # would also eat leading/trailing characters of the size itself
    # (a '2b' model would become 'B').
    machine_name = llm_id
    if machine_name.startswith(name_prefix):
        machine_name = machine_name[len(name_prefix):]
    if machine_name.endswith(name_suffix):
        machine_name = machine_name[:-len(name_suffix)]
    machine_name = machine_name.upper()

    # Get classification results: the option with the lower PPL is the choice
    machine_classification = pd.DataFrame(np.argmin(machine_PPL, axis=1), columns=[machine_name])
    classification = classification.merge(machine_classification, left_on='abstract_id', right_index=True)

    # Define confidence as PPL difference (second column minus first column)
    machine_confidence = pd.DataFrame(pd.DataFrame(machine_PPL).diff(axis=1).iloc[:,1].values, columns=[machine_name])
    confidence = confidence.merge(machine_confidence, left_on='abstract_id', right_index=True)

In [6]:
# Get all possible combinations of classifiers, ordered by team size
# (single classifiers first, then pairs, ...). The first column of
# `confidence` is the 'abstract_id' join key, so it is skipped.
all_classifiers = confidence.columns[1:].values
all_combinations = [
    list(team)
    for team_size in range(1, len(all_classifiers) + 1)
    for team in combinations(all_classifiers, team_size)
]

# Create a dataframe to store predictions: one row per combination, one
# boolean column per classifier marking whether it is part of that team.
# Built in a single constructor call instead of enlarging the frame one
# cell at a time, which is slow and leaves the columns with object dtype.
predictions = pd.DataFrame(
    [[name in team for name in all_classifiers] for team in all_combinations],
    columns=all_classifiers,
)

## Run logistic combination model

In [7]:
# Leave-one-out cross-validation over test cases: each fold holds out every
# response to one abstract, fits on the remaining responses and scores the
# held-out ones. Accuracy is the mean over all held-out responses.
# The fold ids come from the data itself rather than from the length of an
# array left over from a loop in another cell.
fold_ids = np.sort(classification['abstract_id'].unique())

for i, team in enumerate(all_combinations):

    if len(team) == 1:

        # A single classifier needs no model: its accuracy is the share of
        # responses that match the true label, so no folds are required
        acc = classification[team[0]].values == classification['true labels'].values

    else:

        fold_acc = []

        for j in fold_ids:

            # Train/test data for current fold
            X_train = confidence[confidence['abstract_id']!=j]
            y_train = classification[classification['abstract_id']!=j]['true labels'].values

            X_test = confidence[confidence['abstract_id']==j]
            y_test = classification[classification['abstract_id']==j]

            # Train logistic regression model on the team's confidence scores
            clf = LogisticRegression(random_state=1).fit(X_train[team], y_train)

            # Get prediction accuracy for the held-out responses
            fold_acc.append(clf.predict(X_test[team]) == y_test['true labels'].values)

        # Pool the per-fold results once instead of concatenating inside the loop
        acc = np.concatenate(fold_acc)

    # Save prediction accuracy as a scalar
    predictions.loc[i, 'Accuracy'] = acc.mean()

# Save predictions
predictions.to_csv("../results/Logistic_predictions.csv", index=False)
predictions

Unnamed: 0,Human,7B,13B,70B,Accuracy
0,True,False,False,False,0.697813
1,False,True,False,False,0.725646
2,False,False,True,False,0.725646
3,False,False,False,True,0.745527
4,True,True,False,False,0.785288
5,True,False,True,False,0.803181
6,True,False,False,True,0.795229
7,False,True,True,False,0.765408
8,False,True,False,True,0.741551
9,False,False,True,True,0.755467
