In [336]:
%load_ext autoreload

%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [337]:
import numpy as np
import pandas as pd
import json
import utils
import itertools

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [338]:
# Fetch the run configurations from the project-local `utils` helper.
# Presumably a list of dicts, one per (base_model, scoring_model, dataset) run
# -- TODO confirm against utils.get_config_dicts.
config_dicts = utils.get_config_dicts()

In [339]:
# Tabulate the configurations so the "base_model" and "dataset" columns can be
# queried below.
config_df = pd.DataFrame(config_dicts)

In [340]:
# Axes of the ensemble grid: both settings of the drop-scoring-base flag, plus
# the distinct datasets and base models found in the configs (sorted so the
# grid order is stable).
drop_scoring_base_bools = [False, True]
datasets = sorted({value for value in config_df["dataset"].values})
base_models = sorted({value for value in config_df["base_model"].values})

In [341]:
# Full cartesian grid of ensemble settings; base_model varies slowest and
# dataset fastest, matching the argument order given to itertools.product.
ensemble_configs = [
    {
        "base_model": base_model,
        "drop_scoring_base": drop_scoring_base,
        "dataset": dataset,
    }
    for base_model, drop_scoring_base, dataset in itertools.product(
        base_models, drop_scoring_base_bools, datasets
    )
]

In [342]:
# Show the generated ensemble configurations as the cell output.
ensemble_configs

[{'base_model': 'EleutherAI-gpt-neo-125m',
  'drop_scoring_base': False,
  'dataset': 'xsum'},
 {'base_model': 'EleutherAI-gpt-neo-125m',
  'drop_scoring_base': True,
  'dataset': 'xsum'},
 {'base_model': 'gpt2', 'drop_scoring_base': False, 'dataset': 'xsum'},
 {'base_model': 'gpt2', 'drop_scoring_base': True, 'dataset': 'xsum'},
 {'base_model': 'gpt2-medium', 'drop_scoring_base': False, 'dataset': 'xsum'},
 {'base_model': 'gpt2-medium', 'drop_scoring_base': True, 'dataset': 'xsum'}]

In [333]:
def get_aucs(*, random_state=0, **kwargs):
    """Compute ROC AUC for several ways of combining the prediction columns.

    The frame returned by ``utils.get_pred_df(**kwargs)`` is split into its
    ``"y"`` column (the label) and every remaining column (the feature
    matrix). Presumably each remaining column holds the score of one scoring
    model -- TODO confirm against ``utils.get_pred_df``.

    Parameters
    ----------
    random_state : int or None, default 0
        Seed passed to both ``train_test_split`` and
        ``RandomForestClassifier`` so that the ``"lr"`` and ``"rf"`` AUCs are
        identical across re-runs. Pass ``None`` to get the unseeded
        behaviour. This argument is keyword-only and is *not* forwarded to
        ``utils.get_pred_df``.
    **kwargs
        Forwarded unchanged to ``utils.get_pred_df``.

    Returns
    -------
    dict
        Maps ``"max"``, ``"mean"``, ``"median"``, ``"lr"`` and ``"rf"`` to
        the corresponding ROC AUC.

    Notes
    -----
    ``"max"``, ``"mean"`` and ``"median"`` need no fitting and are scored on
    all rows, whereas ``"lr"`` and ``"rf"`` are scored only on the held-out
    part of the train/test split. The two groups are therefore measured on
    different samples.
    """
    df = utils.get_pred_df(**kwargs)

    y = df["y"]
    X = df.drop("y", axis=1).values

    auc_dict = {}

    # Row-wise aggregates of the feature columns, evaluated on every row.
    auc_dict["max"] = roc_auc_score(y, X.max(axis=1))
    auc_dict["mean"] = roc_auc_score(y, X.mean(axis=1))
    auc_dict["median"] = roc_auc_score(y, np.median(X, axis=1))

    # Seed the split: without it every call draws a different partition and
    # the learned-ensemble AUCs are not reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=random_state
    )

    model_lr = LogisticRegression()
    model_lr.fit(X_train, y_train)
    # Column 1 of predict_proba is the probability of the second class in
    # model.classes_, used here as the ranking score for the AUC.
    auc_dict["lr"] = roc_auc_score(y_test, model_lr.predict_proba(X_test)[:, 1])

    # The forest bootstraps rows and subsamples features, so seed it as well.
    model_rf = RandomForestClassifier(random_state=random_state)
    model_rf.fit(X_train, y_train)
    auc_dict["rf"] = roc_auc_score(y_test, model_rf.predict_proba(X_test)[:, 1])
    return auc_dict

In [322]:
# One row per ensemble configuration: the configuration's own fields plus an
# "aucs" column holding the dict returned by get_aucs for that configuration.
df_ensemble_results_nested = pd.DataFrame(
    [dict(cfg, aucs=get_aucs(**cfg)) for cfg in ensemble_configs]
)

In [323]:
# Expand the nested "aucs" dicts into one column per AUC key, attach them to
# the configuration columns, then discard the original nested column.
df_ensemble_results = (
    df_ensemble_results_nested
    .join(df_ensemble_results_nested["aucs"].apply(pd.Series))
    .drop(columns="aucs")
)

In [324]:
# Single-model AUC table from the project helper, ordered by base model and
# then scoring model, shown as the cell output.
(
    utils.get_auc_single()
    .sort_values(by=["base_model", "scoring_model"])
)

Unnamed: 0,base_model,scoring_model,dataset,auc
0,EleutherAI-gpt-neo-125m,EleutherAI-gpt-neo-125m,xsum,0.9982
2,EleutherAI-gpt-neo-125m,gpt2,xsum,0.69605
1,EleutherAI-gpt-neo-125m,gpt2-medium,xsum,0.247875
6,gpt2,EleutherAI-gpt-neo-125m,xsum,0.93255
8,gpt2,gpt2,xsum,0.978875
7,gpt2,gpt2-medium,xsum,0.536425
3,gpt2-medium,EleutherAI-gpt-neo-125m,xsum,0.899875
5,gpt2-medium,gpt2,xsum,0.901775
4,gpt2-medium,gpt2-medium,xsum,0.9252


In [325]:
# Show the per-configuration ensemble AUC table (one column per AUC key) as
# the cell output.
df_ensemble_results

Unnamed: 0,base_model,drop_scoring_base,dataset,max,mean,median,lr,rf
0,EleutherAI-gpt-neo-125m,False,xsum,0.7833,0.80465,0.732375,1.0,1.0
1,EleutherAI-gpt-neo-125m,True,xsum,0.31355,0.464975,0.464975,0.976437,0.969409
2,gpt2,False,xsum,0.8176,0.898925,0.890325,0.999597,0.997383
3,gpt2,True,xsum,0.5578,0.785875,0.785875,0.961412,0.948686
4,gpt2-medium,False,xsum,0.9252,0.929075,0.8994,0.942192,0.94159
5,gpt2-medium,True,xsum,0.89935,0.9115,0.9115,0.906463,0.863107
