# Ensemble Self-Selection
This notebook builds custom majority-vote ensembles from a hand-picked selection of our trained models, so that different model combinations can be evaluated on the test set.

## Imports & Settings

In [1]:
# Update working directory to parent so that we may use our custom functions
# and the project-relative paths used by the cells below.
import os

# Remember the directory the kernel started in. A bare os.chdir('..') climbs
# one level further every time this cell is re-run; anchoring on the recorded
# start directory makes the cell safe to execute repeatedly.
if 'NOTEBOOK_DIR' not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir))

In [2]:
import itertools
import re
from ast import literal_eval
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd
from datasets import load_from_disk
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

def most_common(lst):
    """Return the value that occurs most often in ``lst`` (majority vote).

    Parameters
    ----------
    lst : iterable of hashable values
        The votes to tally. Any iterable is accepted (list, tuple,
        generator, ...); it is consumed once.

    Returns
    -------
    object
        The value with the highest number of occurrences. When several
        values share the highest count, the first of them in the iteration
        order of ``set(lst)`` is returned.

    Raises
    ------
    ValueError
        If ``lst`` contains no elements.
    """
    votes = list(lst)
    if not votes:
        raise ValueError("most_common() requires at least one element")

    # Tally once instead of calling .count() for every distinct value.
    counts = Counter(votes)
    # Iterate over set(votes) rather than the Counter so that tied votes are
    # resolved in the same order as max(set(lst), key=lst.count).
    return max(set(votes), key=counts.__getitem__)

## Load Test Data

In [3]:
# Read the saved dataset from disk and convert its 'test' split to a DataFrame.
dataset_path = "data/target_iSarcasmEval/itesd_iSarcasmEval_balanced.hf"
datasets = load_from_disk(dataset_path)
test_split = datasets['test']
iSarcasm_test_df = test_split.to_pandas()

# Ground-truth labels as a plain Python list, in test-set row order.
true_preds = iSarcasm_test_df['label'].tolist()

In [4]:
# Sanity check: number of gold labels and the container type holding them.
print(len(true_preds), type(true_preds), sep='\n')

1400
<class 'list'>


## Load Model Results Dataset

In [5]:
# Load the per-epoch test results of the trained models.
# The 'predictions' column holds the text of a Python list. It is parsed with
# ast.literal_eval (already imported in the imports cell), which only accepts
# literals, rather than pd.eval, which evaluates arbitrary expressions read
# from the file.
results_df = pd.read_csv('05_results/results_target.csv', converters={'predictions': literal_eval})
results_df.head()

Unnamed: 0,model_name,model_epoch,test_accuracy,test_f1,predictions
0,control_iSarcasm_01,E01_A0.75_F0.41,0.835714,0.454976,"[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,control_iSarcasm_01,E02_A0.77_F0.39,0.825714,0.452915,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
2,control_iSarcasm_01,E03_A0.75_F0.45,0.784286,0.430189,"[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,control_iSarcasm_01,E04_A0.76_F0.5,0.757857,0.402116,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
4,control_iSarcasm_01,E05_A0.74_F0.45,0.768571,0.408759,"[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."


In [6]:
# Summarise the loaded results table: row count, column dtypes and non-null counts.
results_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 210 entries, 0 to 209
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   model_name     210 non-null    object 
 1   model_epoch    210 non-null    object 
 2   test_accuracy  210 non-null    float64
 3   test_f1        210 non-null    float64
 4   predictions    210 non-null    object 
dtypes: float64(2), object(3)
memory usage: 8.3+ KB


## Filter Dataset
Below, we keep only the best epoch of each model (by test F1 score) and then drop the control models.

In [7]:
# Boolean mask selecting, for every model, the row(s) whose test F1 equals that
# model's best test F1. If several epochs tie for the best score, all are kept.
# The string alias 'max' is passed instead of the builtin ``max``: recent pandas
# versions emit a FutureWarning for builtin callables here.
idx = results_df.groupby(['model_name'])['test_f1'].transform('max') == results_df['test_f1']

f1_estimators_df = results_df[idx]

# Remove every model whose name matches the control pattern.
patternDel = ".*control.*"
# Named ``is_control`` rather than ``filter`` so the builtin is not shadowed.
is_control = f1_estimators_df['model_name'].str.contains(patternDel)

f1_estimators_df = f1_estimators_df[~is_control]
f1_estimators_df

Unnamed: 0,model_name,model_epoch,test_accuracy,test_f1,predictions
36,target-iSarcasm_inter-SARC_01,E07_A0.77_F0.45,0.810714,0.455852,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, ..."
43,target-iSarcasm_inter-SARC_02,E04_A0.77_F0.4,0.805,0.457256,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
53,target-iSarcasm_inter-SARC_03,E04_A0.8_F0.4,0.839286,0.494382,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
68,target-iSarcasm_inter-XED-binary_01,E09_A0.74_F0.32,0.812857,0.471774,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, ..."
72,target-iSarcasm_inter-XED-binary_02,E03_A0.73_F0.38,0.769286,0.434326,"[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ..."
82,target-iSarcasm_inter-XED-binary_03,E03_A0.76_F0.37,0.831429,0.458716,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
99,target-iSarcasm_inter-XED-fine_01,E10_A0.77_F0.47,0.805,0.443992,"[0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, ..."
109,target-iSarcasm_inter-XED-fine_02,E10_A0.77_F0.45,0.795714,0.43254,"[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
111,target-iSarcasm_inter-XED-fine_03,E02_A0.77_F0.47,0.782857,0.419847,"[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, ..."
129,target-iSarcasm_inter-IMDB_01,E10_A0.73_F0.44,0.730714,0.370618,"[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."


In [8]:
# Summarise the filtered table: one best-F1 row (or tied rows) per non-control model.
f1_estimators_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18 entries, 36 to 202
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   model_name     18 non-null     object 
 1   model_epoch    18 non-null     object 
 2   test_accuracy  18 non-null     float64
 3   test_f1        18 non-null     float64
 4   predictions    18 non-null     object 
dtypes: float64(2), object(3)
memory usage: 864.0+ bytes


## Self-Select Estimators

In [None]:
# Look at the stored prediction list of one estimator (first matching row).
is_xed_binary_01 = f1_estimators_df['model_name'] == 'target-iSarcasm_inter-XED-binary_01'
f1_estimators_df.loc[is_xed_binary_01, 'predictions'].tolist()[0]

In [None]:
# Define the particular estimators that make up the ensemble.
# Edit this list to experiment with other combinations.
estimators = ['target-iSarcasm_inter-XED-binary_01', 
              'target-iSarcasm_inter-XED-binary_02', 
              'target-iSarcasm_inter-XED-binary_03',
              'target-iSarcasm_inter-SARC_01', 
              'target-iSarcasm_inter-SARC_02', 
              'target-iSarcasm_inter-SARC_03',
              'target-iSarcasm_inter-hellaswag_01', 
              'target-iSarcasm_inter-hellaswag_02', 
              'target-iSarcasm_inter-hellaswag_03']

# One prediction list per model name. If a model has several rows (tied best
# epochs), the first one is used.
predictions_by_model = (
    f1_estimators_df
    .drop_duplicates(subset='model_name', keep='first')
    .set_index('model_name')['predictions']
)

# Fail early, naming the offending entries, instead of an unexplained
# IndexError when a requested model is misspelled or was filtered out.
missing_estimators = [m for m in estimators if m not in predictions_by_model.index]
if missing_estimators:
    raise KeyError(f"No predictions found in f1_estimators_df for: {missing_estimators}")

# get predictions of the defined estimators, in the order listed above
predictions = [predictions_by_model.loc[m] for m in estimators]

print(estimators)
print(len(predictions))

In [None]:
# Regroup the predictions by test example: pred_groups[k] holds the vote of
# every selected estimator for the k-th test example.
pred_groups = [
    [model_preds[example_idx] for model_preds in predictions]
    for example_idx in range(len(true_preds))
]

In [None]:
# Preview the ensemble vote for the first few test examples. For each one,
# print the majority prediction and the true label; when they disagree, also
# print the example's text.
N_PREVIEW = 20

# Never index past the available examples.
for pred_index in range(min(N_PREVIEW, len(pred_groups))):
    # Reuse the shared majority-vote helper rather than re-implementing it here.
    prediction = most_common(pred_groups[pred_index])
    actual = true_preds[pred_index]

    if prediction == actual:
        print(prediction, actual)
    else:
        # The test DataFrame is named iSarcasm_test_df; the previous reference
        # to iSarcasm_df was undefined and raised NameError on the first mismatch.
        print(prediction, actual, iSarcasm_test_df['text'].iloc[pred_index])

In [None]:
# Majority vote across the selected estimators, one label per test example.
ensemble_preds = [most_common(votes) for votes in pred_groups]

In [None]:
# Score the ensemble's majority-vote labels against the true test labels.
acc = accuracy_score(y_true=true_preds, y_pred=ensemble_preds)
f1 = f1_score(y_true=true_preds, y_pred=ensemble_preds, average='binary')

# Accuracy on the first line, binary F1 on the second.
print(acc, f1, sep='\n')