# Model Evaluation

This notebook evaluates the saved epochs of our control, target, and intermediate models on their respective test sets. The results are written to `05_results/results_target.csv` (control and target models) or `05_results/results_intermediate.csv` (intermediate models).

## Imports & Settings

In [1]:
# Update working directory to parent so that we may use our custom functions.
# The relative paths used in the cells below ("data/...", "model_saves/...",
# "05_results/...") are resolved against this directory as well.
# NOTE: the path '..' is relative, so re-running this cell moves up one more
# level each time -- run it only once per kernel session.
import os
os.chdir('..')
# Uncomment to check the resulting working directory:
# os.getcwd( )

In [2]:
from utils_eval import *
from datasets import load_from_disk

## Generate Results CSV

In [None]:
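# One-time setup, intentionally left commented out: uncomment to create an empty
# results CSV containing only the header row with the columns listed below.
# NOTE: to_csv overwrites any existing file at the given path.
# generate_results = {'model_name': [], 'model_epoch': [], 'test_accuracy': [], 'test_f1': [], 'predictions':[]}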
# generate_results_df = pd.DataFrame(data=generate_results)

# generate_results_df.to_csv("05_results/results_intermediate.csv", index=False)

## control_iSarcasm

In [None]:
# Read the iSarcasmEval dataset saved on disk and keep its 'test' split
# as a pandas DataFrame.
dataset_path = "data/target_iSarcasmEval/itesd_iSarcasmEval_balanced.hf"
datasets = load_from_disk(dataset_path)
iSarcasm_test_df = datasets["test"].to_pandas()

# Top-level save directories of the control runs; the next cell iterates
# over this list.
top_model_dirs = [
    "model_saves/control_iSarcasm_01",
    "model_saves/control_iSarcasm_02",
    "model_saves/control_iSarcasm_03",
]

In [None]:
# Evaluate every control run directory against the iSarcasm test split.
# parse_model_dir / evaluate_model are not defined in this notebook
# (presumably provided by utils_eval -- confirm there).
# NOTE(review): the literal 2 looks like the number of classes -- TODO confirm.
for top in top_model_dirs:
    models = parse_model_dir(top)
    evaluate_model(
        iSarcasm_test_df,
        models,
        2,
        '05_results/results_target.csv',
    )

## intermediate_XED_binary


In [None]:
# Read the binary XED dataset saved on disk and keep its 'test' split
# as a pandas DataFrame.
dataset_path = "data/inter_XED/itesd_xed_binary_balanced.hf"
datasets = load_from_disk(dataset_path)
xed_binary_test_df = datasets["test"].to_pandas()

# Top-level save directory of the run evaluated in the next cell.
top_model_dirs = [
    "model_saves/intermediate_XED_binary_01",
]

In [None]:
# Evaluate the binary XED run(s) on the XED binary test split.
# NOTE(review): the literal 2 looks like the number of classes -- TODO confirm
# against evaluate_model (not defined in this notebook).
for top in top_model_dirs:
    models = parse_model_dir(top)
    evaluate_model(
        xed_binary_test_df,
        models,
        2,
        '05_results/results_intermediate.csv',
    )

## intermediate_XED_fine


In [None]:
# Read the fine-grained XED dataset saved on disk and keep its 'test' split
# as a pandas DataFrame.
dataset_path = "data/inter_XED/itesd_xed_fine_balanced.hf"
datasets = load_from_disk(dataset_path)
xed_fine_test_df = datasets["test"].to_pandas()

# Top-level save directory of the run evaluated in the next cell.
top_model_dirs = [
    "model_saves/intermediate_XED_fine_01",
]

In [None]:
# Evaluate the fine-grained XED run(s) on the XED fine test split.
# NOTE(review): the literal 8 (2 in the binary sections) looks like the
# number of classes -- TODO confirm against evaluate_model.
for top in top_model_dirs:
    models = parse_model_dir(top)
    evaluate_model(
        xed_fine_test_df,
        models,
        8,
        '05_results/results_intermediate.csv',
    )

## intermediate_SARC


In [None]:
# Read the SARC dataset saved on disk and keep its 'test' split
# as a pandas DataFrame.
dataset_path = "data/inter_SARC/itesd_sarc_balanced.hf"
datasets = load_from_disk(dataset_path)
sarc_test_df = datasets["test"].to_pandas()

# Top-level save directory of the run evaluated in the next cell.
top_model_dirs = [
    "model_saves/intermediate_SARC_01",
]

In [None]:
# Evaluate the SARC run(s) on the SARC test split.
# The output path uses the '05_results/' directory, the same intermediate
# results file that the other intermediate sections write to and that the
# "Results Preview" section reads back.
# NOTE(review): the literal 2 looks like the number of classes -- TODO confirm
# against evaluate_model (not defined in this notebook).
for top in top_model_dirs:
    models = parse_model_dir(top)
    evaluate_model(sarc_test_df, models, 2, '05_results/results_intermediate.csv')

## intermediate_IMDB


In [None]:
# Read the IMDB sentiment dataset saved on disk and keep its 'test' split
# as a pandas DataFrame.
dataset_path = "data/inter_IMDB_sentiment/itesd_imdb_balanced.hf"
datasets = load_from_disk(dataset_path)
imdb_test_df = datasets["test"].to_pandas()

# Top-level save directory of the run evaluated in the next cell.
top_model_dirs = [
    "model_saves/intermediate_IMDB_01",
]

In [None]:
# Evaluate the IMDB run(s) on the IMDB test split.
# The output path uses the '05_results/' directory, the same intermediate
# results file that the other intermediate sections write to and that the
# "Results Preview" section reads back.
# NOTE(review): the literal 2 looks like the number of classes -- TODO confirm
# against evaluate_model (not defined in this notebook).
for top in top_model_dirs:
    models = parse_model_dir(top)
    evaluate_model(imdb_test_df, models, 2, '05_results/results_intermediate.csv')

## intermediate_hellaswag


In [3]:
# Read the HellaSwag dataset saved on disk. Unlike the classification
# sections, the whole loaded object is kept (no 'test' split / DataFrame
# conversion here); it is handed to evaluate_mc_model in the next cell.
hellaswag_datasets = load_from_disk(
    "data/inter_HellaSwag/itesd_hellaswag_balanced.hf"
)

# Top-level save directory of the run evaluated in the next cell.
top_model_dirs = [
    "model_saves/intermediate_HellaSwag_01",
]

In [None]:
# Evaluate the HellaSwag run(s) with evaluate_mc_model, which receives the
# full loaded dataset object rather than a test DataFrame.
# NOTE(review): "mc" presumably means multiple choice and 4 the number of
# answer options -- TODO confirm against evaluate_mc_model.
for top in top_model_dirs:
    models = parse_model_dir(top)
    evaluate_mc_model(
        hellaswag_datasets,
        models,
        4,
        '05_results/results_intermediate.csv',
    )

## intermediate_cosmosQA


In [None]:
# Read the CosmosQA dataset saved on disk. The whole loaded object is kept
# (no 'test' split / DataFrame conversion here); it is handed to
# evaluate_mc_model in the next cell.
cosmos_datasets = load_from_disk(
    "data/inter_cosmosqa/itesd_cosmosqa_balanced.hf"
)

# Top-level save directory of the run evaluated in the next cell.
top_model_dirs = [
    "model_saves/intermediate_CosmosQA_01",
]

In [None]:
# Evaluate the CosmosQA run(s) with evaluate_mc_model, which receives the
# full loaded dataset object rather than a test DataFrame.
# NOTE(review): "mc" presumably means multiple choice and 4 the number of
# answer options -- TODO confirm against evaluate_mc_model.
for top in top_model_dirs:
    models = parse_model_dir(top)
    evaluate_mc_model(
        cosmos_datasets,
        models,
        4,
        '05_results/results_intermediate.csv',
    )

## target_iSarcasm

In [None]:
# Read the iSarcasmEval dataset saved on disk and keep its 'test' split
# as a pandas DataFrame (same dataset path as the control_iSarcasm section).
dataset_path = "data/target_iSarcasmEval/itesd_iSarcasmEval_balanced.hf"
datasets = load_from_disk(dataset_path)
iSarcasm_test_df = datasets["test"].to_pandas()

# Top-level save directories of the target runs; the next cell iterates
# over this list.
top_model_dirs = [
    "model_saves/target-iSarcasm_inter-XED-fine_01",
    "model_saves/target-iSarcasm_inter-XED-fine_02",
    "model_saves/target-iSarcasm_inter-XED-fine_03",
]

In [None]:
# Evaluate every target run directory against the iSarcasm test split,
# using the same results file as the control section.
# NOTE(review): the literal 2 looks like the number of classes -- TODO confirm
# against evaluate_model (not defined in this notebook).
for top in top_model_dirs:
    models = parse_model_dir(top)
    evaluate_model(
        iSarcasm_test_df,
        models,
        2,
        '05_results/results_target.csv',
    )

# Results Preview

In [None]:
# Load the results file written by the control_iSarcasm and target_iSarcasm
# sections and display its first 40 rows.
# NOTE(review): `pd` is not imported explicitly in this notebook; presumably it
# is brought in by `from utils_eval import *` -- confirm.
results_df = pd.read_csv('05_results/results_target.csv')
results_df.head(40)

In [None]:
# Display pandas' summary statistics for the target results table.
results_df.describe()

In [None]:
# Display only the rows whose model_name is the first XED-fine target run.
results_df[results_df['model_name'] == "target-iSarcasm_inter-XED-fine_01"]


In [None]:
# Load the results file written by the intermediate_* sections and display
# its first 60 rows.
inter_results_df = pd.read_csv('05_results/results_intermediate.csv')
inter_results_df.head(60)

In [None]:
# Select the intermediate_XED_fine_01 row with the highest test F1 score.
filtered = inter_results_df.loc[inter_results_df['model_name'] == "intermediate_XED_fine_01"]
# idxmax() returns an index *label*, so the row is fetched with label-based
# .loc. Positional .iloc would only give the right row while the frame still
# has the default 0..n-1 index and would silently pick a wrong row otherwise.
max_f1 = filtered.loc[filtered['test_f1'].idxmax()]
# Last expression of the cell: displayed as the cell output.
max_f1