In [1]:
# Switch the kernel's working directory to the results folder. The later cells
# use relative paths (glob('*.csv') and pd.read_csv(model_name + '.csv')), so
# they only find the result files when run from this directory.
%cd ~/RATER-D/results/ft

/home/daved/RATER-D/results/ft


In [2]:
from glob import glob
from pathlib import Path

import pandas as pd
import numpy as np

from sklearn.metrics import roc_auc_score, f1_score, confusion_matrix
from netcal.metrics import ECE

In [3]:
# Collect the result files from the current working directory. glob() makes no
# promise about ordering (it depends on the filesystem), so sort the matches to
# keep the model order stable across machines and re-runs.
files = sorted(glob('*.csv'))
# Model identifier = file name without its '.csv' suffix.
model_names = [Path(file).stem for file in files]

In [4]:
# Score every model's prediction file and collect one summary row per model.
# `res` ends up as a list of one-row DataFrames, one per entry in `model_names`.
res = []

# Number of bins for the expected calibration error; loop-invariant, so set once.
# NOTE(review): 7 presumably matches the number of distinct rating levels in
# `pred` -- confirm against the files being scored.
n_bins = 7

for model_name in model_names:

    # Each model's predictions live in '<model_name>.csv' in the working directory.
    test = pd.read_csv(model_name + '.csv')

    y_true = test['target']
    # A rating of 5 or higher counts as a positive prediction.
    preds = np.where(test['pred'] >= 5, 1, 0)
    # Rescale the rating linearly so that 1 -> 0.0 and 7 -> 1.0; the result is
    # used as the score for AUC and calibration.
    # NOTE(review): assumes `pred` is on a 1-7 scale -- confirm.
    probs = (test['pred'] - 1) / 6

    # Pin the label order so the matrix is always 2x2 and unpacks as
    # (tn, fp, fn, tp), instead of depending on which labels occur in the data.
    tn, fp, fn, tp = confusion_matrix(y_true, preds, labels = [0, 1]).ravel()
    auc = np.round(roc_auc_score(y_true, probs), 3)
    f1 = np.round(f1_score(y_true, preds), 3)
    macro_f1 = np.round(f1_score(y_true, preds, average = 'macro'), 3)

    ece = np.round(ECE(bins = n_bins).measure(np.array(probs), np.array(y_true)), 3)

    metrics = {
        # Underscores in the file stem are shown as '/' in the displayed name.
        'model': model_name.replace('_', '/'),
        'tn': tn,
        'fp': fp,
        'fn': fn,
        'tp': tp,
        'f1': f1,
        'auc': auc,
        'macro_f1': macro_f1,
        'ece': ece
    }

    # Build the one-row frame from a list of records so every column keeps its
    # own dtype (ints/floats); routing the dict through a single column and
    # transposing would leave all metric columns as `object`.
    out = pd.DataFrame([metrics])
    res.append(out)

# Last expression of the cell, so the table is displayed: all models stacked
# with a fresh 0..n-1 index, best AUC first and macro-F1 as the tie-breaker.
pd.concat(res, ignore_index = True).sort_values(['auc', 'macro_f1'], ascending = False)

Unnamed: 0,model,tn,fp,fn,tp,f1,auc,macro_f1,ece
0,gpt4o,25935,3254,1008,3107,0.593,0.893,0.759,0.245
12,unsloth/Qwen2.5-32B-Instruct-bnb-4bit,27325,1864,1507,2608,0.607,0.88,0.775,0.189
6,unsloth/Qwen2.5-72B-Instruct-bnb-4bit,27445,1744,1584,2531,0.603,0.878,0.773,0.213
3,unsloth/DeepSeek-R1-Distill-Qwen-32B-bnb-4bit,27215,1974,1570,2545,0.59,0.878,0.764,0.184
9,unsloth/llama-3-70b-Instruct-bnb-4bit,25808,3381,1132,2983,0.569,0.877,0.744,0.217
5,unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit,26775,2414,1411,2704,0.586,0.872,0.76,0.199
10,unsloth/Llama-3.3-70B-Instruct-bnb-4bit,26084,3105,1314,2801,0.559,0.868,0.74,0.229
7,gpt4omini,26886,2303,1591,2524,0.565,0.861,0.749,0.228
11,unsloth/Qwen2.5-14B-Instruct-bnb-4bit,27463,1726,1810,2305,0.566,0.86,0.753,0.176
1,unsloth/phi-4-unsloth-bnb-4bit,26389,2800,1369,2746,0.568,0.86,0.748,0.232
