In [1]:
import json
import pandas as pd
import os
import numpy as np
from scipy.stats import pearsonr, kendalltau
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

In [3]:
def get_eval_metrics(preds, labels):
    results = {}
    scaler = MinMaxScaler()
    scaled_labs = scaler.fit_transform(np.array(preds)[..., np.newaxis]).reshape(-1,)
    results['pearson'] = round(pearsonr(preds, labels)[0], 5)
    results['kendalltau'] = round(kendalltau(preds, labels)[0], 5)
    results['f1_macro'] = max([f1_score(labels, [int(x>y/100) for x in scaled_labs], average='macro') for y in range(0, 100, 1)])
    results['f1_binary'] = max([f1_score(labels, [int(x>y/100) for x in scaled_labs], average='binary') for y in range(0, 100, 1)])
    results['roc_auc'] = roc_auc_score(labels, preds)
    return results

In [5]:
files = [x for x in os.listdir('results/')]
for file in files:
    print(file)
    with open(f'results/{file}', 'r') as f:
        data = json.load(f)
    
    try:
        print(get_eval_metrics(data[0], data[1]))
        print(f'time: {data[-1]}')
        print('###############################\n\n')
    except:
        continue

sentli_large_results.json
{'pearson': -0.00503, 'kendalltau': -0.09583, 'f1_macro': 0.42670401493930904, 'f1_binary': 0.8534080298786181, 'roc_auc': 0.4222371514947934}
time: 12688.399463176727
###############################


flan_t5_xxl_results.json
{'pearson': 0.42646, 'kendalltau': 0.35351, 'f1_macro': 0.7177894911839777, 'f1_binary': 0.8617424242424243, 'roc_auc': 0.7868659724554922}
time: 4441.220121383667
###############################


flan_t5_large_results.json
{'pearson': 0.35256, 'kendalltau': 0.29159, 'f1_macro': 0.6691810344827587, 'f1_binary': 0.8566073102155576, 'roc_auc': 0.7366196394580674}
time: 884.8548352718353
###############################


rouge_results.json
bert_score_p_results.json
{'pearson': 0.12265, 'kendalltau': 0.08217, 'f1_macro': 0.568526945927766, 'f1_binary': 0.8571428571428572, 'roc_auc': 0.5666778636210951}
time: 4.222294569015503
###############################


flan_t5_base_results.json
{'pearson': 0.26936, 'kendalltau': 0.22876, 'f1_macro': 