In [3]:
import pandas as pd
import json
import scipy.stats as stats

In [4]:
models = ["mbert","mbert-ro-bio","mbert-nl-clin", "mbert-nl-ro"]
tasks = ["NER"]
num_folds = 10 # number of cross validation folds
results = []

# append all the cross-validation results to a joint dataframe 
for model in models:
  for task in tasks:
    for fold in range(num_folds):
        with open(f'{model}/predict_fold_{fold}_results.json') as f:
            fold_result = json.load(f)
            result = {
              'model': model,
              'task': task,
              'fold': fold,
              'overall_f1': round(fold_result['predict_overall_f1'], 3),
              'overall_precision': fold_result['predict_overall_precision'],
              'overall_recall': fold_result['predict_overall_recall'],
              'ANAT_precision': fold_result['predict_ANAT_precision'],
              'ANAT_recall': fold_result['predict_ANAT_recall'],
              'ANAT_f1': fold_result['predict_ANAT_f1'],
              'CHEM_precision': fold_result['predict_CHEM_precision'],
              'CHEM_recall': fold_result['predict_CHEM_recall'],
              'CHEM_f1': fold_result['predict_CHEM_f1'],
              'DISO_precision': fold_result['predict_DISO_precision'],
              'DISO_recall': fold_result['predict_DISO_recall'],
              'DISO_f1': fold_result['predict_DISO_f1'],
              'PROC_precision': fold_result['predict_PROC_precision'],
              'PROC_recall': fold_result['predict_PROC_recall'],
              'PROC_f1': fold_result['predict_PROC_f1'],
            }
            results.append(result)
result_df = pd.DataFrame(results)
result_df
# result_df[result_df['model'].isin(['Clinc_nl_ro_3', 'Clinc_nl_ro_6'])]

Unnamed: 0,model,task,fold,overall_f1,overall_precision,overall_recall,ANAT_precision,ANAT_recall,ANAT_f1,CHEM_precision,CHEM_recall,CHEM_f1,DISO_precision,DISO_recall,DISO_f1,PROC_precision,PROC_recall,PROC_f1
0,mbert,NER,0,0.696,0.677165,0.716667,0.6875,0.846154,0.758621,0.693333,0.776119,0.732394,0.71875,0.676471,0.69697,0.0,0.0,0.0
1,mbert,NER,1,0.743,0.729927,0.757576,0.363636,0.615385,0.457143,0.803571,0.818182,0.810811,0.807018,0.807018,0.807018,0.5,0.142857,0.222222
2,mbert,NER,2,0.751,0.72807,0.775701,0.740741,0.869565,0.8,0.730159,0.851852,0.786325,0.73913,0.68,0.708333,0.0,0.0,0.0
3,mbert,NER,3,0.712,0.719697,0.703704,0.791667,0.730769,0.76,0.672727,0.804348,0.732673,0.729167,0.714286,0.721649,0.8,0.285714,0.421053
4,mbert,NER,4,0.693,0.668966,0.718519,0.740741,0.769231,0.754717,0.688525,0.807692,0.743363,0.592593,0.727273,0.653061,1.0,0.230769,0.375
5,mbert,NER,5,0.77,0.746154,0.795082,0.782609,0.9,0.837209,0.833333,0.877193,0.854701,0.636364,0.823529,0.717949,0.333333,0.090909,0.142857
6,mbert,NER,6,0.75,0.707317,0.798165,0.741935,0.766667,0.754098,0.836364,0.901961,0.867925,0.529412,0.642857,0.580645,0.0,0.0,0.0
7,mbert,NER,7,0.766,0.732759,0.801887,0.55,0.846154,0.666667,0.781818,0.914894,0.843137,0.815789,0.738095,0.775,0.0,0.0,0.0
8,mbert,NER,8,0.752,0.721854,0.784173,0.736842,0.7,0.717949,0.739726,0.857143,0.794118,0.701754,0.784314,0.740741,0.5,0.2,0.285714
9,mbert,NER,9,0.718,0.699115,0.738318,0.833333,0.882353,0.857143,0.846154,0.785714,0.814815,0.584906,0.756098,0.659574,0.0,0.0,0.0


In [7]:
entity_type = ["ANAT", "CHEM", "DISO", "PROC"]

records = []

for model in result_df["model"].unique():
    for entity in entity_type:
        sub_df = result_df[result_df["model"] == model]

        avg_f1 = sub_df[f"{entity}_f1"].mean()
        std_f1 = sub_df[f"{entity}_f1"].std()
        
        avg_precision = sub_df[f"{entity}_precision"].mean()
        std_precision = sub_df[f"{entity}_precision"].std()
        
        avg_recall = sub_df[f"{entity}_recall"].mean()
        std_recall = sub_df[f"{entity}_recall"].std()

        records.append({
            "model": model,
            "enetity": entity,
            "avg_f1": avg_f1,
            "avg_precision": avg_precision,
            "avg_recall": avg_recall
        })


summary_df = pd.DataFrame(records)
summary_df
        

Unnamed: 0,model,enetity,avg_f1,avg_precision,avg_recall
0,mbert,ANAT,0.736355,0.6969,0.792628
1,mbert,CHEM,0.798026,0.762571,0.83951
2,mbert,DISO,0.706094,0.685488,0.734994
3,mbert,PROC,0.144685,0.313333,0.095025
4,mbert-ro-bio,ANAT,0.744398,0.710374,0.794796
5,mbert-ro-bio,CHEM,0.828988,0.795111,0.866523
6,mbert-ro-bio,DISO,0.756289,0.748553,0.767768
7,mbert-ro-bio,PROC,0.197433,0.331667,0.14654
8,mbert-nl-clin,ANAT,0.699687,0.666545,0.740079
9,mbert-nl-clin,CHEM,0.778711,0.736811,0.827697
