In [39]:
import os
from datetime import datetime
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support,f1_score,accuracy_score

In [47]:
def evaluate(res_filename, labels = [0,1]):
    """Score one result CSV of binary predictions against its gold annotations.

    The CSV is expected to provide an ``annotation`` column (gold labels) and a
    ``pred`` column (raw model answers). The dataset is identified by the part
    of the file's base name that precedes the first ``_``:

    * ``CN``: gold labels ``yes`` -> 1, ``no`` -> 0.
    * anything starting with ``SX``: ``Sexiste`` -> 1, ``NonSexiste`` -> 0,
      ``Sexiste-reportage`` -> 0.

    Predictions are mapped case-insensitively (ignoring surrounding
    whitespace) to 1 for ``oui``/``yes`` and 0 for ``non``/``no``. Rows whose
    prediction is missing or matches none of these are dropped before scoring.

    Args:
        res_filename: Path to the result CSV.
        labels: Label values forwarded to the scikit-learn metric functions.
            The per-class precision/recall/F1 reported are those at index 1.
            The default list is never mutated.

    Returns:
        A tuple ``(res, conf_mtx, metrics)`` where ``res`` is the DataFrame
        restricted to rows with a usable prediction (with the added
        ``gold_formatted`` and ``pred_formatted`` columns), ``conf_mtx`` is the
        confusion matrix, and ``metrics`` is
        ``[accuracy, precision, recall, F1, macro-F1, weighted-F1]``, each
        rounded to two decimals.

    Raises:
        ValueError: If the file-name prefix is neither ``CN`` nor ``SX*``.
    """
    res = pd.read_csv(res_filename).rename(columns={"annotation": "gold"})

    # Read the prefix from the base name instead of a fixed "/"-separated
    # position, so the function works for any directory depth or separator.
    dataset = os.path.basename(res_filename).split("_")[0]
    if dataset == "CN":
        gold_map = {"yes": 1, "no": 0}
    elif dataset.startswith("SX"):
        gold_map = {"Sexiste": 1, "NonSexiste": 0, "Sexiste-reportage": 0}
    else:
        # Fail here with a clear message rather than later with a KeyError on
        # the never-created "gold_formatted" column.
        raise ValueError(
            f"Cannot infer dataset from file name {res_filename!r}: "
            f"expected a 'CN' or 'SX*' prefix, got {dataset!r}"
        )
    res["gold_formatted"] = res["gold"].map(gold_map)

    def format_answer(answer):
        """Map a raw model answer to 1, 0, or NaN when it is not usable."""
        # Empty CSV cells are read as float NaN, which has no .lower().
        if not isinstance(answer, str):
            return np.nan
        normalized = answer.strip().lower()
        if normalized in ("oui", "yes"):
            return 1
        if normalized in ("non", "no"):
            return 0
        return np.nan

    res["pred_formatted"] = res["pred"].apply(format_answer)
    # Only rows with an interpretable prediction take part in the scoring.
    res = res[res["pred_formatted"].notna()]

    gold = res["gold_formatted"]
    pred = res["pred_formatted"]
    conf_mtx = confusion_matrix(gold, pred, labels=labels)
    acc = accuracy_score(gold, pred)
    prec, rec, f1, _ = precision_recall_fscore_support(
        gold, pred, labels=labels, zero_division=np.nan
    )
    macro = f1_score(gold, pred, average="macro", labels=labels, zero_division=np.nan)
    weight = f1_score(gold, pred, average="weighted", labels=labels, zero_division=np.nan)

    # Index 1 selects the scores of the second entry of `labels`.
    metrics = [acc, prec[1], rec[1], f1[1], macro, weight]
    metrics = [round(metric, 2) for metric in metrics]
    return res, conf_mtx, metrics

In [48]:
# Directory holding the result CSVs to evaluate. Alternative runs are kept
# commented out for reference; only the active assignment is executed.
# directory = "test_flabT5/flanT5_large"
# directory = "test_flabT5/flanT5_XL"
directory = "llmjudge_experiment/test4"

# os.listdir returns entries in arbitrary order. Sort once, before evaluating,
# so that `filenames`, `conf_mtxs` and `metrics` stay index-aligned even when
# `filenames` is sorted again afterwards.
filenames = sorted(
    filename for filename in os.listdir(directory) if filename.endswith("csv")
)
conf_mtxs = []; metrics = []
for filename in filenames:
    # `result` is (filtered frame, confusion matrix, metrics); the name avoids
    # shadowing the builtin `eval`.
    result = evaluate(res_filename=os.path.join(directory, filename))
    conf_mtxs.append(result[1])
    metrics.append(result[2])

## Brief

In [49]:
# Human-readable names per dataset prefix (not referenced in this cell).
dataset_map = {"SX":"sexism full", "CN":"conan", "SXM":"sexism most", "SXL":"sexism least"}
# Day and month (ddmm) used as the suffix of the brief's file name.
current_time = datetime.now().strftime("%d%m")

# Sort the three parallel lists together by file name. Sorting `filenames`
# alone would pair each name with another file's matrix and metrics.
# The key restricts comparison to the name, so the arrays are never compared.
ordered = sorted(zip(filenames, conf_mtxs, metrics), key=lambda entry: entry[0])
filenames = [entry[0] for entry in ordered]
conf_mtxs = [entry[1] for entry in ordered]
metrics = [entry[2] for entry in ordered]

with open (f"{directory}/brief_{current_time}.txt", "w") as file:
    # [-1] rather than [1]: falls back to the whole name when `directory`
    # contains no "/", instead of raising IndexError.
    file.write(f"Brief on... \n- Pipeline name : {directory.split('/',1)[-1]} \n")
    file.write("------------------------------\n")
    for filename,conf_mtx,metric in zip(filenames,conf_mtxs,metrics):
        file.write(filename+"\n")
        # One section per file: confusion-matrix rows, then the metric values
        # under their column header.
        file.write(f"""[tn, fp]    {conf_mtx[0]}
[fn, tp]    {conf_mtx[1]}
Acc     Prec    Rec     F1      F1m     F1w
{'    '.join(map(str, metric))}

""")
