In [9]:
import pandas as pd
import os
from os import listdir
from os.path import isfile, join, isdir
import yaml
import datetime

def get_fold_datetimes_lists(list_of_artifacts_txt):
    list_of_fold = []
    list_of_datetimes = []
    for artifact in list_of_artifacts_txt:
        # print(artifact)
        index = artifact.find("2021")
        index2 = artifact.find("Fold")
        fold = int(artifact[index2+4:index])
        date_time_str = artifact[index:-4]
        date_time_obj = datetime.datetime.strptime(date_time_str, '%Y-%m-%d %H:%M:%S.%f')
        # print(f"fold = {fold}, date_time = {date_time_obj}")
        list_of_fold.append(fold)
        list_of_datetimes.append(date_time_obj)
    return list_of_fold, list_of_datetimes

def get_fold_index(list_of_fold, fold):
    indices = [i for i, x in enumerate(list_of_fold) if x == fold]
    return indices

def get_report_df(report_path):
    df = pd.read_csv(report_path, delimiter = r"\s{2,}", engine="python")
    df.drop(['micro avg', 'macro avg', 'weighted avg', 'samples avg'], inplace=True)
    df['class']= df.index
    df.drop(columns = ["precision","recall"], inplace=True)
    df.reset_index(drop=True, inplace=True)
    return df

def compare_dates_and_store_reports_to_df(indices, list_of_datetimes, df_GMTL, df_MTL):
    if list_of_datetimes[indices[0]] < list_of_datetimes[indices[1]]:
        MTL_index, GMTL_index = 0, 1
    else:
        MTL_index, GMTL_index = 1, 0

    # GMTL
    report_path = f"{ARTIFACT_DIR}/{list_of_artifacts_txt[indices[GMTL_index]]}"
    if df_GMTL.empty:
        df_GMTL = get_report_df(report_path)
    else:
        df_GMTL = df_GMTL.append(get_report_df(report_path))
    print(f"[report] {list_of_artifacts_txt[indices[GMTL_index]]} added to GMTL")
    
    # MTL
    report_path = f"{ARTIFACT_DIR}/{list_of_artifacts_txt[indices[MTL_index]]}"
    if df_MTL.empty:
        df_MTL = get_report_df(report_path)
    else:
        df_MTL = df_MTL.append(get_report_df(report_path))
    print(f"[report] {list_of_artifacts_txt[indices[MTL_index]]} added to MTL")
    
    return df_GMTL, df_MTL

def make_f1_table(df_GMTL, name):
    df_GMTL_mean = df_GMTL.groupby(['class']).mean().add_suffix('_mean').reset_index()
    df_GMTL_std = df_GMTL.groupby(['class']).std().add_suffix('_std').reset_index().drop(columns=['class', 'support_std'])

    result = pd.concat([df_GMTL_mean, df_GMTL_std], axis=1)
    #${79.86\pm4.16}$
    result[f'f1-score({name})'] = result.apply(lambda row: f"${{{round(row['f1-score_mean']*100,2)}\pm{round(row['f1-score_std']*100,2)}}}$", axis=1)
    total = result.support_mean.sum()
    print("total=", total)
    result['count(%)'] = result.support_mean.apply(lambda x : f"{x}({round((x/total)*100,1)})")
    result.drop(columns = ['f1-score_mean', 'f1-score_std', 'support_mean'], inplace = True)
    return result

# DIR_TO_CHECK = "../mlruns/1"
DIR_TO_CHECK = "../mlruns/2"

runs = [f for f in listdir(DIR_TO_CHECK) if isdir(join(DIR_TO_CHECK, f))]
TTEST_DIR = []
for DIR in runs:
    run_dir = f"{DIR_TO_CHECK}/{DIR}"
        # print(run_dir)
    f = open(f"{run_dir}/tags/mlflow.runName", "r")
    runname = f.readline()
    # print(runname[:5])
    if runname[:5] == 'TTEST':
        print(f"{run_dir}/tags/{runname}")
        print(runname)
        if runname == "TTEST_BioBERT" and DIR == "1d7533e948cf4e6289725fad4ab9ad1f":
        # if runname != "TTEST_BlueBERT":
        
            print("Found it!")
        else:
            continue
    else:
        continue
    ARTIFACT_DIR = f"{run_dir}/artifacts"
    list_of_artifacts_txt = [f for f in listdir(ARTIFACT_DIR) if isfile(join(ARTIFACT_DIR, f))]

    list_of_fold, list_of_datetimes = get_fold_datetimes_lists(list_of_artifacts_txt)
    
    latex_table = pd.DataFrame(columns=['class name', 'GMTL (f1-score)', 'MTL (f1-score)', 'count(%)'])

    df_GMTL, df_MTL = pd.DataFrame(), pd.DataFrame()
    for fold_i in range(1,11):
        #MTL is the first one, GMTL is the second one
        print(f"====================fold {fold_i}====================")
        indices = get_fold_index(list_of_fold, fold_i)

        df_GMTL, df_MTL = compare_dates_and_store_reports_to_df(indices, list_of_datetimes, df_GMTL, df_MTL)

    #-----now we have all the reports in df_GMTL and df_MTL
    result1 = make_f1_table(df_GMTL, "GMTL")
    result1.drop(columns=['count(%)'], inplace= True)
    result2 = make_f1_table(df_MTL, "MTL")
    result2.drop(columns=['class', ], inplace= True)
    result = pd.concat([result1, result2], axis=1)
    print("Done")
    break


../mlruns/2/aaf22b68231f4f30abaf48963c30f0c5/tags/TTEST_BioBERT
TTEST_BioBERT
../mlruns/2/89922d08fcb7410eb05a4e1e16cf9e3c/tags/TTEST_BlueBERT
TTEST_BlueBERT
../mlruns/2/3ffd6cc084f04ceaacc6112da6f278a2/tags/TTEST_bertuncased
TTEST_bertuncased
../mlruns/2/41dfea55249f4264988a3d632015abee/tags/TTEST_BioBERT
TTEST_BioBERT
../mlruns/2/1d7533e948cf4e6289725fad4ab9ad1f/tags/TTEST_BioBERT
TTEST_BioBERT
Found it!
[report] cls_report.Fold12021-03-27 11:06:21.925478.txt added to GMTL
[report] cls_report.Fold12021-03-27 10:25:35.119512.txt added to MTL
[report] cls_report.Fold22021-03-27 11:55:47.402520.txt added to GMTL
[report] cls_report.Fold22021-03-27 11:15:24.216188.txt added to MTL
[report] cls_report.Fold32021-03-27 12:45:19.865868.txt added to GMTL
[report] cls_report.Fold32021-03-27 12:04:47.585014.txt added to MTL
[report] cls_report.Fold42021-03-27 13:34:51.351637.txt added to GMTL
[report] cls_report.Fold42021-03-27 12:54:23.795743.txt added to MTL
[report] cls_report.Fold52021-03-2

In [10]:
print(result.to_latex(index=False))

\begin{tabular}{llll}
\toprule
class &  f1-score(GMTL) &   f1-score(MTL) &  count(\%) \\
\midrule
  C01 & \$\{79.5\textbackslash pm1.35\}\$ & \$\{79.5\textbackslash pm2.42\}\$ &  102(4.5) \\
  C02 &  \$\{62.6\textbackslash pm5.4\}\$ &  \$\{53.7\textbackslash pm4.0\}\$ &   47(2.1) \\
  C03 & \$\{92.0\textbackslash pm2.94\}\$ & \$\{87.9\textbackslash pm5.11\}\$ &   17(0.7) \\
  C04 & \$\{82.4\textbackslash pm1.51\}\$ &  \$\{84.9\textbackslash pm1.1\}\$ & 253(11.1) \\
  C05 & \$\{64.1\textbackslash pm2.92\}\$ & \$\{65.0\textbackslash pm2.75\}\$ &   67(2.9) \\
  C06 & \$\{78.2\textbackslash pm1.14\}\$ & \$\{78.9\textbackslash pm1.66\}\$ &  120(5.3) \\
  C07 & \$\{75.8\textbackslash pm3.79\}\$ & \$\{70.3\textbackslash pm4.03\}\$ &   21(0.9) \\
  C08 & \$\{73.3\textbackslash pm3.02\}\$ & \$\{72.7\textbackslash pm2.11\}\$ &  104(4.6) \\
  C09 & \$\{72.4\textbackslash pm2.67\}\$ & \$\{73.7\textbackslash pm3.53\}\$ &   29(1.3) \\
  C10 & \$\{68.2\textbackslash pm2.04\}\$ & \$\{68.2\textbackslas

In [185]:
print(result.to_latex(index=False))

\begin{tabular}{llll}
\toprule
class &  f1-score(GMTL) &   f1-score(MTL) &  count(\%) \\
\midrule
  C01 & \$\{80.1\textbackslash pm1.37\}\$ & \$\{76.4\textbackslash pm3.37\}\$ &  102(4.5) \\
  C02 & \$\{64.2\textbackslash pm6.68\}\$ & \$\{57.9\textbackslash pm4.48\}\$ &   47(2.1) \\
  C03 & \$\{91.5\textbackslash pm3.34\}\$ & \$\{89.6\textbackslash pm3.86\}\$ &   17(0.7) \\
  C04 & \$\{82.9\textbackslash pm1.97\}\$ & \$\{84.4\textbackslash pm0.97\}\$ & 253(11.1) \\
  C05 & \$\{63.6\textbackslash pm2.91\}\$ & \$\{65.8\textbackslash pm2.25\}\$ &   67(2.9) \\
  C06 & \$\{77.7\textbackslash pm1.34\}\$ & \$\{79.0\textbackslash pm1.76\}\$ &  120(5.3) \\
  C07 & \$\{77.0\textbackslash pm4.27\}\$ & \$\{72.8\textbackslash pm3.39\}\$ &   21(0.9) \\
  C08 & \$\{72.5\textbackslash pm2.59\}\$ & \$\{73.4\textbackslash pm2.37\}\$ &  104(4.6) \\
  C09 & \$\{73.4\textbackslash pm4.06\}\$ & \$\{73.2\textbackslash pm2.25\}\$ &   29(1.3) \\
  C10 & \$\{68.5\textbackslash pm1.27\}\$ &  \$\{68.3\textbacksla

In [144]:
df = pd.read_csv("../mlruns/1/def5404d07d74bf38ae849f78423c7b2/artifacts/cls_report.Fold12021-03-21 23:00:06.434145.txt", delimiter = r"\s{2,}", engine="python")
df.drop(['micro avg', 'macro avg', 'weighted avg', 'samples avg'], inplace=True)
df['class']= df.index
df.reset_index(drop=True, inplace=True)

df


Unnamed: 0,precision,recall,f1-score,support,class
0,0.94,0.97,0.95,75,Cardiomegaly
1,0.94,0.65,0.77,23,Deformity
2,1.0,0.95,0.98,22,Nodule
3,0.95,0.95,0.95,22,granulomatous disease
4,1.0,1.0,1.0,35,spine degenerative
5,0.91,0.94,0.93,33,Effusion
6,0.84,0.81,0.82,26,Medical Device
7,0.87,0.8,0.83,25,indwelling catheters
8,0.98,0.98,0.98,91,opacity
9,0.92,0.96,0.94,25,Airspace Disease
