In [1]:
import json
import glob
import pandas as pd

# Dataset the evaluated models were trained on; it selects the log directory to scan.
# Set to "SWAN_DF" to inspect the models trained on that dataset instead.
TRAIN_DATASET = "DeepSpeak_v1_1"

# Find all results JSON files, laid out as
#   ../logs/<TRAIN_DATASET>/binary/<fusion>/version_<N>/results/<eval_dataset>.json
# glob returns matches in arbitrary order, so sort them to keep the row order of the
# tables below reproducible from run to run (ties in the sort metric keep input order).
results_files = sorted(
    glob.glob(f"../logs/{TRAIN_DATASET}/binary/*/version_*/results/*.json")
)

In [2]:
# Build one record per (results file, split) pair, then organise the records into
# ds_dfs[<eval_dataset>][<"train" | "dev" | "test">] -> DataFrame sorted by AUC.
rows = []

for file_path in results_files:
    # Expected layout: .../<fusion>/version_<N>/results/<eval_dataset>.json
    # Normalise separators first: on Windows glob returns backslash-separated
    # components, which a plain split("/") would not break apart.
    parts = file_path.replace("\\", "/").split("/")
    eval_dataset = parts[-1].split(".")[0]  # file name up to its first "."
    version = parts[-3].split("_")[-1]  # "version_X" -> "X" (kept as a string)
    fusion = parts[-4]  # directory that contains the version_* folders
    with open(file_path, "r") as f:
        data = json.load(f)

    # One row per split present in the file; the split's metrics become columns.
    for split in ["train", "val", "test"]:
        if split in data:
            row = {"fusion": fusion, "version": version, "split": split, "dataset": eval_dataset}
            row.update(data[split])
            rows.append(row)

# Fail with a clear message instead of the opaque KeyError('dataset') that grouping
# an empty frame would raise.
if not rows:
    raise RuntimeError(
        "No results loaded: `results_files` is empty or none of the files "
        "contain a 'train', 'val' or 'test' entry."
    )

# Group the flat table by evaluation dataset.
df = pd.DataFrame(rows).groupby(["dataset"])

ds_dfs = {}
for name, group in df:
    # With a length-1 list key, pandas >= 2.0 yields 1-tuples as group names while
    # older releases yield the bare value; accept both so the dict keys are always
    # the plain dataset name.
    dataset_name = name[0] if isinstance(name, tuple) else name
    group = group.drop(columns=["dataset"])

    # Per split: select its rows, drop the now-constant "split" column and sort by
    # AUC, highest first. The "val" split is exposed under the key "dev".
    ds_dfs[dataset_name] = {
        out_key: (
            group[group["split"] == split_name]
            .drop(columns=["split"])
            .sort_values(by="auc", ascending=False)
            .reset_index(drop=True)
        )
        for out_key, split_name in (("train", "train"), ("dev", "val"), ("test", "test"))
    }

In [3]:
# Column names for which highlight_best_per_column marks the largest value as best.
maximize_metrics = [
    "acc",
    "ap",
    "prec",
    "rec",
    "f1",
    "auc",
]

# Column names for which highlight_best_per_column marks the smallest value as best.
minimize_metrics = [
    "loss",
]

In [4]:
def highlight_best_per_column(s):
    """Return one CSS string per cell of column ``s``, marking the best value(s).

    Intended for ``Styler.apply(..., axis=0)``, which passes each column as a Series.

    - Columns named in ``maximize_metrics`` highlight every cell equal to the
      column maximum; columns named in ``minimize_metrics`` highlight every cell
      equal to the column minimum (ties are all highlighted).
    - The highlight is a red background for the "auc" and "ap" columns and a
      green background for every other metric column.
    - Any other column gets an empty style for every cell.
    """
    column = s.name

    if column in maximize_metrics:
        best_value = s.max()
    elif column in minimize_metrics:
        best_value = s.min()
    else:
        # Not a metric column: nothing to highlight.
        return [""] * len(s)

    colour = "red" if column in ("auc", "ap") else "green"
    highlighted = f"background-color: {colour}"
    return [highlighted if hit else "" for hit in (s == best_value)]

In [5]:
# Show the per-split result tables for evaluation on DeepSpeak_v1_1,
# with the best value of each metric column highlighted.
df_1 = ds_dfs["DeepSpeak_v1_1"]

print(f"Models trained on {TRAIN_DATASET}.")
print("Results on DeepSpeak_v1_1.")
print("-" * 50)

# (heading printed above the table, key of the split inside df_1)
for heading, split_key in (("- Train:", "train"), ("- Dev:", "dev"), ("- Test:", "test")):
    print(heading)
    display(df_1[split_key].style.apply(highlight_best_per_column, axis=0))

Models trained on DeepSpeak_v1_1.
Results on DeepSpeak_v1_1.
--------------------------------------------------
- Train:


Unnamed: 0,fusion,version,acc,prec,rec,f1,ap,auc
0,CAFF,0,0.999789,0.999581,1.0,0.99979,1.0,1.0
1,CAFF,1,0.999683,0.999372,1.0,0.999686,1.0,1.0
2,CAFF,2,0.999577,0.999162,1.0,0.999581,1.0,1.0


- Dev:


Unnamed: 0,fusion,version,acc,prec,rec,f1,ap,auc
0,CAFF,0,0.987584,0.981378,0.99434,0.987816,0.999602,0.999591
1,CAFF,2,0.989494,0.979667,1.0,0.989729,0.999561,0.999551
2,CAFF,1,0.985673,0.983114,0.988679,0.985889,0.999263,0.999259


- Test:


Unnamed: 0,fusion,version,acc,prec,rec,f1,ap,auc
0,CAFF,0,0.645483,0.601662,0.919172,0.727273,0.800989,0.781714
1,CAFF,2,0.638269,0.595607,0.923848,0.724273,0.784499,0.767783
2,CAFF,1,0.667468,0.625059,0.8831,0.732004,0.786278,0.764764


SWAN_DF results

In [6]:
# Show the per-split result tables for evaluation on SWAN_DF,
# with the best value of each metric column highlighted.
df_1 = ds_dfs["SWAN_DF"]

print(f"Models trained on {TRAIN_DATASET}.")
print("Results on SWAN_DF.")
print("-" * 50)

# (heading printed above the table, key of the split inside df_1)
for heading, split_key in (("- Train:", "train"), ("- Dev:", "dev"), ("- Test:", "test")):
    print(heading)
    display(df_1[split_key].style.apply(highlight_best_per_column, axis=0))

Models trained on DeepSpeak_v1_1.
Results on SWAN_DF.
--------------------------------------------------
- Train:


Unnamed: 0,fusion,version,acc,prec,rec,f1,ap,auc
0,CAFF,0,0.62243,0.9,0.652322,0.756402,0.927275,0.538908
1,CFF,0,0.601873,0.896485,0.629672,0.739755,0.923915,0.530433
2,CAFF,1,0.559332,0.869337,0.599774,0.709824,0.914241,0.454561


- Dev:


Unnamed: 0,fusion,version,acc,prec,rec,f1,ap,auc
0,CFF,0,0.637313,0.856531,0.694444,0.767018,0.910861,0.574394
1,CAFF,0,0.613433,0.848352,0.670139,0.748788,0.903711,0.543329
2,CAFF,1,0.565672,0.81457,0.640625,0.717201,0.882194,0.44402


- Test:


Unnamed: 0,fusion,version,acc,prec,rec,f1,ap,auc
0,CAFF,0,0.751955,0.860158,0.848958,0.854522,0.948068,0.726921
1,CFF,0,0.755307,0.863576,0.848958,0.856205,0.945125,0.716371
2,CAFF,1,0.640223,0.822254,0.740885,0.779452,0.902871,0.528595
