In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import math

import torch
import torch.nn as nn
import torch.nn.functional as F

from NegativeClassOptimization import utils, preprocessing, ml, config, datasets, visualisations



# ID: in-distribution (closed-set) performance

In [2]:
def get_id_results(dir_path, fname):
    """Load a closed-set performance table and annotate it with task metadata.

    Parameters
    ----------
    dir_path : str or Path
        Directory, relative to ``config.DATA_BASE_PATH``, that holds the file.
    fname : str
        Name of the tab-separated results file. It must provide a ``task``
        column (task strings understood by
        ``datasets.ClassificationTask.init_from_str``) and an ``acc`` column.

    Returns
    -------
    pandas.DataFrame
        The rows of the file, in file order, with these extra columns:
        ``ag_pos``, ``ag_neg``, ``seed_id``, ``split_id``, ``task_type``
        (all parsed from ``task``), ``task_type_clean`` (``task_type`` mapped
        through ``visualisations.PlotParams.map_task_type_to_clean``; unmapped
        values become NaN) and ``acc_closed`` (``acc`` cast to float).

    Raises
    ------
    pandas.errors.MergeError
        If the parsed metadata is not unique per task string (should not
        happen, since it is built from the unique task strings).
    """
    base_p = Path(config.DATA_BASE_PATH) / dir_path
    df = pd.read_csv(base_p / fname, sep='\t')

    # Parse every distinct task string exactly once. Building one record per
    # *row* and merging on "task" would multiply rows whenever a task string
    # occurs more than once in the file.
    meta_columns = ["task", "ag_pos", "ag_neg", "seed_id", "split_id", "task_type"]
    records = []
    for task_str in df["task"].unique():
        task = datasets.ClassificationTask.init_from_str(task_str)
        records.append({
            "task": task_str,
            "ag_pos": task.ag_pos,
            "ag_neg": task.ag_neg,
            "seed_id": task.seed_id,
            "split_id": task.split_id,
            "task_type": task.task_type.to_str(),
        })
    # Explicit columns keep the merge key present even for an empty file.
    task_meta = pd.DataFrame(records, columns=meta_columns)

    # many_to_one: each result row picks up exactly one metadata row, so the
    # row count and the row order of the file are preserved.
    df = pd.merge(df, task_meta, on="task", validate="many_to_one")

    map_task_type_to_clean = visualisations.PlotParams.map_task_type_to_clean.copy()
    df["task_type_clean"] = df["task_type"].map(map_task_type_to_clean)
    df["acc_closed"] = df["acc"].astype(float)
    return df

In [3]:
# Closed-set (ID) results: one frame per model setup, each tagged with a
# human-readable `setup` label before they are stacked.
df_1 = get_id_results(
    "Frozen_MiniAbsolut_ML", "closed_performance.tsv"
).assign(setup="SN10")

df_2 = get_id_results(
    "Frozen_MiniAbsolut_Linear_ML", "closed_performance_logistic.tsv"
).assign(setup="Logistic")

df_3 = get_id_results(
    "Frozen_MiniAbsolut_ML_Transformer_Parameters_Absolut/transformer_parameterset_a764f9c7",
    "closed_performance.tsv",
).assign(setup="Transformer - Absolut")

df_3b = get_id_results(
    "Frozen_MiniAbsolut_ML_Transformer_Parameters_Experimental/transformer_parameterset_085feb71",
    "closed_performance.tsv",
).assign(setup="Transformer - Experimental")

df_4 = get_id_results(
    "Frozen_MiniAbsolut_ML_x10under", "closed_performance.tsv"
).assign(setup="SN10 x10_undersampling")

df_5 = get_id_results(
    "Frozen_MiniAbsolut_ML_x50under", "closed_performance.tsv"
).assign(setup="SN10 x50_undersampling")

df_6 = get_id_results(
    "Frozen_MiniAbsolut_ML_antiberta2", "closed_performance.tsv"
).assign(setup="PLM Antiberta2")

df_6b = get_id_results(
    "Frozen_MiniAbsolut_ML_antiberta2_experimental", "closed_performance.tsv"
).assign(setup="PLM Antiberta2 - Experimental")

df_7 = get_id_results(
    "Frozen_MiniAbsolut_ML_esm2b", "closed_performance.tsv"
).assign(setup="PLM ESM2b")

df_7b = get_id_results(
    "Frozen_MiniAbsolut_ML_esm2b_experimental", "closed_performance.tsv"
).assign(setup="PLM ESM2b - Experimental")

# Stack all setups row-wise, then drop the "vs 1" task type.
df = pd.concat([df_1, df_2, df_3, df_3b, df_4, df_5, df_6, df_6b, df_7, df_7b])
df = df.loc[df["task_type_clean"].ne("vs 1")].copy()
df.head()

Unnamed: 0,task,acc,recall,precision,f1,ag_pos,ag_neg,seed_id,split_id,task_type,task_type_clean,acc_closed,setup,N_pos,N_neg
0,HIGH_VS_95LOW__3VRL__auto__0__42,0.9966,0.998,0.995213,0.996605,3VRL,auto,0,42,high_vs_95low,vs Non-binder,0.9966,SN10,,
1,HIGH_VS_LOOSER__3VRL__auto__0__42,0.9534,0.9604,0.94714,0.953724,3VRL,auto,0,42,high_vs_looser,vs Weak,0.9534,SN10,,
2,ONE_VS_NINE__3VRL__auto__0__42,0.972986,0.975,0.971116,0.973054,3VRL,auto,0,42,1v9,vs 9,0.972986,SN10,,
3,HIGH_VS_95LOW__3VRL__auto__1__42,0.9962,0.9986,0.99383,0.996209,3VRL,auto,1,42,high_vs_95low,vs Non-binder,0.9962,SN10,,
4,HIGH_VS_LOOSER__3VRL__auto__1__42,0.9526,0.9576,0.948119,0.952836,3VRL,auto,1,42,high_vs_looser,vs Weak,0.9526,SN10,,


In [4]:
# Median closed-set accuracy: task types as rows, setups as columns.
# NOTE: `df` is the ID frame here; the OOD section rebinds `df`, so run cells in order.
(
    df.groupby(["setup", "task_type_clean"])["acc_closed"]
    .median()
    .reset_index()
    .pivot(index=["task_type_clean"], columns=["setup"], values=["acc_closed"])
)

Unnamed: 0_level_0,acc_closed,acc_closed,acc_closed,acc_closed,acc_closed,acc_closed,acc_closed,acc_closed,acc_closed,acc_closed
setup,Logistic,PLM Antiberta2,PLM Antiberta2 - Experimental,PLM ESM2b,PLM ESM2b - Experimental,SN10,SN10 x10_undersampling,SN10 x50_undersampling,Transformer - Absolut,Transformer - Experimental
task_type_clean,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
vs 9,0.864682,0.82963,,0.874875,,0.944422,0.890891,0.815,0.8619,
vs Non-binder,0.97165,0.9543,0.883,0.9749,0.901,0.988,0.963,0.94,0.95635,0.962
vs Weak,0.79155,0.76975,0.75,0.8111,0.742,0.91735,0.8235,0.675,0.65585,0.827


In [5]:
# Closed-set accuracy as "median (Q1 - Q3)" with two decimals; transposed so
# that setups are rows and task types are columns.
# NOTE: `df` is the ID frame here; the OOD section rebinds `df`, so run cells in order.
(
    df.groupby(["setup", "task_type_clean"])["acc_closed"]
    .apply(
        lambda x: f"{x.median():.2f} ({x.quantile(0.25):.2f} - {x.quantile(0.75):.2f})"
    )
    .reset_index()
    .pivot(index=["task_type_clean"], columns=["setup"], values=["acc_closed"])
    .transpose()
)

Unnamed: 0_level_0,task_type_clean,vs 9,vs Non-binder,vs Weak
Unnamed: 0_level_1,setup,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
acc_closed,Logistic,0.86 (0.85 - 0.91),0.97 (0.96 - 0.99),0.79 (0.77 - 0.86)
acc_closed,PLM Antiberta2,0.83 (0.81 - 0.86),0.95 (0.94 - 0.97),0.77 (0.74 - 0.78)
acc_closed,PLM Antiberta2 - Experimental,,0.88 (0.88 - 0.88),0.75 (0.75 - 0.75)
acc_closed,PLM ESM2b,0.87 (0.86 - 0.90),0.97 (0.97 - 0.99),0.81 (0.79 - 0.85)
acc_closed,PLM ESM2b - Experimental,,0.90 (0.90 - 0.90),0.74 (0.74 - 0.74)
acc_closed,SN10,0.94 (0.94 - 0.96),0.99 (0.98 - 1.00),0.92 (0.89 - 0.94)
acc_closed,SN10 x10_undersampling,0.89 (0.87 - 0.92),0.96 (0.95 - 0.98),0.82 (0.80 - 0.87)
acc_closed,SN10 x50_undersampling,0.81 (0.78 - 0.85),0.94 (0.92 - 0.95),0.68 (0.65 - 0.70)
acc_closed,Transformer - Absolut,0.86 (0.85 - 0.87),0.96 (0.94 - 0.98),0.66 (0.63 - 0.70)
acc_closed,Transformer - Experimental,,0.96 (0.95 - 0.96),0.83 (0.80 - 0.83)


# OOD: out-of-distribution (open-set) performance

In [6]:
def get_ood_results(dir_path, fname):
    """Load an open-set performance table and annotate it with task metadata.

    Parameters
    ----------
    dir_path : str or Path
        Directory, relative to ``config.DATA_BASE_PATH``, that holds the file.
    fname : str
        Name of the tab-separated results file. It must provide ``task_1`` and
        ``task_2`` columns holding task strings understood by
        ``datasets.ClassificationTask.init_from_str``.

    Returns
    -------
    pandas.DataFrame
        The rows of the file whose ``task_1`` and ``task_2`` are both not of
        type ``'1v1'``, with these extra columns: ``ag`` (``ag_pos`` of
        ``task_1``), ``task_1_type``, ``task_2_type``, ``replicate_id``
        (``"<seed_id>_<split_id>"`` of ``task_1``), ``model_task`` and
        ``test_task`` (the two task types mapped through
        ``visualisations.PlotParams.map_task_type_to_clean``).

    Raises
    ------
    KeyError
        If a remaining task type has no entry in ``map_task_type_to_clean``.
    """
    base_p = Path(config.DATA_BASE_PATH) / dir_path
    df_o = pd.read_csv(base_p / fname, sep='\t')

    # The same task strings recur on many rows (and across both columns), so
    # parse each distinct string once instead of several times per row.
    distinct_tasks = pd.unique(
        pd.concat([df_o["task_1"], df_o["task_2"]], ignore_index=True)
    )
    parsed = {
        task_str: datasets.ClassificationTask.init_from_str(task_str)
        for task_str in distinct_tasks
    }

    df_o["ag"] = df_o["task_1"].apply(lambda x: parsed[x].ag_pos)
    df_o["task_1_type"] = df_o["task_1"].apply(lambda x: parsed[x].task_type.to_str())
    df_o["task_2_type"] = df_o["task_2"].apply(lambda x: parsed[x].task_type.to_str())
    df_o["replicate_id"] = df_o["task_1"].apply(
        lambda x: str(parsed[x].seed_id) + "_" + str(parsed[x].split_id)
    )

    # Drop 1v1 tasks on either side before mapping to display names.
    df_o = df_o.query("task_1_type != '1v1' and task_2_type != '1v1'").copy()

    # Direct indexing (not Series.map) so an unknown task type raises KeyError
    # rather than silently becoming NaN.
    clean_names = visualisations.PlotParams.map_task_type_to_clean
    df_o["model_task"] = df_o["task_1_type"].apply(lambda x: clean_names[x])
    df_o["test_task"] = df_o["task_2_type"].apply(lambda x: clean_names[x])

    return df_o

In [7]:
# Open-set (OOD) results: one frame per model setup, each tagged with a
# human-readable `setup` label before they are stacked.
df_1 = get_ood_results(
    "Frozen_MiniAbsolut_ML", "openset_performance.tsv"
).assign(setup="SN10")

df_2 = get_ood_results(
    "Frozen_MiniAbsolut_Linear_ML", "openset_performance_logistic.tsv"
).assign(setup="Logistic")

df_3 = get_ood_results(
    "Frozen_MiniAbsolut_ML_Transformer_Parameters_Absolut/transformer_parameterset_a764f9c7",
    "openset_performance.tsv",
).assign(setup="Transformer - Absolut")

df_3b = get_ood_results(
    "Frozen_MiniAbsolut_ML_Transformer_Parameters_Experimental/transformer_parameterset_085feb71",
    "openset_performance.tsv",
).assign(setup="Transformer - Experimental")

df_4 = get_ood_results(
    "Frozen_MiniAbsolut_ML_x10under", "openset_performance.tsv"
).assign(setup="SN10 x10_undersampling")

df_5 = get_ood_results(
    "Frozen_MiniAbsolut_ML_x50under", "openset_performance.tsv"
).assign(setup="SN10 x50_undersampling")

df_6 = get_ood_results(
    "Frozen_MiniAbsolut_ML_antiberta2", "openset_performance.tsv"
).assign(setup="PLM Antiberta2")

df_6b = get_ood_results(
    "Frozen_MiniAbsolut_ML_antiberta2_experimental", "openset_performance.tsv"
).assign(setup="PLM Antiberta2 - Experimental")

df_7 = get_ood_results(
    "Frozen_MiniAbsolut_ML_esm2b", "openset_performance.tsv"
).assign(setup="PLM ESM2b")

df_7b = get_ood_results(
    "Frozen_MiniAbsolut_ML_esm2b_experimental", "openset_performance.tsv"
).assign(setup="PLM ESM2b - Experimental")

# Stack all setups row-wise. No "vs 1" filter is applied here:
# get_ood_results already drops 1v1 tasks, and it does not create a
# `task_type_clean` column.
df = pd.concat([df_1, df_2, df_3, df_3b, df_4, df_5, df_6, df_6b, df_7, df_7b])
df.head()

Unnamed: 0,task_1,task_2,acc,recall,precision,f1,ag,task_1_type,task_2_type,replicate_id,model_task,test_task,setup
0,HIGH_VS_95LOW__3VRL__auto__0__42,HIGH_VS_95LOW__3VRL__auto__0__42,0.9966,0.998,0.995213,0.996605,3VRL,high_vs_95low,high_vs_95low,0_42,vs Non-binder,vs Non-binder,SN10
1,HIGH_VS_95LOW__3VRL__auto__0__42,HIGH_VS_LOOSER__3VRL__auto__0__42,0.7585,0.998,0.67478,0.805163,3VRL,high_vs_95low,high_vs_looser,0_42,vs Non-binder,vs Weak,SN10
2,HIGH_VS_95LOW__3VRL__auto__0__42,ONE_VS_NINE__3VRL__auto__0__42,0.912356,0.998,0.852117,0.919307,3VRL,high_vs_95low,1v9,0_42,vs Non-binder,vs 9,SN10
3,HIGH_VS_95LOW__3VRL__auto__0__42,HIGH_VS_95LOW__3VRL__auto__1__42,0.9966,0.998,0.995213,0.996605,3VRL,high_vs_95low,high_vs_95low,0_42,vs Non-binder,vs Non-binder,SN10
4,HIGH_VS_95LOW__3VRL__auto__0__42,HIGH_VS_LOOSER__3VRL__auto__1__42,0.7585,0.998,0.67478,0.805163,3VRL,high_vs_95low,high_vs_looser,0_42,vs Non-binder,vs Weak,SN10


In [8]:
# Median open-set accuracy: (model task, test task) pairs as rows, setups as columns.
(
    df.groupby(["setup", "model_task", "test_task"])["acc"]
    .median()
    .reset_index()
    .pivot(index=["model_task", "test_task"], columns=["setup"], values=["acc"])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,acc,acc,acc,acc,acc,acc
Unnamed: 0_level_1,setup,Logistic,PLM Antiberta2,PLM Antiberta2 - Experimental,PLM ESM2b,PLM ESM2b - Experimental,SN10,SN10 x10_undersampling,SN10 x50_undersampling,Transformer - Absolut,Transformer - Experimental
model_task,test_task,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
vs 9,vs 9,0.865583,0.82963,,0.87968,,0.949675,0.891892,0.8,0.8607,
vs 9,vs Non-binder,0.79055,0.8448,,0.8475,,0.9174,0.826,0.6675,0.6295,
vs 9,vs Weak,0.6366,0.6632,,0.6673,,0.71425,0.659,0.5925,0.5453,
vs Non-binder,vs 9,0.705653,0.638939,,0.663764,,0.812206,0.71972,0.665,0.6762,
vs Non-binder,vs Non-binder,0.9727,0.9543,0.883,0.9735,0.901,0.9891,0.964,0.9125,0.95795,0.96
vs Non-binder,vs Weak,0.6598,0.6268,0.654,0.6802,0.66,0.72365,0.663,0.595,0.63925,0.667
vs Weak,vs 9,0.77939,0.723574,,0.752953,,0.904352,0.818819,0.7225,0.65075,
vs Weak,vs Non-binder,0.8967,0.8872,0.795,0.8862,0.801,0.9605,0.902,0.8075,0.88335,0.603
vs Weak,vs Weak,0.7932,0.76975,0.75,0.8159,0.742,0.92115,0.824,0.6575,0.66,0.828


In [9]:
# Open-set accuracy as "median (Q1 - Q3)" with two decimals; transposed so
# that setups are rows and (model task, test task) pairs are columns.
(
    df.groupby(["setup", "model_task", "test_task"])["acc"]
    .apply(
        lambda x: f"{x.median():.2f} ({x.quantile(0.25):.2f} - {x.quantile(0.75):.2f})"
    )
    .reset_index()
    .pivot(index=["model_task", "test_task"], columns=["setup"], values=["acc"])
    .transpose()
)

Unnamed: 0_level_0,model_task,vs 9,vs 9,vs 9,vs Non-binder,vs Non-binder,vs Non-binder,vs Weak,vs Weak,vs Weak
Unnamed: 0_level_1,test_task,vs 9,vs Non-binder,vs Weak,vs 9,vs Non-binder,vs Weak,vs 9,vs Non-binder,vs Weak
Unnamed: 0_level_2,setup,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
acc,Logistic,0.87 (0.85 - 0.91),0.79 (0.66 - 0.90),0.64 (0.62 - 0.72),0.71 (0.69 - 0.77),0.97 (0.96 - 0.99),0.66 (0.65 - 0.68),0.78 (0.75 - 0.88),0.90 (0.88 - 0.93),0.79 (0.77 - 0.86)
acc,PLM Antiberta2,0.83 (0.81 - 0.86),0.84 (0.77 - 0.91),0.66 (0.63 - 0.69),0.64 (0.61 - 0.67),0.95 (0.94 - 0.97),0.63 (0.62 - 0.64),0.72 (0.68 - 0.74),0.89 (0.87 - 0.90),0.77 (0.74 - 0.78)
acc,PLM Antiberta2 - Experimental,,,,,0.88 (0.88 - 0.88),0.65 (0.65 - 0.65),,0.80 (0.80 - 0.80),0.75 (0.75 - 0.75)
acc,PLM ESM2b,0.88 (0.86 - 0.90),0.85 (0.66 - 0.90),0.67 (0.66 - 0.69),0.66 (0.64 - 0.74),0.97 (0.96 - 0.98),0.68 (0.67 - 0.70),0.75 (0.74 - 0.78),0.89 (0.88 - 0.91),0.82 (0.80 - 0.85)
acc,PLM ESM2b - Experimental,,,,,0.90 (0.90 - 0.90),0.66 (0.66 - 0.66),,0.80 (0.80 - 0.80),0.74 (0.74 - 0.74)
acc,SN10,0.95 (0.94 - 0.96),0.92 (0.81 - 0.98),0.71 (0.68 - 0.80),0.81 (0.80 - 0.83),0.99 (0.98 - 1.00),0.72 (0.70 - 0.74),0.90 (0.86 - 0.94),0.96 (0.95 - 0.97),0.92 (0.90 - 0.94)
acc,SN10 x10_undersampling,0.89 (0.87 - 0.92),0.83 (0.73 - 0.90),0.66 (0.63 - 0.71),0.72 (0.70 - 0.75),0.96 (0.95 - 0.98),0.66 (0.65 - 0.68),0.82 (0.78 - 0.90),0.90 (0.88 - 0.93),0.82 (0.80 - 0.87)
acc,SN10 x50_undersampling,0.80 (0.79 - 0.82),0.67 (0.65 - 0.68),0.59 (0.55 - 0.63),0.67 (0.64 - 0.67),0.91 (0.90 - 0.92),0.59 (0.58 - 0.61),0.72 (0.69 - 0.74),0.81 (0.80 - 0.83),0.66 (0.64 - 0.67)
acc,Transformer - Absolut,0.86 (0.84 - 0.88),0.63 (0.51 - 0.77),0.55 (0.54 - 0.59),0.68 (0.54 - 0.82),0.96 (0.94 - 0.98),0.64 (0.61 - 0.66),0.65 (0.58 - 0.75),0.88 (0.85 - 0.92),0.66 (0.63 - 0.70)
acc,Transformer - Experimental,,,,,0.96 (0.93 - 0.96),0.67 (0.66 - 0.68),,0.60 (0.52 - 0.69),0.83 (0.81 - 0.84)
