In [1]:
# Notebook-wide settings; edit these before running the cells below.

# Label written into the "Dataset" column of every result row.
DATASET = "Task 3"

# When True, the final table keeps only rows whose "HP Match" flag is set.
ALLOW_ONLY_HP_MATCHES = True

# NOTE(review): not referenced by any cell visible in this notebook; the values
# coincide with the sigma values of the ReconstructionThreshold rows - confirm
# whether it is still needed.
DIFFERENCE_THRESHS = [
    1.05,
    1.1,
    1.5,
    3,
    4.5,
]

In [2]:
# Load data
import pandas as pd

# Read the classifier trial results from the working directory. Judging by the
# preview below, each row holds one classifier evaluation: the classifier name,
# the autoencoder it is based on, its metrics, and the hyper-parameters that
# apply to it (the others are empty).
data = pd.read_csv("latent_trials.csv")
# Preview the first rows as the cell output.
data.head()

Unnamed: 0,Classifier,Based on AE,sigma,precision,recall,f-score,max_iter,auc,n_estimators,max_depth,min_samples_leaf,numEstimators,C,gamma,kernel,probability,algorithm,leaf_size,n_neighbors
0,ReconstructionThreshold,20230224233553,1.05,0.139535,0.222222,0.171429,,,,,,,,,,,,,
1,ReconstructionThreshold,20230224233553,1.1,0.139535,0.222222,0.171429,,,,,,,,,,,,,
2,ReconstructionThreshold,20230224233553,1.5,0.166667,0.185185,0.175439,,,,,,,,,,,,,
3,ReconstructionThreshold,20230224233553,3.0,0.0,0.0,0.0,,,,,,,,,,,,,
4,ReconstructionThreshold,20230224233553,4.5,0.0,0.0,0.0,,,,,,,,,,,,,


In [3]:
# Add processed classifier column.
# ReconstructionThreshold rows get their sigma appended to the name so that
# each threshold value is handled as its own classifier; every other
# classifier keeps its name unchanged.
data["Processed Classifier"] = [
    clf_name + "_" + str(sigma_value) if clf_name == "ReconstructionThreshold" else clf_name
    for clf_name, sigma_value in data[["Classifier", "sigma"]].values
]
data = data.drop(columns=["Classifier"])

# A scalar is broadcast to every row, tagging all results with the dataset label.
data["Dataset"] = DATASET

In [4]:
# Add AE data
#
# keep_default_na=False stops pandas from turning empty cells into NaN, so an
# empty "leader" cell stays an empty (falsy) string and can be tested directly.
ae_data = pd.read_csv("ae_trials.csv", keep_default_na=False)

# Take an explicit copy of the column subset: adding a column to the result of
# df[[...]] is a chained assignment and can raise SettingWithCopyWarning.
ae_data = ae_data[["name", "type", "input_size", "attempt", "leader"]].copy()

# "Primary Model" is the leader's name when one is given, otherwise the
# model's own name. The value goes through float() before "%d" so it is
# rendered as a plain integer string.
ae_data["Primary Model"] = [
    "%d" % float(leader or base)
    for base, leader in ae_data[["name", "leader"]].values
]
ae_data = ae_data.drop(columns=["leader"])
ae_data.head()

Unnamed: 0,name,type,input_size,attempt,Primary Model
0,20230224233553,Adversarial AE,4096,0,20230224233553
1,20230224234106,Vanilla AE,4096,0,20230224233553
2,20230224234323,Adversarial AE,4096,1,20230224233553
3,20230224234831,Vanilla AE,4096,1,20230224233553
4,20230224235042,Adversarial AE,4096,2,20230224233553


In [5]:
# Convert AE data to dict and add to ll data
#
# The lookup gets its own name instead of rebinding `ae_data`, so `ae_data`
# stays a DataFrame and this cell can be re-run without re-running the
# previous one.
ae_info_by_name = {
    name: [ae_type, input_size, attempt, primary]
    for name, ae_type, input_size, attempt, primary in ae_data.values
}

# Keep only the trials whose autoencoder is listed in the AE table. The
# explicit copy makes the column assignment below operate on an independent
# frame rather than on a slice of the original (avoids SettingWithCopyWarning).
has_known_ae = [name in ae_info_by_name for name in data["Based on AE"].values]
data = data[has_known_ae].copy()
data[["Type", "Input Size", "Attempt", "Primary AE Model"]] = [ae_info_by_name[name] for name in data["Based on AE"].values]

# Spot check: AE type of every "Discriminator_Thresh_30" row (single .loc
# lookup instead of chained indexing).
data.loc[data["Processed Classifier"] == "Discriminator_Thresh_30", "Type"]

9       Adversarial AE
41      Adversarial AE
73      Adversarial AE
105     Adversarial AE
137     Adversarial AE
             ...      
3081    Adversarial AE
3113    Adversarial AE
3145    Adversarial AE
3177    Adversarial AE
3209    Adversarial AE
Name: Type, Length: 101, dtype: object

In [6]:
# Create groups and check if hyper-params are the same
hyper_params_to_check = [
    "max_iter",
    "n_estimators",
    "max_depth",
    "min_samples_leaf",
    "numEstimators",
    "C",
    "gamma",
    "kernel",
    "probability",
    "algorithm",
    "leaf_size",
    "n_neighbors"
]

# Count the distinct values of every hyper-parameter within each
# (primary AE model, classifier) group. dropna=False makes a missing value
# count as a value of its own: with the default (dropna=True) a group that
# mixes an empty hyper-parameter with a set one would be counted as having a
# single value and wrongly reported as matching. Hyper-parameters that are
# empty for the whole group still count as one value.
data_uniqueness = (
    data[["Primary AE Model", "Processed Classifier"] + hyper_params_to_check]
    .groupby(["Primary AE Model", "Processed Classifier"])
    .nunique(dropna=False)
)

# A group matches when no hyper-parameter takes more than one value.
# Restricting to the hyper-parameter columns keeps the check independent of
# whether the group keys are also present as columns in the nunique() result.
hp_match_by_group = (data_uniqueness[hyper_params_to_check] <= 1).all(axis=1).to_dict()

# assign() returns a new frame, so nothing is written into a frame that may
# itself be a filtered slice.
data = data.assign(**{
    "HP Match": [
        hp_match_by_group[group_key]
        for group_key in zip(data["Primary AE Model"], data["Processed Classifier"])
    ],
    "Classifier": data["Processed Classifier"],
})

# Keep the reporting columns only; this also drops "Processed Classifier" and
# the individual hyper-parameter columns.
data = data[["Dataset", "Type", "Input Size", "Primary AE Model", "Classifier", "Attempt", "HP Match", "precision", "recall", "f-score", "auc"]]

data.head()

Unnamed: 0,Dataset,Type,Input Size,Primary AE Model,Classifier,Attempt,HP Match,precision,recall,f-score,auc
0,Task 3,Adversarial AE,4096,20230224233553,ReconstructionThreshold_1.05,0,True,0.139535,0.222222,0.171429,
1,Task 3,Adversarial AE,4096,20230224233553,ReconstructionThreshold_1.1,0,True,0.139535,0.222222,0.171429,
2,Task 3,Adversarial AE,4096,20230224233553,ReconstructionThreshold_1.5,0,True,0.166667,0.185185,0.175439,
3,Task 3,Adversarial AE,4096,20230224233553,ReconstructionThreshold_3.0,0,True,0.0,0.0,0.0,
4,Task 3,Adversarial AE,4096,20230224233553,ReconstructionThreshold_4.5,0,True,0.0,0.0,0.0,


In [7]:
# Give the metric columns (precision, recall, f-score, auc) their display names.
data = data.rename(
    columns={
        "precision": "Precision",
        "recall": "Recall",
        "f-score": "F1-Score",
        "auc": "Area Under Curve",
    }
)

In [8]:
# Create final table
def row_max(df):
    """Return the row of ``df`` with the highest F1-Score as a one-row frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an "F1-Score" column.

    Returns
    -------
    pandas.DataFrame or None
        A single-row frame indexed ``[0]`` holding the values of the first row
        whose F1-Score equals the maximum, with the same columns as ``df``
        (previously the eight metric column names were hard-coded, which
        failed for any other column layout). ``None`` when no row equals the
        maximum, i.e. when ``df`` is empty or every F1-Score is NaN.
    """
    f1_scores = df["F1-Score"]
    # NaN never compares equal, so an all-NaN (or empty) frame has no best row.
    is_best = (f1_scores == f1_scores.max()).to_numpy()
    if not is_best.any():
        return None
    # argmax on a boolean array gives the position of the first True, so ties
    # resolve to the earliest row.
    first_best = int(is_best.argmax())
    return pd.DataFrame([df.to_numpy()[first_best]], columns=list(df.columns))
    

# Optionally keep only the rows flagged as having matching hyper-parameters.
if ALLOW_ONLY_HP_MATCHES:
    data = data.loc[data["HP Match"]]

group_keys = ["Dataset", "Type", "Input Size", "Primary AE Model", "Classifier"]
metric_cols = ["Precision", "Recall", "F1-Score", "Area Under Curve"]

# Collapse the rows of each (dataset, type, input size, primary model,
# classifier) group into the mean and standard deviation of its metrics; the
# "Attempt" and "HP Match" columns are dropped here.
per_model = data[group_keys + metric_cols].groupby(group_keys)
data_std = per_model.std()
data = per_model.mean()

data[["Precision Standard Deviation", "Recall Standard Deviation", "F1-Score Standard Deviation", "Area Under Curve Standard Deviation"]] = data_std[metric_cols]

# For each (dataset, input size, type, classifier) keep the single row with the
# best mean F1-Score, as chosen by row_max.
data = data.groupby(["Dataset", "Input Size", "Type", "Classifier"]).apply(row_max)

# Human-readable summary in the form "F1 (precision, recall)".
data["Formated Results"] = [
    "%.4f (%.4f, %.4f)" % (f1, precision, recall)
    for precision, recall, f1 in zip(data["Precision"], data["Recall"], data["F1-Score"])
]

# Descending order on both keys: type first, then best F1-Score first.
data = data.sort_values(["Type", "F1-Score"], ascending=False)

data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Precision,Recall,F1-Score,Area Under Curve,Precision Standard Deviation,Recall Standard Deviation,F1-Score Standard Deviation,Area Under Curve Standard Deviation,Formated Results
Dataset,Input Size,Type,Classifier,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Task 3,12288,Vanilla AE,ReconstructionThreshold_1.05,0,0.916667,0.814815,0.862745,,,,,,"0.8627 (0.9167, 0.8148)"
Task 3,12288,Vanilla AE,ReconstructionThreshold_1.1,0,0.916667,0.814815,0.862745,,,,,,"0.8627 (0.9167, 0.8148)"
Task 3,12288,Vanilla AE,ReconstructionThreshold_1.5,0,0.916667,0.814815,0.862745,,,,,,"0.8627 (0.9167, 0.8148)"
Task 3,4096,Vanilla AE,GradientBoostingClassifier,0,0.843333,0.725476,0.758095,,0.150739,0.273918,0.207523,,"0.7581 (0.8433, 0.7255)"
Task 3,12288,Vanilla AE,KNeighborsClassifier,0,0.666667,0.75,0.705882,,,,,,"0.7059 (0.6667, 0.7500)"


In [9]:
# Save final table
# Written to the working directory; the index is kept (to_csv default), so the
# group levels shown in the preview above become the leading CSV columns.

data.to_csv("Final LL Classifiers Table.csv")