In [1]:
# Set constants
# Label written into the "Dataset" column of every row of the results table.
DATASET = "Task 1"
# When True, only rows whose "HP Match" flag is True are kept before the final
# aggregation; when False, every trial is used.
ALLOW_ONLY_HP_MATCHES = False

In [2]:
# Load data
import pandas as pd

# Not referenced by any other cell visible in this notebook.
# NOTE(review): the values coincide with the sigma values shown in the preview
# below, so these are presumably the ReconstructionThreshold sigmas - confirm
# before removing.
DIFFERENCE_THRESHS = [1.05, 1.1, 1.5, 3, 4.5]

# Read the classifier trial results that every later cell builds on.
data = pd.read_csv("latent_trials.csv")
data.head()

Unnamed: 0,Classifier,Based on AE,sigma,precision,recall,f-score,max_iter,auc,n_estimators,max_depth,min_samples_leaf,numEstimators,C,gamma,kernel,probability,algorithm,leaf_size,n_neighbors
0,ReconstructionThreshold,202302160023,1.05,0.833333,0.3125,0.454545,,,,,,,,,,,,,
1,ReconstructionThreshold,202302160023,1.1,0.833333,0.3125,0.454545,,,,,,,,,,,,,
2,ReconstructionThreshold,202302160023,1.5,1.0,0.25,0.4,,,,,,,,,,,,,
3,ReconstructionThreshold,202302160023,3.0,1.0,0.125,0.222222,,,,,,,,,,,,,
4,ReconstructionThreshold,202302160023,4.5,0.0,0.0,0.0,,,,,,,,,,,,,


In [3]:
# Add processed classifier column.
# ReconstructionThreshold rows get their sigma appended to the name, so every
# threshold value is handled as its own classifier; all other names are kept.
processed_names = []
for clf_name, clf_sigma in data[["Classifier", "sigma"]].to_numpy():
    if clf_name == "ReconstructionThreshold":
        processed_names.append(clf_name + "_" + str(clf_sigma))
    else:
        processed_names.append(clf_name)

data["Processed Classifier"] = processed_names
data = data.drop(columns=["Classifier"])

# Tag every row with the dataset label (a scalar is broadcast to all rows).
data["Dataset"] = DATASET

In [4]:
# Add AE data: load the autoencoder trials and keep only the columns that are
# joined onto the classifier results, in this fixed order.
ae_columns = ["name", "type", "input_size", "attempt"]
ae_data = pd.read_csv("ae_trials.csv").loc[:, ae_columns]
ae_data.head()

Unnamed: 0,name,type,input_size,attempt
0,202302160023,Adversarial AE,4096,0
1,202302160026,Vanilla AE,4096,0
2,202302160028,Adversarial AE,4096,1
3,202302160031,Vanilla AE,4096,1
4,202302160033,Adversarial AE,4096,2


In [5]:
# Convert AE data to a dict keyed by AE name and attach it to the classifier data.
# Note: ae_data is rebound from a DataFrame to a dict here, so this cell cannot
# be re-run on its own without re-running the cell that loads ae_trials.csv.
ae_lookup = {}
for ae_name, ae_type, ae_input_size, ae_attempt in ae_data.to_numpy():
    ae_lookup[ae_name] = [ae_type, ae_input_size, ae_attempt]
ae_data = ae_lookup

# One [type, input_size, attempt] triple per classifier row, looked up through
# the name of the AE the classifier is based on.
ae_rows = [ae_data[based_on] for based_on in data["Based on AE"].to_numpy()]
data[["Type", "Input Size", "Attempt"]] = ae_rows

In [6]:
# Create groups and check if hyper-params are the same.
# Result metrics and the constant descriptors are not hyper-parameters, so they
# are left out of the comparison. "auc" is a result metric just like precision,
# recall and f-score: keeping it in the check would flag a group as a mismatch
# whenever two trials merely scored a different AUC.
columns_not_to_check = ["precision", "recall", "f-score", "auc", "Dataset", "Type", "Input Size"]
group_columns = ["Based on AE", "Processed Classifier"]

# Number of distinct non-NaN values of every remaining column within each
# (AE, classifier) group.
hp_nunique = (
    data.drop(columns=columns_not_to_check)
    .groupby(group_columns, group_keys=False)
    .nunique()
)

# A group matches when no remaining column holds more than one distinct value
# (columns that are entirely NaN count as 0 distinct values).
hp_match = (hp_nunique <= 1).all(axis=1)

# Lookup: (AE name, processed classifier) -> True when the hyper-params agree.
data_uniqueness = hp_match.to_dict()

data["HP Match"] = [data_uniqueness[b_ae, pc] for b_ae, pc in data[group_columns].values]
data["Classifier"] = data["Processed Classifier"]
data = data.drop(columns=["Processed Classifier"])

# Keep only the descriptive columns, the match flag and the result metrics.
data = data[["Dataset", "Type", "Input Size", "Based on AE", "Classifier", "Attempt", "HP Match", "precision", "recall", "f-score", "auc"]]

data.head()

Unnamed: 0,Dataset,Type,Input Size,Based on AE,Classifier,Attempt,HP Match,precision,recall,f-score,auc
0,Task 1,Adversarial AE,4096,202302160023,ReconstructionThreshold_1.05,0,True,0.833333,0.3125,0.454545,
1,Task 1,Adversarial AE,4096,202302160023,ReconstructionThreshold_1.1,0,True,0.833333,0.3125,0.454545,
2,Task 1,Adversarial AE,4096,202302160023,ReconstructionThreshold_1.5,0,True,1.0,0.25,0.4,
3,Task 1,Adversarial AE,4096,202302160023,ReconstructionThreshold_3.0,0,True,1.0,0.125,0.222222,
4,Task 1,Adversarial AE,4096,202302160023,ReconstructionThreshold_4.5,0,True,0.0,0.0,0.0,


In [7]:
# Rename precision, recall, f-score and auc to their display names.
metric_display_names = {
    "precision": "Precision",
    "recall": "Recall",
    "f-score": "F1-Score",
    "auc": "Area Under Curve",
}
data = data.rename(columns=metric_display_names)

In [8]:
# Snapshot of the per-trial table before it is aggregated in the next cell.
# to_csv writes the DataFrame index as the first column by default.
data.to_csv("intermediate.csv")

In [8]:
# Create final table
def row_max(df):
    """Return the row of ``df`` with the highest F1-Score as a one-row DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains an "F1-Score" column.

    Returns
    -------
    pandas.DataFrame
        The first row whose F1-Score equals the maximum, with the same columns
        as ``df`` and a fresh index starting at 0. An empty frame with those
        columns is returned when ``df`` has no rows or every F1-Score is NaN.
    """
    f1_scores = df["F1-Score"]

    # NaN never compares equal to the maximum, so a frame without a single valid
    # score has no winning row; return an explicit empty frame instead of None.
    if not f1_scores.notna().any():
        return df.iloc[0:0].reset_index(drop=True)

    # Work with positions so the lookup does not depend on the index labels
    # of ``df``; idxmax on the reset index is the first position of the maximum.
    best_position = f1_scores.reset_index(drop=True).idxmax()

    # Column names are taken from ``df`` itself rather than a hard-coded list.
    return df.iloc[[best_position]].reset_index(drop=True)
    

# Optionally keep only the trials whose hyper-parameters agreed within their group.
if ALLOW_ONLY_HP_MATCHES:
    data = data.loc[data["HP Match"]]

trial_keys = ["Dataset", "Type", "Input Size", "Based on AE", "Classifier"]
# Metric column -> name of the column that receives its standard deviation.
metric_to_std_name = {
    "Precision": "Precision Standard Deviation",
    "Recall": "Recall Standard Deviation",
    "F1-Score": "F1-Score Standard Deviation",
    "Area Under Curve": "Area Under Curve Standard Deviation",
}

# Mean and standard deviation of every metric over the rows that share the same
# dataset, AE type, input size, AE and classifier.
data = data[trial_keys + list(metric_to_std_name)]
trials_by_key = data.groupby(trial_keys)
data_std = trials_by_key.std()
data = trials_by_key.mean()

for metric_name, std_name in metric_to_std_name.items():
    data[std_name] = data_std[metric_name]

# Per dataset / AE type / input size / classifier, keep the row with the highest
# mean F1-Score among the AEs.
data = data.groupby(["Dataset", "Type", "Input Size", "Classifier"]).apply(row_max)

data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Precision,Recall,F1-Score,Area Under Curve,Precision Standard Deviation,Recall Standard Deviation,F1-Score Standard Deviation,Area Under Curve Standard Deviation
Dataset,Type,Input Size,Classifier,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Task 1,Adversarial AE,4096,Discriminator,0,1.0,1.0,1.0,,,,,
Task 1,Adversarial AE,4096,Discriminator_Thresh_1,0,0.0,0.0,0.0,,,,,
Task 1,Adversarial AE,4096,Discriminator_Thresh_10,0,0.0,0.0,0.0,,,,,
Task 1,Adversarial AE,4096,Discriminator_Thresh_20,0,0.0,0.0,0.0,,,,,
Task 1,Adversarial AE,4096,Discriminator_Thresh_30,0,0.0,0.0,0.0,,,,,


In [9]:
# Save final table
# The group keys are held in the index at this point, so to_csv writes them as
# the leading columns of the file.
data.to_csv("Final LL Classifiers Table.csv")