In [2]:
# Standard
from pathlib import Path
import os
import sys
import warnings
import random
import dataclasses
import math
import multiprocessing as mp

# Extra
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.model_selection import cross_validate, RandomizedSearchCV
from sklearn.decomposition import PCA
import statsmodels.stats.api as sms
import tensorflow as tf
from keras import backend as K
from keras.models import Model
from keras.layers import (
    Dense,
    Input,
    Conv1D,
    MaxPooling1D,
    Flatten,
    Lambda,
    Conv2D,
    MaxPooling2D,
    Dropout,
    BatchNormalization,
    GlobalAveragePooling1D,
    Activation
)
from keras.utils import plot_model
from keras.optimizers import Adam, SGD,RMSprop
from keras.models import load_model
from keras.callbacks import Callback
from tqdm.auto import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display

# Custom
module_path = os.path.abspath(os.path.join(".."))  # supposed to be parent folder
if module_path not in sys.path:
    sys.path.append(module_path)
from src.utility.dataset_loader_hdf5 import DatasetLoader

# Global utitlity functions are loaded from separate notebook:
%run utils.ipynb

Using TensorFlow backend.


### 1.2 Configuration <a id='1.2'>&nbsp;</a>

In [3]:
# Configure Data Loading & Seed
SEED = 712  # Used for every random function
HMOG_HDF5 = Path.cwd().parent / "data" / "processed" / "hmog_dataset.hdf5"
EXCLUDE_COLS = ["sys_time"]
CORES = mp.cpu_count()

# For plots and CSVs
OUTPUT_PATH = Path.cwd() / "output" / "chapter-6-1-4-siamese-cnn"  # Cached data & csvs
OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
REPORT_PATH = Path.cwd().parent / "reports" / "figures" # Figures for thesis
REPORT_PATH.mkdir(parents=True, exist_ok=True)

# Improve performance of Tensorflow (this improved speed _a_lot_ on my machine!!!)
K.tf.set_random_seed(SEED)
conf = K.tf.ConfigProto(
    device_count={"CPU": CORES},
    allow_soft_placement=True,
    intra_op_parallelism_threads=CORES,
    inter_op_parallelism_threads=CORES,
)
K.set_session(K.tf.Session(config=conf))

# Plotting
%matplotlib inline
utils_set_output_style()

# Silence various deprecation warnings...
tf.logging.set_verbosity(tf.logging.ERROR)
np.warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore")

In [4]:
# Workaround to remove ugly spacing between tqdm progress bars:
HTML("<style>.p-Widget.jp-OutputPrompt.jp-OutputArea-prompt:empty{padding: 0;border: 0;} div.output_subarea{padding:0;}</style>")

### 1.3 Experiment Parameters <a id='1.3'>&nbsp;</a> 
Selection of parameters set that had been tested in this notebook. Select one of them to reproduce results.

In [5]:
@dataclasses.dataclass
class ExperimentParameters:
    """Contains all relevant parameters to run an experiment."""

    name: str  # Name of Experiments Parameter set. Used as identifier for charts etc.

    # Data / Splitting:
    frequency: int
    feature_cols: list  # Columns used as features
    max_subjects: int
    exclude_subjects: list  # Don't load data from those users
    n_valid_train_subjects: int
    n_valid_test_subjects: int
    n_test_train_subjects: int
    n_test_test_subjects: int
    seconds_per_subject_train: float
    seconds_per_subject_test: float
    task_types: list  # Limit scenarios to [1, 3, 5] for sitting or [2, 4, 6] for walking, or don't limit (None)

    # Reshaping
    window_size: int  # After resampling
    step_width: int  # After resampling

    # Normalization
    scaler: str  # {"std", "robust", "minmax"}
    scaler_scope: str  # {"subject", "session"}
    scaler_global: bool  # scale training and testing sets at once (True), or fit scaler on training only (False)

    # Siamese Network
    max_pairs_per_session: int  # Max. number of pairs per session
    margin: float  # Contrastive Loss Margin
    model_variant: str  # {"1d", "2d"} Type of architecture
    filters: list  # List of length 4, containing number of filters for conv layers
    epochs_best: int  # Train epochs to for final model
    epochs_max: int
    batch_size: int
    optimizer: str  # Optimizer to use for Siamese Network
    optimizer_lr: float  # Learning Rate
    optimizer_decay: float

    # OCSVM
    ocsvm_nu: float  # Best value found in random search, used for final model
    ocsvm_gamma: float  # Best value found in random search, used for final model

    # Calculated values
    def __post_init__(self):
        # HDF key of table:
        self.table_name = f"sensors_{self.frequency}hz"

        # Number of samples per _session_ used for training:
        self.samples_per_subject_train = math.ceil(
            (self.seconds_per_subject_train * 100)
            / (100 / self.frequency)
            / self.window_size
        )

        # Number of samples per _session_ used for testing:
        self.samples_per_subject_test = math.ceil(
            (self.seconds_per_subject_test * 100)
            / (100 / self.frequency)
            / self.window_size
        )


# INSTANCES
# ===========================================================

# NAIVE_MINMAX (2D Filters)
# -----------------------------------------------------------
NAIVE_MINMAX_2D = ExperimentParameters(
    name="NAIVE-MINMAX-2D",
    # Data / Splitting
    frequency=25,
    feature_cols=[
        "acc_x",
        "acc_y",
        "acc_z",
        "gyr_x",
        "gyr_y",
        "gyr_z",
        "mag_x",
        "mag_y",
        "mag_z",
    ],
    max_subjects=90,
    exclude_subjects=[
        "733162",  # No 24 sessions
        "526319",  # ^
        "796581",  # ^
        "539502",  # Least amount of sensor values
        "219303",  # ^
        "737973",  # ^
        "986737",  # ^
        "256487",  # Most amount of sensor values
        "389015",  # ^
        "856401",  # ^
    ],
    n_valid_train_subjects=40,
    n_valid_test_subjects=10,
    n_test_train_subjects=10,
    n_test_test_subjects=30,
    seconds_per_subject_train=67.5,
    seconds_per_subject_test=67.5,
    task_types=None,
    # Reshaping
    window_size=25,  # 1 sec
    step_width=25,
    # Normalization
    scaler="minmax",
    scaler_scope="subject",
    scaler_global=True,
    # Siamese Network
    model_variant="2d",
    filters=[32, 64, 128, 32],
    epochs_best=35,
    epochs_max=40,
    batch_size=200,
    optimizer="sgd",
    optimizer_lr=0.01,
    optimizer_decay=0,
    max_pairs_per_session=60,  # => 4min
    margin=0.2,
    # OCSVM
    ocsvm_nu=0.092,
    ocsvm_gamma=1.151,
)  # <END NAIVE_APPROACH>

# VALID_MINMAX (2D)
# -----------------------------------------------------------
VALID_MINMAX_2D = dataclasses.replace(
    NAIVE_MINMAX_2D,
    name="VALID-MINMAX-2D",
    task_types=None,
    scaler_global=False,
    epochs_max=40,
    ocsvm_nu=0.110,
    ocsvm_gamma=59.636,
)

# NAIVE_ROBUST (2D)
# -----------------------------------------------------------
NAIVE_ROBUST_2D = dataclasses.replace(
    NAIVE_MINMAX_2D,
    name="NAIVE-ROBUST-2D",
    scaler="robust",
    optimizer="sgd",
    optimizer_lr=0.05, # Decreased, to avoid "all zeros" prediction
    optimizer_decay=0.002,
    epochs_best=5,
    ocsvm_nu=0.214,
    ocsvm_gamma=2.354,
)

# VALID_ROBUST (2D)
# -----------------------------------------------------------
VALID_ROBUST_2D = dataclasses.replace(
    NAIVE_MINMAX_2D,
    name="VALID-ROBUST-2D",
    scaler="robust",
    scaler_global=False,
    epochs_best=6,
    epochs_max=20,
    optimizer="sgd",
    optimizer_lr=0.05,  # Decrease LR, to avoid "all zeros" prediction
    optimizer_decay=0.002,
    ocsvm_nu=0.190,
    ocsvm_gamma=0.069,
)

# VALID_ROBUST (1D)
# -----------------------------------------------------------
VALID_ROBUST_1D = dataclasses.replace(
    NAIVE_MINMAX_2D,
    name="VALID-ROBUST-1D",
    scaler="robust",
    scaler_global=False,
    model_variant="1d", 
    filters=[32, 64, 128, 64],    
    epochs_best=9,
    epochs_max=20,
    ocsvm_nu=0.156,
    ocsvm_gamma=33.932,
)

# FCN_ROBUST (1D)
# -----------------------------------------------------------
VALID_FCN_ROBUST = dataclasses.replace(
    NAIVE_MINMAX_2D,
    name="VALID-FCN-ROBUST-FINAL",
    task_types=[2, 4, 6],
    feature_cols=["acc_x", "acc_y", "acc_z"], 
    frequency=25,
    window_size=25*5,
    step_width=25*5,
    scaler="robust",
    scaler_global=False,
    seconds_per_subject_train=60 * 10,
    seconds_per_subject_test=60 * 10,
    max_pairs_per_session=60 * 10,
    model_variant="fcn",
    filters=[32, 64, 32],
    optimizer="adam",
    optimizer_lr=0.001,
    optimizer_decay=None,
    batch_size=300,
    margin=1,
    epochs_best=40,
    epochs_max=80,
    ocsvm_nu=0.165,
    ocsvm_gamma=8.296,
)

### 1.4 Select Approach <a id='1.4'>&nbsp;</a> 
Select the parameters to use for current notebook execution here!

In [5]:
P = NAIVE_MINMAX_2D

In [6]:
utils_ppp(P)

Unnamed: 0,Value
batch_size,200
epochs_best,35
epochs_max,40
exclude_subjects,"[733162, 526319, 796581, 539502, 219303, 73797..."
feature_cols,"[acc_x, acc_y, acc_z, gyr_x, gyr_y, gyr_z, mag..."
filters,"[32, 64, 128, 32]"
frequency,25
margin,0.2
max_pairs_per_session,60
max_subjects,90


## 2. Initial Data Preparation <a id='2'>&nbsp;</a> 

### 2.1 Load Dataset <a id='2.1'>&nbsp;</a> 

**Plot Distribution of EER per subject:**

In [6]:
from pyod.models.abod import ABOD
from pyod.models.cblof import CBLOF
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.hbos import HBOS
from pyod.models.iforest import IForest
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.loci import LOCI
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.pca import PCA
from pyod.models.sos import SOS
from pyod.models.lscp import LSCP
from pyod.models.cof import COF
from pyod.models.sod import SOD
from pyod.models.xgbod import XGBOD
from pyod.models.vae import VAE


name="NAIVE-MINMAX-2D" 
cont = .1 
name="VALID-FCN-ROBUST-FINAL" 
cont = .2

%run utils.ipynb
from pathlib import Path
OUTPUT_PATH = Path.cwd() / "output" / "chapter-6-1-4-siamese-cnn"  # Cached data & csvs
REPORT_PATH = Path.cwd().parent / "reports" / "figures" # Figures for thesis

import pandas as pd


In [48]:
modelss = [
    ABOD(contamination=cont,), 
    #CBLOF(contamination=cont,), 
    #FeatureBagging(contamination=cont,), 
    #HBOS(contamination=cont,), 
    #IForest(contamination=cont,), 
    KNN(contamination=cont,), 
    #LOF(contamination=cont,), 
    #MCD(contamination=cont,), 
    #OCSVM(contamination=cont,), 
    #PCA(contamination=cont,), 
    VAE([16,1],[1,16])
]
'''
name="VALID-FCN-ROBUST-FINAL" 
for mooo in modelss: 
    #print(str(mooo)[:10])
    try: 
        df_results=pd.read_csv(OUTPUT_PATH / f"{name+str(mooo)[:10]}_ALLMODELS_TEST_results.csv")
    except: 
        continue

    df_plot = df_results.rename(
        columns={"test_accuracy": "Test Accuracy", "test_eer": "Test EER", "owner": "Owner"}
    ).astype({"Owner": str})
    print(str(mooo)[:10])
    print("{:.4f}".format(round(df_results["test_eer"].mean(), 4)))
    print("{:.4f}".format(round(df_results["test_accuracy"].mean(), 4)))
    print("{:.4f}".format(round(df_results["score_time"].mean(), 4)))
    #fig = utils_plot_acc_eer_dist(df_plot, "Test EER") # with new archi [32,16,4,1],[1,4,16,32] instead of [32,16,8,4,1],[1,4,8,16,32]
    #utils_save_plot(plt, REPORT_PATH / f"buech2019-siamese-{name+str(mooo)[:10]}-eer.pdf")
    #fig = utils_plot_acc_eer_dist(df_plot, "Test Accuracy")
    #utils_save_plot(plt, REPORT_PATH / f"buech2019-siamese-{name+str(mooo)[:10]}-acc.pdf")
    #fig = utils_plot_acc_eer_dist(df_plot, "score_time")
    #utils_save_plot(plt, REPORT_PATH / f"buech2019-siamese-{name+str(mooo)[:10]}-time.pdf")
print('==============')
'''    
name="NAIVE-MINMAX-2D"
#name="VALID-FCN-ROBUST-FINAL" 
cont=.1
print (name)
for mooo in modelss: 
    #print(str(mooo)[:10])
    try: 
        df_results=pd.read_csv(OUTPUT_PATH / f"{name+str(mooo)[:10]}2_ALLMODELS_TEST_results.csv")
    except: 
        continue
        
    

    #df_plot = df_results.rename(
    #    columns={"test_accuracy": "Test Accuracy", "test_eer": "Test EER", "owner": "Owner"}
    #).astype({"Owner": str})
    print(str(mooo)[:10])

    print("EER= Mean {:.4f} Var {:.4f}".format(round(df_results["test_eer"].mean(), 4),(round(df_results["test_eer"].var(), 4))))
    print("Acc= Mean {:.4f} Var {:.4f}".format(round(df_results["test_accuracy"].mean(), 4),(round(df_results["test_accuracy"].var(), 4))))
    print("Time= Mean {:.4f} Var {:.4f}".format(round(df_results["score_time"].mean(), 4),(round(df_results["score_time"].var(), 4))))

    #fig = utils_plot_acc_eer_dist(df_plot, "Test EER") # with new archi [32,16,4,1],[1,4,16,32] instead of [32,16,8,4,1],[1,4,8,16,32]
    #utils_save_plot(plt, REPORT_PATH / f"buech2019-siamese-{name+str(mooo)[:10]}-eer.pdf")
    #fig = utils_plot_acc_eer_dist(df_plot, "Test Accuracy")
    #utils_save_plot(plt, REPORT_PATH / f"buech2019-siamese-{name+str(mooo)[:10]}-acc.pdf")
    #fig = utils_plot_acc_eer_dist(df_plot, "score_time")
    #utils_save_plot(plt, REPORT_PATH / f"buech2019-siamese-{name+str(mooo)[:10]}-time.pdf")
    
    
print (" second run above first run down here") 
for mooo in modelss: 
    #print(str(mooo)[:10])
    try: 
        df_results=pd.read_csv(OUTPUT_PATH / f"{name+str(mooo)[:10]}_ALLMODELS_TEST_results.csv")
    except: 
        continue

    #df_plot = df_results.rename(
    #    columns={"test_accuracy": "Test Accuracy", "test_eer": "Test EER", "owner": "Owner"}
    #).astype({"Owner": str})
    print(str(mooo)[:10])
    
    print("EER= Mean {:.4f} Var {:.4f}".format(round(df_results["test_eer"].mean(), 4),(round(df_results["test_eer"].var(), 4))))
    print("Acc= Mean {:.4f} Var {:.4f}".format(round(df_results["test_accuracy"].mean(), 4),(round(df_results["test_accuracy"].var(), 4))))
    print("Time= Mean {:.4f} Var {:.4f}".format(round(df_results["score_time"].mean(), 4),(round(df_results["score_time"].var(), 4))))


NAIVE-MINMAX-2D


KeyError: "['std_test_accuracy', 'std_test_eer', 'param_epochs', 'mean_test_accuracy', 'mean_test_eer'] not in index"

In [26]:
#print(str(mooo)[:10])

df_results=pd.read_csv(OUTPUT_PATH / "VALID-FCN-ROBUST-FINALVAE(batch_size=3_ALLMODELS_TEST_results.csv")

print("EER= Mean {:.4f} Var {:.4f}".format(round(df_results["test_eer"].mean(), 4),(round(df_results["test_eer"].var(), 4))))
print("Acc= Mean {:.4f} Var {:.4f}".format(round(df_results["test_accuracy"].mean(), 4),(round(df_results["test_accuracy"].var(), 4))))
print("Time= Mean {:.4f} Var {:.4f}".format(round(df_results["score_time"].mean(), 4),(round(df_results["score_time"].var(), 4))))


EER= Mean 0.3771 Var 0.0166
Acc= Mean 0.6431 Var 0.0215
Time= Mean 0.0064 Var 0.0000


In [51]:
x="NAIVE-MINMAX-2D"
for enc in encoder_architectures[::-1]: 
    if os.path.isfile(OUTPUT_PATH / f"{x+str(enc)}__VAE_ez_yes_params_in_construct__random_search_results_again.csv"): 
        print(enc)
        df_results = pd.read_csv(OUTPUT_PATH / f"{x+str(enc)}__VAE_ez_yes_params_in_construct__random_search_results_again.csv")
        
       
        display(
        df_results[df_results["owner"] == 785873][
            [
                "owner",
                "mean_test_eer",
                "std_test_eer",
                "param_epochs", 
                "mean_test_accuracy",
                "std_test_accuracy",
            ]
        ].sort_values("mean_test_eer").head(10)
        )    
            
            
        '''
        display(
        df_results[df_results["rank_test_eer"] <= 1][
            [
            "owner",
            "rank_test_eer",
            "mean_fit_time",
            "mean_test_eer",
            "std_test_eer",
            "mean_test_accuracy",
            "std_test_accuracy",
            ]
        ].sort_values("mean_test_eer").head(3) )
        '''    

NameError: name 'encoder_architectures' is not defined

In [None]:
fig = utils_plot_acc_eer_dist(df_plot, "Test EER") # with new archi [32,16,4,1],[1,4,16,32] instead of [32,16,8,4,1],[1,4,8,16,32]
utils_save_plot(plt, REPORT_PATH / f"buech2019-siamese-{P.name.lower()}-eer.pdf")

In [None]:
fig = utils_plot_acc_eer_dist(df_plot, "mean_test_eer")
utils_save_plot(plt, REPORT_PATH / f"buech2019-siamese-{P.name.lower()}-eer.pdf")

### 7.3 Evaluate increasing Training Set Size (Training Delay) <a id='7.3'>&nbsp;</a> 

In [None]:
training_set_sizes = [1, 2, 3, 4, 10, 30, 60, 90, 125, 175, 250, 375]  # In samples

deep_feature_model = load_deep_feature_model(OUTPUT_PATH / f"{P.name}_model.h5")

df_results = None  # Will be filled with cv scores
for i in tqdm(range(5), desc="Run", leave=False):  # Run whole test 5 times
    for n_train_samples in tqdm(training_set_sizes, desc="Train Size", leave=False):
        for df_cv_scenarios, owner, impostors in tqdm(
            utils_generate_cv_scenarios(
                df_ocsvm_train_test,
                samples_per_subject_train=P.samples_per_subject_train,
                samples_per_subject_test=P.samples_per_subject_test,
                limit_train_samples=n_train_samples,  # samples overall
                seed=SEED + i,
                scaler=P.scaler,
                scaler_global=P.scaler_global,
                scaler_scope=P.scaler_scope,
                deep_model=deep_feature_model,
                model_variant=P.model_variant,
                feature_cols=P.feature_cols,
            ),
            desc="Owner",
            total=df_ocsvm_train_test["subject"].nunique(),
            leave=False,
        ):
            X = np.array(df_cv_scenarios["X"].values.tolist())
            y = df_cv_scenarios["label"].values

            train_test_cv = utils_create_cv_splits(df_cv_scenarios["mask"].values, SEED)

            model = VAE([32,16,8,4,1],[1,4,8,16,32], contamination=.5, optimizer='sgd',                
                gamma=3 , 
                capacity=.5 , 
                loss=losses.kld,
                batch_size=32, 
                dropout_rate=.25,
                epochs=200,
                output_activation=activations.selu, 
                hidden_activation=activations.sigmoid, 
    )

            warnings.filterwarnings("ignore")
            scores = cross_validate(
                model,
                X,
                y,
                cv=train_test_cv,
                scoring={"eer": utils_eer_scorer},
                n_jobs=CORES,
                verbose=0,
                return_train_score=True,
            )
            df_score = pd.DataFrame(scores)
            df_score["owner"] = owner
            df_score["train_samples"] = n_train_samples
            df_score["train_eer"] = df_score[
                "train_eer"
            ].abs()  # Revert scorer's signflip
            df_score["test_eer"] = df_score["test_eer"].abs()
            df_results = pd.concat([df_results, df_score], axis=0)

df_results.to_csv(OUTPUT_PATH / f"{P.name}_train_delay_results.csv", index=False)
df_results.head()

**Load Results from "Training set size" evaluation & prepare for plotting:**

In [None]:
df_results = pd.read_csv(OUTPUT_PATH / f"{P.name}_train_delay_results.csv")
df_plot = (
    df_results[["test_eer", "owner", "train_samples"]]
    .groupby(["owner", "train_samples"], as_index=False)
    .mean()
    .astype({"owner": "category"})
    .rename(
        columns={
            "test_eer": "Test EER",
            "owner": "Owner",
        }
    )
)
df_plot["Training Data in Seconds"] = df_plot["train_samples"] * P.window_size / P.frequency

**Plot EER with increasing number of training samples:**

In [None]:
utils_plot_training_delay(df_plot)
utils_save_plot(plt, REPORT_PATH / f"buech2019-siamese-{P.name.lower()}-train-size.pdf")

### 7.4 Evaluate increasing Test Set Sizes (Detection Delay)<a id='7.4'>&nbsp;</a> 

In [None]:
# Load Siamese CNN Model
deep_feature_model = load_deep_feature_model(OUTPUT_PATH / f"{P.name}_model.h5")

df_results = None  # Will be filled with cv scores
for i in tqdm(range(50), desc="Run", leave=False):  # Run whole test 5 times
    for df_cv_scenarios, owner, impostors in tqdm(
        utils_generate_cv_scenarios(
            df_ocsvm_train_test,
            samples_per_subject_train=P.samples_per_subject_train,
            samples_per_subject_test=P.samples_per_subject_test,
            limit_test_samples=1,  # Samples overall
            seed=SEED + i,
            scaler=P.scaler,
            scaler_global=P.scaler_global,
            scaler_scope=P.scaler_scope,
            deep_model=deep_feature_model,
            model_variant=P.model_variant,
            feature_cols=P.feature_cols,
        ),
        desc="Owner",
        total=df_ocsvm_train_test["subject"].nunique(),
        leave=False,
    ):
        X = np.array(df_cv_scenarios["X"].values.tolist())
        y = df_cv_scenarios["label"].values

        train_test_cv = utils_create_cv_splits(df_cv_scenarios["mask"].values, SEED)

        model = VAE([32,16,8,4,1],[1,4,8,16,32], contamination=.5, optimizer='sgd',                
                gamma=3 , 
                capacity=.5 , 
                loss=losses.kld,
                batch_size=32, 
                dropout_rate=.25,
                epochs=200,
                output_activation=activations.selu, 
                hidden_activation=activations.sigmoid, 
    )

        warnings.filterwarnings("ignore")
        scores = cross_validate(
            model,
            X,
            y,
            cv=train_test_cv,
            scoring={"eer": utils_eer_scorer},
            n_jobs=CORES,
            verbose=0,
            return_train_score=True,
        )
        df_score = pd.DataFrame(scores)
        df_score["owner"] = owner
        df_score["run"] = i
        df_score["train_eer"] = df_score["train_eer"].abs()  # Revert scorer's signflip
        df_score["test_eer"] = df_score["test_eer"].abs()
        df_results = pd.concat([df_results, df_score], axis=0)

df_results.to_csv(OUTPUT_PATH / f"{P.name}_detect_delay_results.csv", index=False)
df_results.head()

**Load Results from "Detection Delay" evaluation & prepare for plotting:**

In [None]:
df_results = pd.read_csv(OUTPUT_PATH / f"{P.name}_detect_delay_results.csv")
df_results["owner"] = df_results["owner"].astype(str)
df_plot = df_results.copy()

**Plot Expanding Mean EER and confidence interval:**

In [None]:
utils_plot_detect_delay(df_plot, factor=P.window_size / P.frequency, xlim=160)
utils_save_plot(
    plt, REPORT_PATH / f"buech2019-siamese-{P.name.lower()}-detection-delay.pdf"
)

In [30]:
df_results = pd.read_csv(OUTPUT_PATH/"VALID-FCN-ROBUST-FINALVAE(b, decoder_neurons=[1, 16, 64], dropout_rate=0.2,_random_srch_rzlts.csv") 

FileNotFoundError: [Errno 2] File b'/gpfs/fs0/scratch/u/uhengart/ezzeldi/vae_hmog/ContinAuth/notebooks/output/chapter-6-1-4-siamese-cnn/VALID-FCN-ROBUST-FINALVAE(b, decoder_neurons=[1, 16, 64], dropout_rate=0.2,_random_srch_rzlts.csv' does not exist: b'/gpfs/fs0/scratch/u/uhengart/ezzeldi/vae_hmog/ContinAuth/notebooks/output/chapter-6-1-4-siamese-cnn/VALID-FCN-ROBUST-FINALVAE(b, decoder_neurons=[1, 16, 64], dropout_rate=0.2,_random_srch_rzlts.csv'

# param 

[{(16, 500): [0, 0],
  (16, 1500): [0, 0],
  (16, 2500): [0, 0],
  (16, 3500): [0, 0],
  (16, 4500): [0, 0],
  (16, 5500): [0, 0],
  (16, 6500): [0, 0],
  (16, 7500): [0, 0],
  (16, 8500): [0, 0],
  (16, 9500): [0, 0],
  (32, 500): [0, 0],
  (32, 1500): [0, 0],
  (32, 2500): [0, 0],
  (32, 3500): [0, 0],
  (32, 4500): [0, 0],
  (32, 5500): [0, 0],
  (32, 6500): [0, 0],
  (32, 7500): [0, 0],
  (32, 8500): [0, 0],
  (32, 9500): [0, 0]},
 {(16, 500): [0, 0],
  (16, 1500): [0, 0],
  (16, 2500): [0, 0],
  (16, 3500): [0, 0],
  (16, 4500): [0, 0],
  (16, 5500): [0, 0],
  (16, 6500): [0, 0],
  (16, 7500): [0, 0],
  (16, 8500): [0, 0],
  (16, 9500): [0, 0],
  (32, 500): [0, 0],
  (32, 1500): [0, 0],
  (32, 2500): [0, 0],
  (32, 3500): [0, 0],
  (32, 4500): [0, 0],
  (32, 5500): [0, 0],
  (32, 6500): [0, 0],
  (32, 7500): [0, 0],
  (32, 8500): [0, 0],
  (32, 9500): [0, 0]},
 {(16, 500): [0, 0],
  (16, 1500): [0, 0],
  (16, 2500): [0, 0],
  (16, 3500): [0, 0],
  (16, 4500): [0, 0],
  (16, 5500):

In [130]:
!ls output/chapter-6-1-4-siamese-cnn *.csv | grep 'VALID'

ls: cannot access *.csv: No such file or directory
VALID_df_ocsvm_train_test.msg
VALID_df_ocsvm_train_valid.msg
VALID_df_siamese_train.msg
VALID_df_siamese_train_pairs.msg
VALID_df_siamese_valid.msg
VALID_df_siamese_valid_pairs.msg
VALID-FCN-ROBUST-FINAL[16, 1]__VAE_ez_noparams_in_construct__random_search_results_again.csv
VALID-FCN-ROBUST-FINAL[16, 1]__VAE_ez_(outsoft,lossmse,hiddentanh)_in_construct__random_search_results_again.csv
VALID-FCN-ROBUST-FINAL[16, 1]__VAE_ez_yes_params_in_construct__random_search_results_again.csv
VALID-FCN-ROBUST-FINAL[32, 16, 1]__VAE_ez_noparams_in_construct__random_search_results_again.csv
VALID-FCN-ROBUST-FINAL[32, 16, 1]__VAE_ez_(outsoft,lossmse,hiddentanh)_in_construct__random_search_results_again.csv
VALID-FCN-ROBUST-FINAL[32, 16, 1]__VAE_ez__random_search_results_again.csv
VALID-FCN-ROBUST-FINAL[32, 16, 1]__VAE_ez_yes_params_in_construct__random_search_results_again.csv
VALID-FCN-ROBUST-FINAL[32, 16, 2, 1]__VAE_ez_noparams_in_construc

In [178]:
modelss_valid_test

[]

In [None]:
modelss_valid_rand=[]
modelss_valid_test=[]
modelss_valid_nonVAE=[]
modelss_valid_VAE=[]

In [179]:

for r in modelss_allVALIDS: 
    if "rando" not in r:
        modelss_valid_test.append(r)
    else: 
        modelss_valid_rand.append(r)

In [143]:
modelss_allVALIDS=[
"VALID-FCN-ROBUST-FINAL[16, 1]__VAE_ez_noparams_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[16, 1]__VAE_ez_(outsoft,lossmse,hiddentanh)_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[16, 1]__VAE_ez_yes_params_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 1]__VAE_ez_noparams_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 1]__VAE_ez_(outsoft,lossmse,hiddentanh)_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 1]__VAE_ez__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 1]__VAE_ez_yes_params_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 2, 1]__VAE_ez_noparams_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 2, 1]__VAE_ez_(outsoft,lossmse,hiddentanh)_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 2, 1]__VAE_ez__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 2, 1]__VAE_ez_yes_params_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 2]__VAE__random_search_results.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 1]__VAE_ez_noparams_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 1]__VAE_ez_(outsoft,lossmse,hiddentanh)_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 1]__VAE_ez__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 1]__VAE_ez_yes_params_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 2]__VAE_ez_(outsoft,lossmse,hiddentanh)_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 2]__VAE__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 4, 1]__VAE_ez_noparams_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 4, 1]__VAE_ez_(outsoft,lossmse,hiddentanh)_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 4, 1]__VAE_ez__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 4, 1]__VAE_ez_yes_params_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 4, 1]__VAE_REV__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 4, 2, 1]__VAE_ez_noparams_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 4, 2, 1]__VAE_ez_(outsoft,lossmse,hiddentanh)_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 4, 2, 1]__VAE_ez__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 4, 2, 1]__VAE_ez_yes_params_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 4, 2, 1]__VAE_REV__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 4, 2]__VAE_ez_(outsoft,lossmse,hiddentanh)_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 16, 8, 4]__VAE_ez_(outsoft,lossmse,hiddentanh)_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 1]__VAE_ez_noparams_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 1]__VAE_ez_(outsoft,lossmse,hiddentanh)_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 1]__VAE_ez_yes_params_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 24, 16, 8, 4, 2, 1]__HMM_ez_random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 24, 16, 8, 4, 2, 1]__VAE_REV__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 2]__VAE_ez_noparams_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 2]__VAE_ez_(outsoft,lossmse,hiddentanh)_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 2]__VAE_ez_yes_params_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[32, 2]__VAE__random_search_results.csv",
"VALID-FCN-ROBUST-FINAL[32, 4]__VAE__random_search_results.csv",
"VALID-FCN-ROBUST-FINAL[64, 1]__VAE_ez_(outsoft,lossmse,hiddentanh)_in_construct__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINAL[64, 1]__VAE_ez__random_search_results_again.csv",
"VALID-FCN-ROBUST-FINALABOD(conta_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINALABOD(conta_ALLMODELS_TEST_results.csv",
"VALID-FCN-ROBUST-FINALABOD(contamination=0.1, method='fast', n_neighbors=5)_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINALCBLOF(alpha=0.9, beta=5, check_estimator=False, clustering_estimator=None, contamination=0.1, n_clusters=8, n_jobs=1, random_state=None, use_weights=False)_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINALCBLOF(alph_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINALCBLOF(alph_ALLMODELS_TEST_results.csv",
"VALID-FCN-ROBUST-FINALCOF(contam_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINALFeatureBag_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINALFeatureBag_ALLMODELS_TEST_results.csv",
"VALID-FCN-ROBUST-FINALHBOS(alpha_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINALHBOS(alpha_ALLMODELS_TEST_results.csv",
"VALID-FCN-ROBUST-FINALIForest(be_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINALIForest(be_ALLMODELS_TEST_results.csv",
"VALID-FCN-ROBUST-FINALKNN(algori_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINALKNN(algori_ALLMODELS_TEST_results.csv",
"VALID-FCN-ROBUST-FINALLOCI(alpha_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINALLOF(algori_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINALLOF(algori_ALLMODELS_TEST_results.csv",
"VALID-FCN-ROBUST-FINALMCD(assume2_ALLMODELS_TEST_results.csv",
"VALID-FCN-ROBUST-FINALMCD(assume_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINALMCD(assume_ALLMODELS_TEST_results.csv",
"VALID-FCN-ROBUST-FINALOCSVM(cach_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINALOCSVM(cach_ALLMODELS_TEST_results.csv",
"VALID-FCN-ROBUST-FINALPCA(contam_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINALPCA(contam_ALLMODELS_TEST_results.csv",
"VALID-FCN-ROBUST-FINAL_random_search_results.csv",
"VALID-FCN-ROBUST-FINALSOD(alpha=_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINALSOS(contam_All_MODELS_random_search_results.csv",
"VALID-FCN-ROBUST-FINAL_test_results.csv",
"VALID-FCN-ROBUST-FINALVAE[1,16,64]_random_srch_rzlts.csv",
"VALID-FCN-ROBUST-FINALVAE[1,16]_random_srch_rzlts.csv",
"VALID-FCN-ROBUST-FINALVAE[1,32]_random_srch_rzlts.csv",
"VALID-FCN-ROBUST-FINALVAE[1,4,32]_random_srch_rzlts.csv",
"VALID-FCN-ROBUST-FINALVAE(batch_2_ALLMODELS_TEST_results.csv",
"VALID-FCN-ROBUST-FINALVAE(batch__ALLMODELS_TEST_results.csv",
"VALID-FCN-ROBUST-FINALVAE(batch_size=3_ALLMODELS_TEST_results.csv",
"VALID-FCN-ROBUST-FINAL__VAE__random_search_results.csv",
"VALID-FCN-ROBUST-FINAL_VAE_test_results.csv",
]

In [161]:
mooo

'VALID-FCN-ROBUST-FINAL_VAE_test_results.csv'

In [160]:
df_results.keys()

Index(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_output_activation', 'param_optimizer', 'param_loss', 'param_l2_regularizer', 'param_hidden_activation', 'param_gamma', 'param_epochs', 'param_dropout_rate', 'param_contamination', 'param_capacity', 'param_batch_size', '879155_test_eer', '785873_test_eer', '201848_test_eer', '368258_test_eer', '710707_test_eer', '186676_test_eer', '827212_test_eer', '815316_test_eer', '698266_test_eer', 'mean_test_eer', 'std_test_eer', 'rank_test_eer', '879155_test_accuracy', '785873_test_accuracy', '201848_test_accuracy', '368258_test_accuracy', '710707_test_accuracy', '186676_test_accuracy', '827212_test_accuracy', '815316_test_accuracy', '698266_test_accuracy', 'mean_test_accuracy', 'std_test_accuracy', 'rank_test_accuracy', 'owner', 'run', '264325_test_eer', '264325_test_accuracy'], dtype='object')

In [181]:
modelsss=[

"VALID-FCN-ROBUST-FINALVAE[1,16,64]_random_srch_rzlts.csv", 
"VALID-FCN-ROBUST-FINALVAE[1,4,32]_random_srch_rzlts.csv", 
"VALID-FCN-ROBUST-FINALVAE[1,32]_random_srch_rzlts.csv", 
"VALID-FCN-ROBUST-FINALVAE[1,16]_random_srch_rzlts.csv"
]

modelss = modelss_valid_test
test=0


epochsss=[500,1500,2500,3500,4500,5500,6500,7500,8500,9500]
batches=[16,32]
batches=[1]
epochsss=[1]
rr=[] 
for l in range(len(modelss)): 
    rr.append({})
for k in range(len(rr)):
    for b in batches: 
        for ep in epochsss: 
            rr[k][b,ep]=[0,0]

k=0
mini=1
maxi=0
for mooo in modelss: 
    if mooo == 'VALID-FCN-ROBUST-FINAL_VAE_test_results.csv':
        continue
    if 'VAE' not in mooo: 
        continue 
    if 'test' or 'TEST' in mooo: 
        test=1
    else: 
        test=0 
    df_results = pd.read_csv(OUTPUT_PATH / mooo)
    print(mooo)
    
    '''
    print("EER= Mean {:.4f} Var {:.4f}".format(round(df_results["mean_test_eer"].mean(), 4),(round(df_results["std_test_eer"].var(), 4))))
    print("Acc= Mean {:.4f} Var {:.4f}".format(round(df_results["mean_test_accuracy"].mean(), 4),(round(df_results["std_test_accuracy"].var(), 4))))

    df_results[df_results["param_epochs"] == 2500][
            [
                "mean_test_accuracy",
            ]
       ].sort_values("mean_test_eer").head(1)
    ''' 
    for b in batches: 
        for ep in epochsss: 
            #print(int(ep),int(b))
            if test==1: 
                df=df_results
                rr[k][b,ep][0]=df["test_eer"].mean()
                rr[k][b,ep][1]=df["test_accuracy"].mean()
                if df["test_accuracy"].mean() > maxi: 
                    maxi= df["test_accuracy"].mean()
                    maxi_k=k
                    maxi_b=b
                    maxi_ep=ep

                if df["test_eer"].mean() < mini: 
                    mini= df["test_eer"].mean()
                    mini_k=k
                    mini_b=b
                    mini_ep=ep
            else:
                df=df_results.loc[(df_results["rank_test_accuracy"] == 1)]
                #df=df_results.loc[(df_results["param_epochs"] == ep)&(df_results["param_batch_size"] == b)]
                #print("EER= Mean {:.4f} Var {:.4f}".format(round(df["mean_test_eer"].mean(), 4),(round(df["std_test_eer"].var(), 4))))
                #print("Acc= Mean {:.4f} Var {:.4f}".format(round(df["mean_test_accuracy"].mean(), 4),(round(df["std_test_accuracy"].var(), 4))))
                rr[k][b,ep][0]=df["mean_test_eer"].mean()
                rr[k][b,ep][1]=df["mean_test_accuracy"].mean()

                if df["mean_test_accuracy"].mean() > maxi: 
                    maxi= df["mean_test_accuracy"].mean()
                    maxi_k=k
                    maxi_b=b
                    maxi_ep=ep

                if df["mean_test_eer"].mean() < mini: 
                    mini= df["mean_test_eer"].mean()
                    mini_k=k
                    mini_b=b
                    mini_ep=ep
    k=k+1

VALID-FCN-ROBUST-FINALVAE(batch_2_ALLMODELS_TEST_results.csv
VALID-FCN-ROBUST-FINALVAE(batch__ALLMODELS_TEST_results.csv
VALID-FCN-ROBUST-FINALVAE(batch_size=3_ALLMODELS_TEST_results.csv


In [182]:
#VAEs_testvalid
print(maxi,maxi_k,maxi_b,maxi_ep)
print(mini,mini_k,mini_b,mini_ep)

0.6662862616310893 1 1 1
0.356974071266925 1 1 1


In [173]:
#VAEs_valid
print(maxi,maxi_k,maxi_b,maxi_ep)
print(mini,mini_k,mini_b,mini_ep)

0.7004913076341648 7 1 1
0.33006295155891235 7 1 1


In [174]:
modelss[7]

'VALID-FCN-ROBUST-FINAL[32, 16, 2, 1]__VAE_ez_noparams_in_construct__random_search_results_again.csv'

In [169]:
#randvalid
print(maxi,maxi_k,maxi_b,maxi_ep)
print(mini,mini_k,mini_b,mini_ep)

0.7523809523809524 42 1 1
0.28231821075746943 42 1 1


In [170]:
modelss[42]

'VALID-FCN-ROBUST-FINALABOD(conta_All_MODELS_random_search_results.csv'

In [166]:
#testvalid
print(maxi,maxi_k,maxi_b,maxi_ep)
print(mini,mini_k,mini_b,mini_ep)

0.7414340448823208 8 1 1
0.291574267298776 8 1 1


In [167]:
modelss[8]

'VALID-FCN-ROBUST-FINALMCD(assume_ALLMODELS_TEST_results.csv'

In [139]:
#allvalid 
print(maxi,maxi_k,maxi_b,maxi_ep)
# best acc so far is 1,32 with batch32 and epochs 5500 

0.7523809523809524 42 1 1


In [141]:
#allvalid 
print(mini,mini_k,mini_b,mini_ep)
# best eer so far is 1,16 with batch32 and epochs 9500 

0.28231821075746943 42 1 1


In [142]:
modelss[42]

'VALID-FCN-ROBUST-FINALABOD(conta_All_MODELS_random_search_results.csv'

In [109]:
rr[2][32,5500]

[0.354938901730488, 0.671957671957672]

In [108]:
rr[3][32,9500]

[0.35468621491346186, 0.6716931216931218]

In [111]:
mooo="VALID-FCN-ROBUST-FINALVAE[1,32]_random_srch_rzlts.csv", 
mooo="VALID-FCN-ROBUST-FINALVAE[1,16]_random_srch_rzlts.csv"
df_results = pd.read_csv(OUTPUT_PATH / mooo)
ep=500
b=32
df=df_results.loc[(df_results["param_epochs"] == ep)&(df_results["param_batch_size"] == b)]
print("EER= Mean {:.4f} Var {:.4f}".format(round(df["mean_test_eer"].mean(), 4),(round(df["std_test_eer"].var(), 4))))
print("Acc= Mean {:.4f} Var {:.4f}".format(round(df["mean_test_accuracy"].mean(), 4),(round(df["std_test_accuracy"].var(), 4))))

EER= Mean 0.3560 Var 0.0008
Acc= Mean 0.6710 Var 0.0004


In [102]:
def print_best_params_and_scores(rr): 
    

{(16, 500): [0.36357159484298796, 0.6611111111111112],
 (16, 1500): [0.3630649431374183, 0.6612433862433862],
 (16, 2500): [0.36264453732926916, 0.6617724867724867],
 (16, 3500): [0.36321293881082795, 0.6617724867724868],
 (16, 4500): [0.36298163970275654, 0.6619047619047618],
 (16, 5500): [0.3632524521659445, 0.6616402116402116],
 (16, 6500): [0.3630491498717968, 0.6619047619047619],
 (16, 7500): [0.36327442023849194, 0.6615079365079366],
 (16, 8500): [0.36309327627008015, 0.6616402116402116],
 (16, 9500): [0.3631112931107206, 0.6615079365079366],
 (32, 500): [0.3638491279572681, 0.6607142857142857],
 (32, 1500): [0.3639347722526097, 0.6605820105820106],
 (32, 2500): [0.3635670630905786, 0.660978835978836],
 (32, 3500): [0.3634975506125232, 0.6611111111111112],
 (32, 4500): [0.3637391341297499, 0.6611111111111112],
 (32, 5500): [0.3637101891473765, 0.6611111111111111],
 (32, 6500): [0.3631413870950591, 0.6613756613756613],
 (32, 7500): [0.3622653300576328, 0.6624338624338624],
 (32, 8

In [55]:
modelss=[

"VALID-FCN-ROBUST-FINALVAE[1,16,64]_random_srch_rzlts.csv", 
"VALID-FCN-ROBUST-FINALVAE[1,4,32]_random_srch_rzlts.csv", 
"VALID-FCN-ROBUST-FINALVAE[1,32]_random_srch_rzlts.csv", 
"VALID-FCN-ROBUST-FINALVAE[1,16]_random_srch_rzlts.csv"
]

OUTPUT_PATH = Path.cwd() / "output" / "chapter-6-1-4-siamese-cnn" 

for mooo in modelss: 
    df_results = pd.read_csv(OUTPUT_PATH / mooo)
    print(mooo)
    
    '''
        
    print("EER= Mean {:.4f} Var {:.4f}".format(round(df_results["mean_test_eer"].mean(), 4),(round(df_results["std_test_eer"].var(), 4))))
    print("Acc= Mean {:.4f} Var {:.4f}".format(round(df_results["mean_test_accuracy"].mean(), 4),(round(df_results["std_test_accuracy"].var(), 4))))

    
    ''' 
    display(
        df_results[df_results["owner"] == 785873][
            [
                "owner",
                "mean_test_eer",
                "std_test_eer",
                "param_epochs", 
                "mean_test_accuracy",
                "std_test_accuracy",
            ]
        ].sort_values("mean_test_eer").head(1)
    )
      
    print("Best results for each owner:")
    display(
        df_results[df_results["rank_test_eer"] <= 1][
            [
                "owner",
                "mean_test_eer",
                "std_test_eer",
                "param_epochs", 
                "mean_test_accuracy",
                "std_test_accuracy",
                "param_epochs",
            ]
        ].sort_values("mean_test_eer").head(3)
    )

    
    print("\n\n\nMost relevant statistics:")
    display(
        df_results[df_results["rank_test_eer"] <= 1][
            [
                "mean_fit_time",
                "mean_test_accuracy",
                "std_test_accuracy",
                "mean_test_eer",
                "std_test_eer",
                "param_epochs",
            ]
        ].describe()
    )
    

VALID-FCN-ROBUST-FINALVAE[1,16,64]_random_srch_rzlts.csv


Unnamed: 0,owner,mean_test_eer,std_test_eer,param_epochs,mean_test_accuracy,std_test_accuracy
41,785873,0.275875,0.106506,1500,0.765873,0.097525


Best results for each owner:


Unnamed: 0,owner,mean_test_eer,std_test_eer,param_epochs,mean_test_accuracy,std_test_accuracy,param_epochs.1
157,827212,0.225427,0.116185,7500,0.808201,0.133211,7500
41,785873,0.275875,0.106506,1500,0.765873,0.097525,1500
53,785873,0.275875,0.106506,3500,0.765873,0.097525,3500





Most relevant statistics:


Unnamed: 0,mean_fit_time,mean_test_accuracy,std_test_accuracy,mean_test_eer,std_test_eer,param_epochs
count,14.0,14.0,14.0,14.0,14.0,14.0
mean,268.424267,0.652589,0.124859,0.369817,0.111617,5928.571429
std,153.792765,0.116632,0.022791,0.097989,0.018306,3081.315546
min,121.800812,0.478836,0.089635,0.225427,0.060385,1500.0
25%,193.304797,0.55787,0.102573,0.287056,0.106588,2750.0
50%,227.279949,0.683862,0.12849,0.34239,0.115395,7000.0
75%,265.933067,0.754299,0.143391,0.452225,0.116041,8500.0
max,641.560724,0.808201,0.154587,0.510397,0.148597,9500.0


VALID-FCN-ROBUST-FINALVAE[1,4,32]_random_srch_rzlts.csv


Unnamed: 0,owner,mean_test_eer,std_test_eer,param_epochs,mean_test_accuracy,std_test_accuracy
42,785873,0.275875,0.106506,2500,0.765873,0.097525


Best results for each owner:


Unnamed: 0,owner,mean_test_eer,std_test_eer,param_epochs,mean_test_accuracy,std_test_accuracy,param_epochs.1
142,827212,0.225409,0.122611,2500,0.805556,0.142083,2500
42,785873,0.275875,0.106506,2500,0.765873,0.097525,2500
38,879155,0.321006,0.120471,8500,0.710317,0.131253,8500





Most relevant statistics:


Unnamed: 0,mean_fit_time,mean_test_accuracy,std_test_accuracy,mean_test_eer,std_test_eer,param_epochs
count,15.0,15.0,15.0,15.0,15.0,15.0
mean,300.111888,0.62672,0.130897,0.392577,0.112805,5300.0
std,150.69679,0.110397,0.019964,0.092532,0.016916,2426.04911
min,113.717769,0.478836,0.091891,0.225409,0.0624,1500.0
25%,204.995898,0.510582,0.118981,0.326761,0.107641,3000.0
50%,233.847738,0.645503,0.13041,0.396304,0.115395,5500.0
75%,438.326323,0.702381,0.148873,0.489815,0.117933,7000.0
max,562.788305,0.805556,0.154587,0.510397,0.14491,8500.0


VALID-FCN-ROBUST-FINALVAE[1,32]_random_srch_rzlts.csv


Unnamed: 0,owner,mean_test_eer,std_test_eer,param_epochs,mean_test_accuracy,std_test_accuracy
50,785873,0.325887,0.126249,500,0.707672,0.128232


Best results for each owner:


Unnamed: 0,owner,mean_test_eer,std_test_eer,param_epochs,mean_test_accuracy,std_test_accuracy,param_epochs.1
146,827212,0.204486,0.14214,6500,0.833333,0.133748,6500
151,827212,0.204486,0.14214,1500,0.832011,0.133565,1500
125,186676,0.29477,0.121484,5500,0.740741,0.125459,5500





Most relevant statistics:


Unnamed: 0,mean_fit_time,mean_test_accuracy,std_test_accuracy,mean_test_eer,std_test_eer,param_epochs
count,11.0,11.0,11.0,11.0,11.0,11.0
mean,218.920067,0.689394,0.115256,0.338539,0.107668,4045.454545
std,133.112899,0.086338,0.021114,0.080509,0.028155,2296.241989
min,34.10477,0.585979,0.078856,0.204486,0.066895,500.0
25%,144.168518,0.621032,0.097121,0.306168,0.084768,2500.0
50%,218.570939,0.691799,0.123392,0.334332,0.121484,5500.0
75%,266.198068,0.724206,0.130898,0.404966,0.125712,5500.0
max,454.131598,0.833333,0.141726,0.431424,0.14214,6500.0


VALID-FCN-ROBUST-FINALVAE[1,16]_random_srch_rzlts.csv


Unnamed: 0,owner,mean_test_eer,std_test_eer,param_epochs,mean_test_accuracy,std_test_accuracy
58,785873,0.325451,0.126166,8500,0.707672,0.1286


Best results for each owner:


Unnamed: 0,owner,mean_test_eer,std_test_eer,param_epochs,mean_test_accuracy,std_test_accuracy,param_epochs.1
155,827212,0.204486,0.14214,5500,0.833333,0.133748,5500
135,186676,0.294218,0.12248,5500,0.740741,0.127204,5500
99,368258,0.31703,0.076905,9500,0.705026,0.080503,9500





Most relevant statistics:


Unnamed: 0,mean_fit_time,mean_test_accuracy,std_test_accuracy,mean_test_eer,std_test_eer,param_epochs
count,10.0,10.0,10.0,10.0,10.0,10.0
mean,254.80676,0.674868,0.114871,0.351991,0.105413,6300.0
std,181.582499,0.076549,0.020086,0.071009,0.025817,3614.784456
min,38.975368,0.584656,0.080503,0.204486,0.073016,500.0
25%,180.809207,0.613757,0.098191,0.319135,0.081017,4750.0
50%,219.473975,0.668651,0.12043,0.356995,0.108086,7000.0
75%,286.127932,0.707011,0.128251,0.411789,0.125863,9500.0
max,573.180895,0.833333,0.142906,0.429959,0.14214,9500.0


In [70]:
def uutils_plot_randomsearch_results(df_results, n_top=1):
    # Prepare data for plotting
    df_plot = df_results[df_results["rank_test_eer"] <= n_top].rename(
        columns={
            "param_epochs": r"$\epochs$",
            "param_gamma": r"$\gamma$",
            "mean_test_accuracy": "Mean Test Acc.",
            "mean_test_eer": "Mean Test EER",
        }
    )
    df_plot["Mean Test EER"] = df_plot["Mean Test EER"] * -1  # Because fewer is more

    median_nu = df_plot[r"$\epochs$"].median()
    median_gamma = df_plot[r"$\epochs$"].median()

    # Plot
    fig = plt.figure(figsize=(5.473 / 1.3, 2), dpi=180)
    g = sns.scatterplot(
        x=r"$\epochs$",
        y=r"$\gamma$",
        data=df_plot,
        size="Mean Test EER",
        sizes=(7, 60),
        hue="Mean Test EER",
        alpha=1,
        #        palette="Blues",
        linewidth=0,
    )

    # Format Legend labels
    leg = g.get_legend()
    new_handles = [h for h in leg.legendHandles]
    new_labels = []
    for i, handle in enumerate(leg.legendHandles):
        label = handle.get_label()
        try:
            new_labels.append(f"{abs(float(label)):.3f}")
        except ValueError:
            new_labels.append("")

    # Plot mean values
    plt.plot(
        [-0.01, 0.31],
        [median_gamma, median_gamma],
        linestyle="dashed",
        linewidth=0.8,
        alpha=0.7,
        color="black",
    )
    plt.text(
        0.23,
        median_gamma * 1.7 ** 2,
        r"median($\epochs$)",
        fontsize=6,
        color="black",
        alpha=0.9,
    )
    plt.text(
        0.23,
        median_gamma * 1.2 ** 2,
        f"{median_gamma:.3f}",
        fontsize=5,
        color="black",
        alpha=0.9,
    )

    plt.plot(
        [median_nu, median_nu],
        [0.0001, 1000],
        linestyle="dashed",
        linewidth=0.8,
        alpha=0.7,
        color="black",
    )
    plt.text(
        median_nu + 0.005, 400, r"median($\epochs$)", fontsize=6, color="black", alpha=0.9
    )
    plt.text(
        median_nu + 0.005, 200, f"{median_nu:.3f}", fontsize=5, color="black", alpha=0.9
    )

    # Adjust axes & legend
    plt.yscale("log")
    plt.ylim(0.0001, 1000)
    plt.xlim(0, 0.305)
    plt.legend(
        new_handles,
        new_labels,
        bbox_to_anchor=(1.02, 1),
        loc=2,
        borderaxespad=0.0,
        title="Mean EER per Owner\n(Validation Results)",
        title_fontsize=5,
    )

    fig.tight_layout()
    return fig

In [10]:
df_results.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_output_activation,param_optimizer,param_loss,param_l2_regularizer,param_hidden_activation,param_gamma,param_epochs,param_dropout_rate,param_contamination,param_capacity,param_batch_size,879155_test_eer,785873_test_eer,201848_test_eer,368258_test_eer,710707_test_eer,186676_test_eer,827212_test_eer,815316_test_eer,698266_test_eer,mean_test_eer,std_test_eer,rank_test_eer,879155_test_accuracy,785873_test_accuracy,201848_test_accuracy,368258_test_accuracy,710707_test_accuracy,186676_test_accuracy,827212_test_accuracy,815316_test_accuracy,698266_test_accuracy,mean_test_accuracy,std_test_accuracy,rank_test_accuracy,owner,run,264325_test_eer,264325_test_accuracy
0,44.716449,0.946053,0.037612,0.015437,<function softmax at 0x2aee6ae22e18>,sgd,<function mean_squared_error at 0x2aee6add8d08>,0.5,<function tanh at 0x2aee6ae331e0>,1.5,500,0.25,0.137,1,16,0.270833,0.485294,0.23913,0.40678,0.453125,0.3,0.351852,0.513889,0.40678,0.380854,0.09113,2.0,0.761905,0.52381,0.785714,0.630952,0.571429,0.738095,0.690476,0.47619,0.630952,0.645503,0.101757,1,264325,0,,
1,109.801075,2.765361,0.022697,0.009667,<function softmax at 0x2aee6ae22e18>,sgd,<function mean_squared_error at 0x2aee6add8d08>,0.5,<function tanh at 0x2aee6ae331e0>,1.5,1500,0.25,0.137,1,16,0.255319,0.492754,0.23913,0.416667,0.453125,0.3,0.351852,0.513889,0.416667,0.382156,0.094995,13.0,0.77381,0.511905,0.785714,0.619048,0.571429,0.738095,0.690476,0.47619,0.619048,0.642857,0.105439,15,264325,0,,
2,175.247235,4.848013,0.03198,0.008724,<function softmax at 0x2aee6ae22e18>,sgd,<function mean_squared_error at 0x2aee6add8d08>,0.5,<function tanh at 0x2aee6ae331e0>,1.5,2500,0.25,0.137,1,16,0.255319,0.492754,0.23913,0.416667,0.453125,0.3,0.351852,0.513889,0.40678,0.381057,0.094646,5.0,0.77381,0.511905,0.785714,0.619048,0.571429,0.738095,0.690476,0.47619,0.630952,0.64418,0.105207,5,264325,0,,
3,243.487188,2.799758,0.021167,0.005925,<function softmax at 0x2aee6ae22e18>,sgd,<function mean_squared_error at 0x2aee6add8d08>,0.5,<function tanh at 0x2aee6ae331e0>,1.5,3500,0.25,0.137,1,16,0.270833,0.492754,0.23913,0.40678,0.453125,0.313725,0.375,0.513889,0.40678,0.38578,0.090246,19.0,0.761905,0.511905,0.785714,0.630952,0.571429,0.72619,0.666667,0.47619,0.619048,0.638889,0.101482,19,264325,0,,
4,303.454199,3.601334,0.024455,0.010206,<function softmax at 0x2aee6ae22e18>,sgd,<function mean_squared_error at 0x2aee6add8d08>,0.5,<function tanh at 0x2aee6ae331e0>,1.5,4500,0.25,0.137,1,16,0.255319,0.485294,0.23913,0.416667,0.453125,0.3,0.351852,0.513889,0.40678,0.380228,0.093692,1.0,0.77381,0.52381,0.785714,0.619048,0.571429,0.738095,0.690476,0.47619,0.630952,0.645503,0.103598,1,264325,0,,


In [71]:
uutils_plot_randomsearch_results(df_results)

ValueError: 
\epochs
^
Unknown symbol: \epochs (at char 0), (line:1, col:1)

Error in callback <function install_repl_displayhook.<locals>.post_execute at 0x2b8f6b28f7b8> (for post_execute):


ValueError: 
\epochs
^
Unknown symbol: \epochs (at char 0), (line:1, col:1)

ValueError: 
\epochs
^
Unknown symbol: \epochs (at char 0), (line:1, col:1)

<Figure size 757.8x360 with 1 Axes>