In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from new_aeb_gplvm import *
from utils.data_generator import DataGenerator
from utils.myutils import Utils
from tqdm import trange
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt
import random
from tqdm import tqdm_notebook
import json
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import seaborn as sns
import numpy as np
from scipy.stats import spearmanr, pearsonr
import matplotlib.pyplot as plt

# Shared helper instance; later cells call utils.metric, utils.rmse and
# utils.read_json_from_folder on it.
utils = Utils()
import time

In [None]:
def create_dataframe(experiment):
    """Generate the data for one experiment and return it as float32 tensors.

    Args:
        experiment: Mapping with the keys "dataset", "noise_type",
            "anomaly_type" and "noise_ratio". For the two type fields the
            literal "normal" is forwarded to the generator as ``None``.

    Returns:
        Tuple ``(Y_train, Y_test, lb_train, lb_test)`` built from the
        generator's "X_train", "X_test", "y_train" and "y_test" entries,
        each converted to a ``torch.float32`` tensor.
    """

    def _none_if_normal(value):
        # "normal" is the sentinel for "option not used".
        return None if value == "normal" else value

    source = DataGenerator()

    noise_kind = _none_if_normal(experiment["noise_type"])  # irrelevant,duplicated
    anomaly_kind = _none_if_normal(
        experiment["anomaly_type"]
    )  # cluster,global,local,dependency

    source.dataset = experiment["dataset"]
    generated = source.generator(
        la=1.00,
        realistic_synthetic_mode=anomaly_kind,
        noise_type=noise_kind,
        noise_ratio=float(experiment["noise_ratio"]),
        stdscale=True,
        minmax=False,
    )

    # Features first (train, test), then labels (train, test).
    return tuple(
        torch.tensor(generated[key], dtype=torch.float32)
        for key in ("X_train", "X_test", "y_train", "y_test")
    )

In [None]:
# Load the saved experiment configurations, drop the "34_smtp" dataset, and
# keep both views: `df` for tabular inspection, `experiments` as a list of
# per-experiment dicts for the training loop.
EXPERIMENTS_FILE = "experiments/complete/000_datasets_01_47_normal_03_best.json"
with open(EXPERIMENTS_FILE) as file:
    df = pd.DataFrame(json.load(file))
df = df.loc[df["dataset"] != "34_smtp"]
experiments = df.to_dict(orient="records")

In [None]:
# Preview the `val_auc_roc` column for the first 15 loaded experiment records.
df[["dataset", "val_auc_roc"]].head(15)

In [None]:
# Run each selected experiment: fit AD_GPLVM on the training split, score the
# validation and test halves, and write one JSON result file per dataset.
# Experiments that finish are collected in `success_experiments`; those that
# raise are collected in `failed_experiments` (with zeroed test metrics).
success_experiments = []
failed_experiments = []

# One result file per dataset lives here. Create the folder up front so a
# missing directory cannot make the save fail in both the try and except paths.
results_dir = "experiments/complete/results/contaminated"
os.makedirs(results_dir, exist_ok=True)

# Number of repeated scoring passes that are averaged into the validation score.
n_validation_passes = 100

# NOTE(review): the [0:1] slice runs only the first experiment; widen it to
# process every dataset.
for experiment in tqdm_notebook(experiments[0:1]):
    Y_train, Y_test, lb_train, lb_test = create_dataframe(experiment)
    # Split the generated test set in half: validation half and test half.
    Y_val, Y_test, lb_val, lb_test = train_test_split(
        Y_test, lb_test, test_size=0.50, random_state=42
    )

    n_train = len(Y_train)
    data_dim = Y_train.shape[1]
    kernel = experiment["kernel"]
    latent_dim = int(experiment["latent_dim"])
    # "layers" is stored as a comma-separated string, e.g. "5,5".
    nn_layers = tuple(map(int, experiment["layers"].split(",")))
    n_inducing = int(experiment["n_inducing"])
    n_epochs = int(experiment["n_epochs"])
    lr = float(experiment["learning_rate"])
    batch_size = int(experiment["batch_size"])
    dataset = experiment["dataset"]
    result_path = f"{results_dir}/{dataset}.json"

    try:
        gplvm = AD_GPLVM(
            latent_dim, n_inducing, n_epochs, nn_layers, lr, batch_size, kernel
        )

        # Fit the model and time the training.
        train_start_time = time.time()
        gplvm.fit(Y_train)
        train_end_time = time.time()

        # Validation score: element-wise mean over repeated scoring passes.
        val = [gplvm.predict_score(Y_val) for _ in range(n_validation_passes)]
        validation_score = np.mean(val, axis=0)

        # Test score: a single timed scoring pass.
        pred_start_time = time.time()
        test_score = gplvm.predict_score(Y_test)
        pred_end_time = time.time()

        # Metrics are stored under the key matching the split they were
        # computed on (val_* from the validation half, test_* from the test half).
        # NOTE(review): result files written by the earlier version of this cell
        # hold these two metric sets under each other's keys; regenerate them
        # before comparing against new results.
        validation_metrics = utils.metric(y_true=lb_val, y_score=validation_score)
        test_metrics = utils.metric(y_true=lb_test, y_score=test_score)
        experiment["negative_elbo"] = validation_score.sum()
        experiment["train_loss_curve"] = gplvm.loss_list
        experiment["val_auc_roc"] = validation_metrics["aucroc"]
        experiment["val_auc_pr"] = validation_metrics["aucpr"]
        experiment["test_auc_roc"] = test_metrics["aucroc"]
        experiment["test_auc_pr"] = test_metrics["aucpr"]
        experiment["training_time"] = train_end_time - train_start_time
        experiment["inference_time"] = pred_end_time - pred_start_time

        # Reconstruction error on the validation half.
        Y_val_recon, Y_val_recon_covar = gplvm.model.reconstruct_y(Y_val)
        experiment["val_reconstruct_error"] = float(utils.rmse(Y_val, Y_val_recon.T))

        pd.Series(experiment).to_json(result_path)
        success_experiments.append(experiment)

    except Exception as error:
        # Best effort: report the failure, store zeroed test metrics so the
        # dataset still has a result file, and continue with the next experiment.
        print("An exception occurred:", error)
        experiment["test_auc_roc"] = 0.0
        experiment["test_auc_pr"] = 0.0
        pd.Series(experiment).to_json(result_path)
        failed_experiments.append(experiment)

In [None]:
# Gather the per-dataset result files written by the experiment loop.
# Presumably returns a DataFrame (later cells call .to_json and index columns
# on it) — TODO confirm against utils.read_json_from_folder.
df_contamined = utils.read_json_from_folder(
    "experiments/complete/results/contaminated/"
)

In [None]:
# Export the combined results to a single JSON file in records orientation.
df_contamined.to_json("000_datasets_01_47_contamined_01_results.json", orient="records")

In [None]:
# Median difference in test AUC-ROC between the baseline frame (`df`) and the
# contaminated-run frame (`df_contamined`).
# Rows are paired by dataset name rather than by integer index: the two frames
# come from different sources and `df` was filtered without resetting its
# index, so positional labels are not guaranteed to refer to the same dataset.
# Datasets present on only one side become NaN and are skipped by median().
# NOTE(review): assumes both frames expose a unique "dataset" column — confirm
# for the frame returned by utils.read_json_from_folder.
baseline_auc = df.set_index("dataset")["test_auc_roc"]
contaminated_auc = df_contamined.set_index("dataset")["test_auc_roc"]
(baseline_auc - contaminated_auc).median()

In [None]:
# Side-by-side distribution of test AUC-ROC for the baseline ("normal") results
# and the contaminated-run results. Labels and a title are set so the figure
# can be read on its own.
fig, ax = plt.subplots()
ax.boxplot([df["test_auc_roc"], df_contamined["test_auc_roc"]])
ax.set_xticklabels(["normal", "contaminated"])
ax.set_ylabel("test AUC-ROC")
ax.set_title("Test AUC-ROC: normal vs. contaminated")
plt.show()

In [None]:
# Display the combined contaminated-run results for inspection.
df_contamined