In [None]:
# Notebook setup: autoreload, imports, seeding and the shared helper objects
# used by the cells below.
# autoreload mode 2 re-imports edited modules before each cell execution.
%load_ext autoreload
%autoreload 2
# NOTE(review): this star import is presumably what brings `torch`, `np` and
# `AD_GPLVM` into scope for the later cells (none of them is imported
# explicitly) - confirm against new_aeb_gplvm. Keep it first so the explicit
# imports below win on any name clash.
from new_aeb_gplvm import *
from utils.data_generator import DataGenerator
from utils.myutils import Utils
from tqdm import trange
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt
import random
from tqdm import tqdm_notebook

# Seeds only Python's built-in `random` module; no NumPy or torch seed is set
# in this cell.
random.seed(42)
import json
import os
import pandas as pd

# Shared instances: `datagenerator` is used by create_dataframe and `utils`
# by the experiment loop (metric computation).
datagenerator = DataGenerator()
utils = Utils()
import time

In [None]:
def create_dataframe(experiment):
    """Build the train/test tensors for one experiment configuration.

    Side effect: sets ``datagenerator.dataset`` on the module-level generator
    before calling ``datagenerator.generator``.

    Args:
        experiment: Mapping with the keys ``"dataset"``, ``"noise_type"``,
            ``"anomaly_type"`` and ``"noise_ratio"``. The value ``"normal"``
            for ``"noise_type"`` or ``"anomaly_type"`` is a sentinel that is
            forwarded to the generator as ``None``.

    Returns:
        Tuple ``(Y_train, Y_test, lb_train, lb_test)`` of ``torch.float32``
        tensors built from the generator's ``"X_train"``, ``"X_test"``,
        ``"y_train"`` and ``"y_test"`` entries. When neither noise nor a
        synthetic anomaly type is requested, the training tensors keep only
        the rows whose label equals 0.
    """
    # Each option must be read from its own key; "normal" maps to None.
    noise_type = (
        None if experiment["noise_type"] == "normal" else experiment["noise_type"]
    )  # irrelevant,duplicated
    anomaly_type = (
        None if experiment["anomaly_type"] == "normal" else experiment["anomaly_type"]
    )  # cluster,global,local,dependency

    datagenerator.dataset = experiment["dataset"]
    data = datagenerator.generator(
        la=1.00,
        realistic_synthetic_mode=anomaly_type,
        noise_type=noise_type,
        noise_ratio=float(experiment["noise_ratio"]),
        stdscale=True,
        minmax=False,
    )

    Y_train = torch.tensor(data["X_train"], dtype=torch.float32)
    Y_test = torch.tensor(data["X_test"], dtype=torch.float32)
    lb_train = torch.tensor(data["y_train"], dtype=torch.float32)
    lb_test = torch.tensor(data["y_test"], dtype=torch.float32)

    if not anomaly_type and not noise_type:
        print("Experimento para dados normais")
        # Row indices (first dimension) of the label-0 training samples;
        # torch-native, so the tensors never leave torch for the lookup.
        idx_n = torch.nonzero(lb_train == 0, as_tuple=True)[0]
        Y_train = Y_train[idx_n]
        lb_train = lb_train[idx_n]

    return Y_train, Y_test, lb_train, lb_test

In [None]:
# Study configuration: the experiment grid to read, the results file, and a
# separate file that collects the experiments that raised.
EXPERIMENTS_FILE = "experiments/refine/001_complete_normal_study.json"
OUTPUT_FILE = "experiments/refine/001_complete_normal_study_results.json"
OUTPUT_FAIL_FILE = "experiments/refine/001_complete_normal_study_failed.json"
# Only this many experiments from the head of the study file are executed.
MAX_EXPERIMENTS = 10

with open(EXPERIMENTS_FILE) as file:
    experiments = json.load(file)
failed_experiments = []

for experiment in tqdm_notebook(experiments[:MAX_EXPERIMENTS]):
    Y_train, Y_test, lb_train, lb_test = create_dataframe(experiment)
    # n_train, data_dim and kernel are not consumed by this loop; they stay
    # defined because notebook globals may be read by other cells.
    n_train = len(Y_train)
    data_dim = Y_train.shape[1]
    kernel = experiment["kernel"]
    latent_dim = int(experiment["latent_dim"])
    # "layers" is a comma-separated string of integers, e.g. "5,5".
    nn_layers = tuple(map(int, experiment["layers"].split(",")))
    n_inducing = int(experiment["n_inducing"])
    n_epochs = int(experiment["n_epochs"])
    lr = float(experiment["learning_rate"])
    batch_size = int(experiment["batch_size"])

    try:
        gplvm = AD_GPLVM(latent_dim, n_inducing, n_epochs, nn_layers, lr, batch_size)

        # Fitting the model (wall-clock timed).
        train_start_time = time.time()
        gplvm.fit(Y_train)
        train_end_time = time.time()

        # Validation score computed on the training data.
        validation_score = gplvm.calculate_train_elbo(Y_train)

        # Scoring the test set (wall-clock timed).
        pred_start_time = time.time()
        test_score = gplvm.predict_score(Y_test)
        pred_end_time = time.time()

        # Store the metrics on the experiment record itself.
        # NOTE(review): validation_score and gplvm.loss_list must be
        # JSON-serializable for the dump below - TODO confirm they are plain
        # floats/lists rather than tensors.
        metrics = utils.metric(y_true=lb_test, y_score=test_score)
        experiment["elbo"] = validation_score
        experiment["loss_list"] = gplvm.loss_list
        experiment["auc_roc"] = metrics["aucroc"]
        experiment["auc_pr"] = metrics["aucpr"]
        experiment["training_time"] = train_end_time - train_start_time
        experiment["inference_time"] = pred_end_time - pred_start_time

        # Persist after every successful run so an interrupted sweep keeps
        # its progress. The whole `experiments` list is written, including
        # entries that have not been run.
        with open(OUTPUT_FILE, "w") as file:
            json.dump(experiments, file)

    except Exception as exc:
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit stop the sweep instead of being logged as a failure.
        experiment["auc_roc"] = 0.0
        experiment["auc_pr"] = 0.0
        experiment["error"] = repr(exc)
        failed_experiments.append(experiment)
        print(f"Experiment failed: {exc!r}")
        # Failures go to their own file; writing them to OUTPUT_FILE would
        # replace the accumulated results with the failure list.
        with open(OUTPUT_FAIL_FILE, "w") as file:
            json.dump(failed_experiments, file)