In [5]:
import numpy as np
import pandas as pd
import torch
import seaborn as sns
import matplotlib.pyplot as plt
import scipy
import os

from ssl_eeg import preprocessing as pr, preprocessing_nback as prn, preprocessing_unlabeled as pru, model, prediction as prd, metrics, plot

# Locations of the two bookkeeping CSV files, as exposed by the `model` module,
# and the tables loaded from them (first CSV column is used as the index).
models_doc_path, models_conf_path = model.models_doc_path, model.models_conf_path
models_doc = pd.read_csv(models_doc_path, index_col=0)
models_conf = pd.read_csv(models_conf_path, index_col=0)

# Everything in the working directory path before the first occurrence of
# "code" is treated as the project root.
root_dir = os.getcwd().partition("code")[0]
accuracies_path = os.path.join(root_dir, "models", "accuracies")

### Metric calculation for encoders

In [None]:
# calculate validation and test accuracy for n-back using knn
#
# For every model listed in `models_doc` the "<model_name>_best_val" checkpoint
# is loaded and evaluated with `prd.make_predictions`:
#   * supervised models: on the validation fold (`val_idx`) and on the
#     configured test session,
#   * self-supervised models: on each n-back session held out in turn, with
#     the results averaged.
# Results are written back into `models_doc` as "<split>_<pool_txt><metric>".
d_type = "euclidean"
pool_labels = True
knn_k = 7        # `k` passed to prd.make_predictions
n_sessions = 4   # sessions 1..n_sessions are each used once as test session

# Column-name infix for metrics computed with pooled labels. Computed once,
# before the loop, so it is defined even when `models_doc` has no rows.
pool_txt = "pool_" if pool_labels else ""

for i, row in models_doc.iterrows():
    cur_conf = models_conf.loc[row["conf_id"]]

    print(row["model_name"])
    test_model = model.load_model(row["model_name"]+"_best_val", cur_conf["out_dim"], dropout_p=cur_conf["dropout_p"])

    # n-back data
    blocks, chunks = prn.arange_data(lowpass=cur_conf["filter"], trans_band_auto=False, verbose=False)
    train_chunks, test_chunks = prn.get_train_test_sets(chunks, cur_conf["test_session"])

    if cur_conf["supervised"] == "supervised":
        # validation
        folds = pr.get_folds(train_chunks, k_folds=10)
        train_set, val_set = pr.get_train_val_sets(folds, row["val_idx"])
        pred_out, acc = prd.make_predictions(val_set, train_set, test_model, k=knn_k, blocks=blocks, distance_type=d_type, pool_labels=pool_labels)
        mae = metrics.calc_mae(pred_out)

        models_doc.loc[i, "val_" + pool_txt + "mae"] = mae
        models_doc.loc[i, "val_" + pool_txt + "acc"] = acc
        print(acc, mae)

        # test
        pred_out, acc = prd.make_predictions(test_chunks, train_chunks, test_model, k=knn_k, blocks=blocks, distance_type=d_type, pool_labels=pool_labels)
        mae = metrics.calc_mae(pred_out)

        models_doc.loc[i, "test_" + pool_txt + "mae"] = mae
        models_doc.loc[i, "test_" + pool_txt + "acc"] = acc
        print(acc, mae)

    elif cur_conf["supervised"] == "self-supervised":
        # test using disjoint sessions
        acc_mean = 0
        mae_mean = 0

        for test_session in range(1, n_sessions + 1):
            train_chunks, test_chunks = prn.get_train_test_sets(chunks, test_session=test_session)
            # use the same distance setting as the supervised branch
            # (previously hard-coded to "euclidean" here)
            pred_out, acc = prd.make_predictions(test_chunks, train_chunks, test_model, k=knn_k, blocks=blocks,
                                                 distance_type=d_type, pool_labels=pool_labels)
            acc_mean += acc
            mae_mean += metrics.calc_mae(pred_out)

        acc_mean = acc_mean / n_sessions
        mae_mean = mae_mean / n_sessions
        models_doc.loc[i, "test_" + pool_txt + "mae"] = mae_mean
        models_doc.loc[i, "test_" + pool_txt + "acc"] = acc_mean
        print(acc_mean, mae_mean)
        

In [5]:
# calculate mean values for validation loss and accuracy
#
# Aggregates the per-model rows of `models_doc` by configuration (`conf_id`)
# and stores mean / standard deviation in the matching row of `models_conf`.

# Derive the column-name infix from `pool_labels` in this cell, so the cell
# does not depend on a `pool_txt` variable assigned somewhere else.
pool_txt = "pool_" if pool_labels else ""
loss_cols = ["val_loss", "train_loss"]

for c_id, frame in models_doc.groupby("conf_id"):
    loss_mean = frame[loss_cols].mean(axis=0)
    loss_std = frame[loss_cols].std(axis=0)
    models_conf.loc[c_id, ["mean_val_loss", "mean_train_loss", "std_val_loss", "std_train_loss"]] = [
        loss_mean["val_loss"], loss_mean["train_loss"], loss_std["val_loss"], loss_std["train_loss"]
    ]

    for metric in ["acc", "mae"]:
        # MAE is not aggregated when labels are pooled
        if pool_txt == "pool_" and metric == "mae":
            continue

        for split in ["val", "test"]:
            col = split + "_" + pool_txt + metric
            # A metric column only exists if the corresponding evaluation was
            # run and written to `models_doc`; skip it instead of raising KeyError.
            if col not in frame.columns:
                continue
            col_mean = frame[col].mean(axis=0)
            col_std = frame[col].std(axis=0)
            models_conf.loc[c_id, ["mean_" + col, "std_" + col]] = [col_mean, col_std]

In [None]:
# calculate triplet validation accuracy for self-supervised models
#
# Each self-supervised model is scored with `metrics.calc_triplet_accuracies_ssl`
# on the unlabeled chunks of its validation session; the result is stored in
# `models_doc["triplet_val_acc"]`.
for i, row in models_doc.iterrows():
    cur_conf = models_conf.loc[row["conf_id"]]

    # only self-supervised models are evaluated in this cell
    if cur_conf["supervised"] != "self-supervised":
        continue

    print(row["model_name"])
    test_model = model.load_model(
        row["model_name"] + "_best_val",
        cur_conf["out_dim"],
        dropout_p=cur_conf["dropout_p"],
    )

    # unlabeled data
    blocks_u, chunks_u = pru.arange_data(
        lowpass=cur_conf["filter"],
        trans_band_auto=False,
        verbose=False,
    )
    blocks_ut = torch.from_numpy(blocks_u.to_numpy()).T

    # for disjoint training and validation sets: the session given by
    # `val_idx` is the validation set, all remaining sessions the training set
    val_ses = row["val_idx"]
    tr_chunks = torch.from_numpy(chunks_u.loc[chunks_u["session_no"] != val_ses].to_numpy())
    val_chunks = torch.from_numpy(chunks_u.loc[chunks_u["session_no"] == val_ses].to_numpy())

    val_acc = metrics.calc_triplet_accuracies_ssl(
        test_model,
        val_chunks,
        blocks_ut,
        augmentation_scale=cur_conf["augmentation_scale"],
        loss_margin=0,
        file_name=row["model_name"],
    )
    print(val_acc)
    models_doc.loc[i, "triplet_val_acc"] = val_acc

In [None]:
# calculate triplet test accuracy for self-supervised models on n-back data
#
# Each self-supervised model is scored with `metrics.calc_triplet_accuracies_ssl`
# on all n-back chunks; the result is stored in `models_doc["triplet_test_acc"]`.

for i, row in models_doc.iterrows():
    cur_conf = models_conf.loc[row["conf_id"]]

    # only self-supervised models are evaluated in this cell
    if cur_conf["supervised"] != "self-supervised":
        continue

    print(row["model_name"])
    test_model = model.load_model(
        row["model_name"] + "_best_val",
        cur_conf["out_dim"],
        dropout_p=cur_conf["dropout_p"],
    )

    # n-back data, converted to tensors without the "time_stamp"/"n"
    # (blocks) and "n"/"offset" (chunks) columns
    blocks, chunks = prn.arange_data(
        lowpass=cur_conf["filter"],
        trans_band_auto=False,
        verbose=False,
    )
    blocks_t = torch.from_numpy(blocks.drop(columns=["time_stamp", "n"]).to_numpy()).T
    chunks_t = torch.from_numpy(chunks.drop(columns=["n", "offset"]).to_numpy())

    acc = metrics.calc_triplet_accuracies_ssl(
        test_model,
        chunks_t,
        blocks_t,
        augmentation_scale=cur_conf["augmentation_scale"],
        loss_margin=0,
        file_name=row["model_name"] + "_nback",
    )
    print(acc)
    models_doc.loc[i, "triplet_test_acc"] = acc

In [None]:
# calculate triplet validation and test accuracy for supervised models on n-back data
#
# The two flags below switch the validation and the test evaluation on or off
# independently. Results are stored in `models_doc["triplet_val_acc"]` and
# `models_doc["triplet_test_acc"]`.
calc_val_acc = True
calc_test_acc = True

for i, row in models_doc.iterrows():
    cur_conf = models_conf.loc[row["conf_id"]]

    # only supervised models are evaluated in this cell
    if cur_conf["supervised"] != "supervised":
        continue

    print(row["model_name"])
    test_model = model.load_model(
        row["model_name"] + "_best_val",
        cur_conf["out_dim"],
        dropout_p=cur_conf["dropout_p"],
    )

    # n-back data
    blocks, chunks = prn.arange_data(
        lowpass=cur_conf["filter"],
        trans_band_auto=False,
        verbose=False,
    )
    train_chunks, test_chunks = prn.get_train_test_sets(chunks, cur_conf["test_session"])

    # validation: triplets built from the fold selected by `val_idx`
    if calc_val_acc:
        chunks_data_X, chunks_data_Y = prn.get_samples_data(train_chunks, blocks)
        chunks_data_X = pr.normalize_data(chunks_data_X)
        folds = pr.get_folds(train_chunks, k_folds=10)

        train_set, val_set = pr.get_train_val_sets(folds, row["val_idx"])
        val_triplets = prn.make_triplets(val_set)
        train_triplets = prn.make_triplets(train_set)

        val_acc = metrics.calc_triplet_accuracies_sl(
            test_model,
            val_triplets,
            chunks_data_X,
            loss_margin=0,
            file_name=row["model_name"] + "_nback",
        )

        print(val_acc)
        models_doc.loc[i, "triplet_val_acc"] = val_acc

    # test: triplets built from the held-out test session
    if calc_test_acc:
        test_chunks_data_X, test_chunks_data_Y = prn.get_samples_data(test_chunks, blocks)
        test_chunks_data_X = pr.normalize_data(test_chunks_data_X)
        test_triplets = prn.make_triplets(test_chunks)

        test_acc = metrics.calc_triplet_accuracies_sl(
            test_model,
            test_triplets,
            test_chunks_data_X,
            loss_margin=0,
            file_name=row["model_name"] + "_nback_test",
        )

        print(test_acc)
        models_doc.loc[i, "triplet_test_acc"] = test_acc
        

In [23]:
# calculate mean values for triplet accuracy
#
# Per configuration (`conf_id`), store mean and standard deviation of the
# triplet validation / test accuracy of its models in `models_conf`.
for c_id, frame in models_doc.groupby("conf_id"):
    for split in ("val", "test"):
        col = "triplet_" + split + "_acc"
        m = frame[[col]].mean(axis=0)
        v = frame[[col]].std(axis=0)
        models_conf.loc[c_id, ["mean_" + col, "std_" + col]] = [m[col], v[col]]

In [7]:
# write the configuration table, including the aggregated metrics, back to its CSV file
models_conf.to_csv(path_or_buf=model.models_conf_path)

In [8]:
# write the per-model table, including the computed metrics, back to its CSV file
models_doc.to_csv(path_or_buf=model.models_doc_path)