# Setup Variables

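Available datasets: MNIST, FashionMNIST, GTSRB, Cifar10 (note: the model registry in this notebook only covers MNIST, FashionMNIST and GTSRB).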

In [None]:
# Dataset to process; swap the active line with one of the commented
# alternatives to switch datasets.
DATASET = 'MNIST'
# DATASET = 'FashionMNIST'
# DATASET = 'GTSRB'

# Seed fed to the NumPy / PyTorch random number generators further down.
SEED = 42

# Index of the CUDA device and the device string derived from it.
CUDA = 0
GPU_NAME = f'cuda:{CUDA}'

In [None]:
import os
from pathlib import Path

# Make the project root the working directory: when the notebook is started
# from somewhere else, step one level up (assumes the notebook sits directly
# below the `runtime-monitoring` folder -- TODO confirm for other locations).
if Path.cwd().name != 'runtime-monitoring':
    os.chdir('../')

# Either way, `base` is the working directory after the check above.
base = Path.cwd()

# Libraries

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import json
from fastprogress import progress_bar, master_bar

In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
from torchinfo import summary


# Permit faster, reduced-precision kernels for float32 matrix multiplications.
torch.set_float32_matmul_precision('high')

# Let cuDNN benchmark its convolution algorithms and pick the fastest one.
# NOTE(review): algorithm auto-tuning can make runs non-reproducible even
# though the notebook seeds its RNGs -- confirm this trade-off is intended.
cudnn.benchmark = True

In [None]:
from utilities.utils import *
from utilities.pathManager import fetchPaths
from utilities.scaleFunctions import *
from utilities.pcaFunctions import *

In [None]:
# Suppress every warning category for the remainder of the session.
import warnings
warnings.simplefilter('ignore')

## Load model and settings

In [None]:
from models.mnist_model import MNIST_Model
from models.fashionmnist_model import FashionMNIST_CNN
from models.gtsrb_model import GTSRB_CNN

from models.transform import transform

# Model classes keyed by the lower-cased dataset name.
models = {
    'mnist': MNIST_Model,
    'fashionmnist': FashionMNIST_CNN,
    'gtsrb': GTSRB_CNN
}

# Pick the model class and the transform set for the selected dataset;
# a dataset missing from either mapping raises KeyError here.
_dataset_key = DATASET.lower()
model_ = models[_dataset_key]
transform_ = transform[_dataset_key]

# Paths

In [None]:
# Look up the project's folders for the selected dataset.
paths = fetchPaths(base, DATASET, '', False)

path_data = paths['data']
path_lhl = paths['lhl']
# two levels above the saved-models folder
path_stats = paths['saved_models'].parent.parent

# The configuration file holds three sections that are used separately.
configs = load_json(paths['configuration'])
config, model_setup, model_config = (
    configs[section]
    for section in ('configuration', 'model_setup', 'model_config')
)

# `optimizer` and `scheduler` each map a name to its arguments; only the
# first entry of each mapping is used (presumably there is exactly one).
optim_name = list(config['optimizer'])[0]
optim_args = config['optimizer'][optim_name]
scheduler_name = list(config['scheduler'])[0]
scheduler_args = config['scheduler'][scheduler_name]

# GPU Device & Seed

In [None]:
# Seed NumPy, PyTorch and the current CUDA device with the same value,
# in that order.
for _seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(SEED)

In [None]:
# Resolve the compute device for the configured GPU name through the project
# helper; the result is what the model is later moved to with `.to(device)`.
device = get_device(GPU_NAME)

# Load / Split / DataLoader

In [None]:
# Labels of the selected dataset, as returned by the project helper.
feature_names = get_labels(DATASET)

# Both splits deliberately use the *test* transform so that the training
# images are processed without train-time data augmentation.
_eval_transform = transform_['test']
train_data, test_data = (
    get_dataset(DATASET, path_data, train=is_train, transform=_eval_transform)
    for is_train in (True, False)
)

len(train_data), len(test_data)

In [None]:
# Same batch size for both loaders. The third positional argument is True
# for the training split and False for the test split (presumably the
# shuffle flag -- confirm in get_dataLoader).
_batch_size = model_config['batch_size']
trainloader = get_dataLoader(train_data, _batch_size, True)
testloader = get_dataLoader(test_data, _batch_size, False)

# Export Last Hidden Layer (raw and PCA)

In [None]:
# Work from the parent folders of the `lhl` and `saved_models` paths; the
# checkpoint glob below searches one sub-folder level beneath
# `path_saved_models`.
# NOTE: this rebinds `path_lhl`, replacing the value assigned in the Paths
# section.
path_lhl = paths['lhl'].parent
path_saved_models = paths['saved_models'].parent

In [None]:
# Collect this dataset's checkpoints from the sub-folders of the saved-models
# directory, leaving out any whose file name contains "_selected_classes".
# NOTE: this rebinds `models`, which until here held the dataset -> model
# class mapping.
models = [
    ckpt
    for ckpt in path_saved_models.glob(f'*/{DATASET}_*.pth.tar')
    if "_selected_classes" not in ckpt.name
]

models

In [None]:
# For every discovered checkpoint: rebuild the model, export its
# last-hidden-layer (LHL) activations for the train and test splits, then
# apply the stored scaler + PCA to the exported data and save the result as CSV.

# The output folders do not depend on the model, so create them once up front
# (parents=True also covers a missing `path_lhl`).
path_lhl_raw = path_lhl / 'raw'
path_lhl_pca = path_lhl / 'pca'
path_lhl_raw.mkdir(parents=True, exist_ok=True)
path_lhl_pca.mkdir(parents=True, exist_ok=True)

# Iterate the checkpoints through the master bar directly; `mb` is also handed
# to the export helper below.
mb = master_bar(models)

for m in mb:
    # The checkpoint's parent folder names the model; the last '-'-separated
    # token of that name is used as the number of last-hidden-layer neurons.
    model_name = m.parent.name
    # NOTE(review): str.split returns text, so `lhl` is passed on as a string
    # (to the model constructor, the export helper and applyPCASingle) --
    # confirm those accept a string rather than an int.
    lhl = model_name.split('-')[-1]

    # build the model for this LHL size
    model_setup['last_hidden_neurons'] = lhl
    model = model_(**model_setup).to(device)
    # The state dict is loaded into the compiled wrapper, so the checkpoint
    # keys have to match the wrapper's keys; keep this order unless the
    # checkpoints are re-saved.
    model = torch.compile(model)

    # map_location makes the checkpoint load onto the active device even if it
    # was saved from a different one.
    checkpoint = torch.load(m, map_location=device)
    model.load_state_dict(checkpoint['model'])

    # evaluation mode
    model.eval()

    # export raw LHL (presumably writes the `<model_name>_raw_{train,test}.csv`
    # files that are read back below)
    export_last_hidden_layer(trainloader, model, device, lhl, path_lhl_raw, model_name, 'raw_train', mb)
    export_last_hidden_layer(testloader, model, device, lhl, path_lhl_raw, model_name, 'raw_test', mb)

    # load Scaler and PCA
    # NOTE(review): `scaler_` / `pca_` are passed to load_pickle before this
    # notebook ever assigns them, so the first iteration raises NameError
    # unless one of the star imports provides those names. The calls are kept
    # unchanged because load_pickle's signature is not visible here -- confirm
    # whether the second argument is needed at all.
    # NOTE(review): the same scaler.pkl / pca.pkl is used for every model,
    # independent of its LHL size -- confirm this is intended.
    scaler_ = load_pickle(path_lhl / 'scaler.pkl', scaler_)
    pca_ = load_pickle(path_lhl / 'pca.pkl', pca_)

    # read the exported raw activations back in
    train = pd.read_csv(path_lhl_raw / f'{model_name}_raw_train.csv')
    test = pd.read_csv(path_lhl_raw / f'{model_name}_raw_test.csv')

    # export PCA data
    applyPCASingle(train, scaler_, pca_, lhl).to_csv(path_lhl_pca / f'{model_name}_pca_train.csv', index=False)
    applyPCASingle(test, scaler_, pca_, lhl).to_csv(path_lhl_pca / f'{model_name}_pca_test.csv', index=False)