In [1]:
import pandas as pd
import numpy as np
import torch
import torch.utils.data
import torch.nn as nn
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
import joblib
from tqdm import tqdm
import matplotlib.pyplot as plt
from copy import deepcopy

In [2]:
from isaac.utils import get_cuda_device_if_available, create_directory
# Resolve the compute device through the project helper and echo the choice.
device = get_cuda_device_if_available()
print(device)

# Set up the output folders this notebook writes to: the stats folder first,
# then the folders for model checkpoints and for the fitted scaler.
data_directory = "adding_noise_to_training/"
for output_dir in (data_directory, "models", "scalers"):
    create_directory(output_dir)

cuda:0


In [3]:
from isaac.dataset import read_dataset, prepare_dataset
from isaac.utils import plot_confusion_matrix
from isaac.constants import BASIC_TRAINING_COLS, FORCE_CLASS_COLS, MASS_CLASS_COLS
from isaac.training import training_loop
from isaac.models import MultiBranchModel, initialise_model
from isaac.evaluation import get_best_model_and_its_accuracy
from isaac.noise import add_noise_to_dataloader

In [4]:
# --- Training / data settings ---
BATCH_SIZE = 128
EPOCHS = 100
NORMALISE_DATA = True
STEP_SIZE = 3
SEQ_END = 1800

# --- Network dimensions ---
INPUT_DIM = len(BASIC_TRAINING_COLS)  # one input per basic training column
HIDDEN_DIM = 25                       # hidden layer dimension
N_LAYERS = 4                          # number of hidden layers (not part of network_params below)
OUTPUT_DIM = 3                        # output dimension
DROPOUT = 0.5

# Positional parameters handed to initialise_model in the training cell.
network_params = (
    INPUT_DIM,
    HIDDEN_DIM,
    OUTPUT_DIM,
    DROPOUT,
)

## Read dataset and preprocess it

In [5]:
# Load the passive-trial files: training set first, validation set second.
train_trials, val_trials = [
    read_dataset(trials_path)
    for trials_path in ("data/train_passive_trials.h5", "data/val_passive_trials.h5")
]

100%|██████████| 3500/3500 [00:28<00:00, 121.97it/s]
100%|██████████| 1000/1000 [00:09<00:00, 105.14it/s]


## Train models at each noise level and record loss and accuracy

In [6]:
N_MODELS = 3

# Build the loaders once: index 0 is used as the training loader and index 1 as
# the validation loader below. The scaler returned alongside is saved later.
loaders, scaler = prepare_dataset(
    [train_trials, val_trials],
    class_columns=[list(MASS_CLASS_COLS), list(FORCE_CLASS_COLS)],
    multiclass=True,
    batch_size=BATCH_SIZE,
    normalise_data=NORMALISE_DATA,
    device=device,
)

# One per-epoch statistics frame is collected for every (noise level, seed) run.
stats_dfs = []

for noise_level in [0.025, 0.0625, 0.125, 0.25, 0]:
    print(noise_level)

    # Checkpoints for this noise level go into their own sub-folder of "models/".
    noise_directory = "models/noise_level_%.4f/" % noise_level
    create_directory(noise_directory)

    # Only the training loader goes through the noise helper; validation uses loaders[1] as is.
    train_loader = add_noise_to_dataloader(loaders[0], noise_deviation=noise_level)

    for seed in range(N_MODELS):
        model, error, optimizer = initialise_model(
            network_params,
            lr=0.01,
            seed=seed,
            device=device,
            arch=MultiBranchModel,
        )

        epoch_losses, epoch_accuracies, best_models = training_loop(
            model,
            optimizer,
            error,
            train_loader,
            loaders[1],
            EPOCHS,
            seq_end=SEQ_END,
            step_size=STEP_SIZE,
            multibranch=True,
        )
        best_mass_model, best_force_model = best_models

        # Persist the best checkpoint of each branch for this seed.
        torch.save(best_mass_model.state_dict(), noise_directory + "best_mass_model_seed_%d.pt" % seed)
        torch.save(best_force_model.state_dict(), noise_directory + "best_force_model_seed_%d.pt" % seed)

        # epoch_accuracies holds training accuracies at index 0 and validation
        # accuracies at index 1; column 0 is the mass branch, column 1 the force branch.
        train_accuracies = np.array(epoch_accuracies[0])
        val_accuracies = np.array(epoch_accuracies[1])

        # Scalars ("seed", "noise") are broadcast over all EPOCHS rows.
        df = pd.DataFrame({
            "Epoch": np.arange(EPOCHS),
            "Mass Loss": epoch_losses[:, 0],
            "Force Loss": epoch_losses[:, 1],
            "Mass Train Accuracy": train_accuracies[:, 0],
            "Mass Val Accuracy": val_accuracies[:, 0],
            "Force Train Accuracy": train_accuracies[:, 1],
            "Force Val Accuracy": val_accuracies[:, 1],
            "seed": str(seed),
            "noise": noise_level,
        })
        stats_dfs.append(df)

# Stack every run into one table and store it next to the other notebook outputs.
stats = pd.concat(stats_dfs)
stats.to_hdf(data_directory + "stats.h5", key="stats")

100%|██████████| 3500/3500 [00:05<00:00, 689.93it/s]
100%|██████████| 1000/1000 [00:01<00:00, 670.96it/s]


0.025


Train_loss: ([0.68585552 0.69788508]) Train_acc: ([64.37142857 56.22857143]) Val_acc: ([53.6 47.9]): 100%|██████████| 100/100 [14:33<00:00,  8.81s/it]
Train_loss: ([0.63198239 0.64164907]) Train_acc: ([70.91428571 65.31428571]) Val_acc: ([57.6 56.8]): 100%|██████████| 100/100 [14:35<00:00,  8.69s/it]
Train_loss: ([0.5871331  0.51192162]) Train_acc: ([72.65714286 71.08571429]) Val_acc: ([54.9 52. ]): 100%|██████████| 100/100 [14:31<00:00,  8.77s/it]


0.0625


Train_loss: ([0.28693282 0.31830745]) Train_acc: ([84.88571429 82.48571429]) Val_acc: ([61.2 63.9]): 100%|██████████| 100/100 [14:35<00:00,  8.79s/it]
Train_loss: ([0.78342802 0.79568502]) Train_acc: ([61.25714286 57.8       ]) Val_acc: ([51.3 51.8]): 100%|██████████| 100/100 [14:34<00:00,  8.77s/it]
Train_loss: ([0.88038532 0.905165  ]) Train_acc: ([55.37142857 48.42857143]) Val_acc: ([48.8 45.7]): 100%|██████████| 100/100 [14:36<00:00,  8.79s/it]


0.125


Train_loss: ([0.67635185 0.7433301 ]) Train_acc: ([67.25714286 58.71428571]) Val_acc: ([54.3 50.9]): 100%|██████████| 100/100 [14:26<00:00,  8.72s/it]
Train_loss: ([0.55982399 0.51008848]) Train_acc: ([77.4        75.65714286]) Val_acc: ([53.  55.1]): 100%|██████████| 100/100 [14:37<00:00,  8.78s/it]
Train_loss: ([0.40258311 0.40281282]) Train_acc: ([83.08571429 78.85714286]) Val_acc: ([54.5 54.8]): 100%|██████████| 100/100 [14:26<00:00,  8.70s/it]


0.25


Train_loss: ([0.64920697 0.59976909]) Train_acc: ([70.48571429 70.4       ]) Val_acc: ([57.5 53.5]): 100%|██████████| 100/100 [14:33<00:00,  8.77s/it]
Train_loss: ([0.7080596  0.61524862]) Train_acc: ([66.37142857 72.11428571]) Val_acc: ([53.5 51.1]): 100%|██████████| 100/100 [14:29<00:00,  8.77s/it]
Train_loss: ([0.35488293 0.45096434]) Train_acc: ([84.45714286 76.82857143]) Val_acc: ([56.5 53.6]): 100%|██████████| 100/100 [14:34<00:00,  8.79s/it]


0


Train_loss: ([0.58330193 0.57347459]) Train_acc: ([71.82857143 68.08571429]) Val_acc: ([49.6 55.2]): 100%|██████████| 100/100 [14:33<00:00,  8.78s/it]
Train_loss: ([0.86719576 0.84949754]) Train_acc: ([54.65714286 46.97142857]) Val_acc: ([48.6 44.6]): 100%|██████████| 100/100 [14:26<00:00,  8.59s/it]
Train_loss: ([0.99715294 0.99506863]) Train_acc: ([47.88571429 42.8       ]) Val_acc: ([46.8 41.4]): 100%|██████████| 100/100 [14:31<00:00,  8.77s/it]


## Save the scaler (model checkpoints are written during training)

In [7]:
# Persist the scaler returned by prepare_dataset; left as the cell's last
# expression so the list of written file names is displayed.
joblib.dump(value=scaler, filename="scalers/passive_dual_scaler.sk")

['scalers/passive_dual_scaler.sk']