In [None]:
import torch
import torch.nn as nn
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.model_selection as model_selection
import os

from model import lib as MyModels

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# This module-level `device` is read by the training functions below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Display which device was selected.
device

device(type='cuda')

# Training Functions

In [None]:
def train_one_epoch(model, trainloader, optimizer, epoch_index, lossfunc, clipping=False):
    """Run one optimisation pass over ``trainloader`` and return the mean batch loss.

    The loss compares each input batch with the model's output for that same
    batch (reconstruction), so the labels yielded by the loader are ignored.

    Args:
        model: Callable module invoked as ``model(inputs)``.
        trainloader: Iterable yielding ``(inputs, labels)`` batches.
        optimizer: Optimizer holding ``model``'s parameters.
        epoch_index: Index of the current epoch (unused; kept for callers).
        lossfunc: Callable ``lossfunc(inputs, outputs)`` returning a scalar tensor.
        clipping: If truthy, clip the total gradient norm to 1 before stepping.

    Returns:
        float: Sum of the per-batch losses divided by the number of batches.

    Raises:
        ValueError: If ``trainloader`` yields no batches (the mean is undefined).
    """
    running_loss = 0.
    num_batches = 0

    for inputs, _labels in trainloader:
        # Transfer the batch once and reuse it as model input and as loss operand.
        inputs = inputs.to(device)

        # Gradient INIT
        optimizer.zero_grad()
        # Predictions and loss for current batch
        outputs = model(inputs)
        loss = lossfunc(inputs, outputs.to(device))
        # Gradient computation and learning weight adjust
        loss.backward()

        if clipping:
            nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()

        # Gather data
        running_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        raise ValueError("trainloader yielded no batches")

    return running_loss / num_batches

In [None]:
def train_model(model, trainloader, validationloader, epochs, learning_rate, patience, report_every_n_epochs, clipping=True, threshold=1e-4):
    """Train ``model`` with Adadelta and a summed MSE reconstruction loss.

    Each epoch runs ``train_one_epoch`` on ``trainloader`` and then measures the
    same loss on ``validationloader`` without gradient tracking. Training stops
    early once the mean training loss has changed by less than ``threshold``
    for ``patience`` consecutive epochs.

    Args:
        model: Module to train; called as ``model(inputs)``.
        trainloader: Iterable of ``(inputs, labels)`` training batches.
        validationloader: Iterable of ``(inputs, labels)`` validation batches.
        epochs: Maximum number of epochs.
        learning_rate: Learning rate passed to ``torch.optim.Adadelta``.
        patience: Number of consecutive low-change epochs that triggers early stopping.
        report_every_n_epochs: Print the losses every this many epochs.
        clipping: Forwarded to ``train_one_epoch`` to enable gradient clipping.
        threshold: Absolute change in mean training loss below which an epoch
            counts as "no progress". Previously this name was read from an
            undefined global; it is now an explicit argument.

    Returns:
        tuple: ``(model, [train_loss, val_loss])`` where both histories are
        lists of floats with one entry per completed epoch. The same structure
        is returned whether or not early stopping fired. If the validation
        loader yields no batches the validation entry is ``nan``.
    """
    trigger_times = 0
    last_loss = 0.

    optimizer = torch.optim.Adadelta(model.parameters(), lr=learning_rate)
    mse = nn.MSELoss(reduction='sum')
    train_loss = []
    val_loss = []

    for epoch in range(epochs):
        # Make sure gradient tracking is on, and do a pass over the data
        model.train(True)
        avg_loss = train_one_epoch(model, trainloader, optimizer, epoch, mse, clipping)
        train_loss.append(avg_loss)

        # We don't need gradients on to do reporting
        model.train(False)

        running_vloss = 0.
        num_vbatches = 0
        with torch.no_grad():
            for vinputs, _vlabels in validationloader:
                vinputs = vinputs.to(device)
                voutputs = model(vinputs)
                running_vloss += mse(vinputs, voutputs.to(device)).item()
                num_vbatches += 1

        avg_vloss = running_vloss / num_vbatches if num_vbatches else float('nan')
        val_loss.append(avg_vloss)

        if (epoch+1) % report_every_n_epochs == 0:
            print('Epoch {}:'.format(epoch + 1))
            print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))

        # Early stopping
        # stop if training loss does not decrease significantly
        if abs(avg_loss-last_loss) < threshold:
            trigger_times += 1

            if trigger_times >= patience:
                print('Early stopping!\nStart to test process.')
                # Leave the loop so the common return below is used; callers
                # always unpack (model, history).
                break

        else:
            trigger_times = 0
        last_loss = avg_loss

    return model, [train_loss, val_loss]

In [None]:
def train_ensemble(
            train_dataset,
            validation_dataset,
            num_models = 40,
            hidden_layers = 8,
            skip_max = 30,
            epochs = 50, 
            learning_rate = 0.05,
            report_every_n_epochs = 10, 
            clipping=True, 
            patience = 10,
            checkpoint_dir = './drive/MyDrive/model_checkpoints'):
    """Train ``num_models`` models and collect their reconstruction errors.

    For every ensemble member a model is built with ``MyModels.rand_seq2seq``,
    trained with ``train_model`` on full-batch loaders, checkpointed under
    ``checkpoint_dir`` and scored with ``model_errors`` on both datasets.

    Args:
        train_dataset: Training dataset; must be accepted by ``model_errors``.
        validation_dataset: Validation dataset; same requirement.
        num_models: Number of ensemble members to train.
        hidden_layers: Passed to ``MyModels.rand_seq2seq``.
        skip_max: Passed to ``MyModels.rand_seq2seq``.
        epochs: Maximum epochs per model.
        learning_rate: Learning rate for ``train_model``.
        report_every_n_epochs: Reporting interval for ``train_model``.
        clipping: Forwarded to ``train_model`` (it was previously ignored).
        patience: Early-stopping patience for ``train_model``.
        checkpoint_dir: Directory the ``model_<i>`` state dicts are written to;
            created if missing.

    Returns:
        tuple: ``(model_params, train_errors, val_errors)``. ``model_params`` is
        a DataFrame with one row per model (checkpoint path, skip sizes, error
        arrays and loss histories). The error arrays have shape
        ``(num_models, len(dataset), time_steps)``. Loss histories shorter than
        ``epochs`` (early stopping) are padded with NaN.
    """
    # Full-batch loaders: each epoch is a single step over the whole split.
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=len(train_dataset), shuffle=True)
    # Shuffling has no effect on a single full-size validation batch.
    validationloader = torch.utils.data.DataLoader(validation_dataset, batch_size=len(validation_dataset), shuffle=False)

    # Read the sample geometry once instead of materialising a full batch per model.
    first_input = train_dataset[0][0]
    time_steps = len(first_input)
    num_features = first_input.shape[-1]

    train_errors = np.full((num_models, len(train_dataset), time_steps), np.nan)
    val_errors = np.full((num_models, len(validation_dataset), time_steps), np.nan)
    train_loss = np.full((num_models, epochs), np.nan)
    val_loss = np.full((num_models, epochs), np.nan)
    columns = ['path','enc_skip_size', 'dec_skip_size', 'train_errors', 'val_errors', 'train_loss', 'val_loss']

    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    records = []
    for model_counter in range(num_models):
        print('Model %s'%model_counter)
        model = MyModels.rand_seq2seq(num_features, hidden_layers, skip_max, device=device)

        model, loss = train_model(model, trainloader, validationloader, epochs, learning_rate, patience, report_every_n_epochs, clipping)
        enc_skip_size, dec_skip_size = model.skip_sizes
        model_path = os.path.join(checkpoint_dir, 'model_%s'%model_counter)
        torch.save(model.state_dict(), model_path)

        # Early stopping can produce fewer than `epochs` entries; the tail stays NaN.
        train_loss[model_counter, :len(loss[0])] = loss[0]
        val_loss[model_counter, :len(loss[1])] = loss[1]
        model.train(False)
        model.cpu()

        train_errors[model_counter] = model_errors(train_dataset, model)
        val_errors[model_counter] = model_errors(validation_dataset, model)

        records.append([model_path,
                        enc_skip_size,
                        dec_skip_size,
                        train_errors[model_counter],
                        val_errors[model_counter],
                        train_loss[model_counter],
                        val_loss[model_counter]])
        del model

    # Build the frame once rather than concatenating inside the loop.
    model_params = pd.DataFrame(records, columns=columns)

    return model_params, train_errors, val_errors

# Training

In [None]:
def minmax_scaling(dataset):
    """Rescale ``dataset`` to the range [0, 1] using its global min and max.

    The minimum and maximum are taken over the whole array (not per sample or
    per channel).

    Args:
        dataset: Array-like of numeric values.

    Returns:
        numpy.ndarray: ``(dataset - min) / (max - min)``. If every value is
        identical the range is zero, and an all-zero float array of the same
        shape is returned instead of NaNs.
    """
    dataset = np.asarray(dataset)
    minimum = np.min(dataset)
    maximum = np.max(dataset)
    value_range = maximum - minimum
    if value_range == 0:
        # Constant input: avoid the 0/0 division.
        return np.zeros(dataset.shape, dtype=float)
    return (dataset - minimum) / value_range

In [None]:
def sum_of_squared_errors(_inputs, _outputs):
    """Return the squared difference of two tensors summed over the last axis.

    Args:
        _inputs: Tensor of reference values.
        _outputs: Tensor of the same (or broadcastable) shape as ``_inputs``.

    Returns:
        numpy.ndarray: ``sum((inputs - outputs) ** 2, axis=-1)``, i.e. the input
        shape with the last axis removed.
    """
    # outlier score of each timeseries for each timestep
    # .cpu() is a no-op for CPU tensors and makes CUDA tensors convertible to NumPy.
    inputs = _inputs.detach().cpu().numpy()
    outputs = _outputs.detach().cpu().numpy()
    square_errors = np.square(inputs-outputs)
    return np.sum(square_errors, axis=-1)

def Z_score(_inputs):
    """Standardise values to zero mean and unit standard deviation.

    Mean and standard deviation are computed over all elements.

    Args:
        _inputs: Array-like of numeric values.

    Returns:
        numpy.ndarray: ``(_inputs - mean) / std``. When the standard deviation
        is zero (all values equal) an all-zero float array of the same shape is
        returned instead of dividing by zero.
    """
    values = np.asarray(_inputs)
    mu = np.mean(values)
    std = np.std(values)
    if std == 0:
        return np.zeros(values.shape, dtype=float)
    return (values-mu)/std

def model_errors(dataset, model):
    """Compute reconstruction errors of ``model`` on every sample of ``dataset``.

    Args:
        dataset: Object exposing ``.dataset`` and ``.indices`` (for example a
            ``torch.utils.data.Subset``) whose underlying dataset returns an
            ``(inputs, labels)`` pair when indexed with a list of indices.
        model: Callable applied to the selected inputs in a single call.

    Returns:
        numpy.ndarray: Result of ``sum_of_squared_errors(inputs, model(inputs))``,
        ordered like ``dataset.indices``.
    """
    inputs, _labels = dataset.dataset[dataset.indices]
    # Inference only: do not build an autograd graph for the whole split.
    with torch.no_grad():
        output = model(inputs)
    return sum_of_squared_errors(inputs, output)

In [None]:
def make_label(zscore, threshold=0):
    """Binarise scores: 1 where the score exceeds ``threshold``, otherwise 0.

    Args:
        zscore: Array-like of scores.
        threshold: Cut-off; scores strictly greater than it are labelled 1.
            Defaults to 0, the value that was previously hard-coded.

    Returns:
        numpy.ndarray: Float array with the shape of ``zscore`` containing
        0.0 and 1.0. Scores equal to the threshold (and NaN) map to 0.
    """
    return np.where(np.asarray(zscore) > threshold, 1.0, 0.0)

In [None]:
def evaluate(label, score):
    """Print ROC-AUC for ``score`` and accuracy/precision/recall/F1 for its labels.

    ROC-AUC is computed on the raw scores; the remaining metrics use the
    binary predictions obtained from ``make_label(score)``.

    Args:
        label: Ground-truth labels.
        score: Scores, one per label.
    """
    print('ROC-AUC: ', metrics.roc_auc_score(label, score))

    predicted = make_label(score)
    thresholded_metrics = (
        ('Accuracy: ', metrics.accuracy_score),
        ('Precision: ', metrics.precision_score),
        ('Recall: ', metrics.recall_score),
        ('F1: ', metrics.f1_score),
    )
    for caption, metric_fn in thresholded_metrics:
        print(caption, metric_fn(label, predicted))

## Train on CHB-MIT dataset
### Load the Data

In [None]:
# Colab-only: mount Google Drive at /content/drive, forcing a remount if it is
# already mounted.
# NOTE(review): later cells use relative './drive/MyDrive/...' paths, which
# presumably rely on the working directory being /content — confirm.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
def prepare_CHB_data(_input):
    """Scale a 3-D array, swap its last two axes and return it as a float tensor.

    Args:
        _input: 3-D NumPy array.
            # assumes layout (samples, channels, time) — TODO confirm

    Returns:
        torch.Tensor: float32 tensor of ``minmax_scaling(_input)`` with axes
        reordered to ``(0, 2, 1)``.
    """
    swapped = np.transpose(minmax_scaling(_input), (0, 2, 1))
    as_tensor = torch.from_numpy(swapped)
    return as_tensor.float()

In [None]:
# np.load on an .npz archive returns a file-backed object that stays open until
# closed; the `with` blocks close each archive once its arrays have been read.
with np.load('./drive/MyDrive/dataset/CHB_MIT/eeg-seizure_train.npz') as file:
    traindata = prepare_CHB_data(file['train_signals'])
    trainlabel = torch.from_numpy(file['train_labels'])

# Only the signals of the test split are loaded here.
with np.load('./drive/MyDrive/dataset/CHB_MIT/eeg-seizure_test.npz') as file:
    testdata = prepare_CHB_data(file['test_signals'])

with np.load('./drive/MyDrive/dataset/CHB_MIT/eeg-seizure_val.npz') as file:
    valdata = prepare_CHB_data(file['val_signals'])
    vallabel = torch.from_numpy(file['val_labels'])

In [None]:
# Pair signals with their labels.
traindataset_full = torch.utils.data.TensorDataset(traindata, trainlabel)
validationdataset_full = torch.utils.data.TensorDataset(valdata, vallabel)
# Keep a random 10% subset of each split (the 90% remainder is discarded).
# A generator seeded with 42 makes the selection repeatable. The results are
# Subset objects, so `.indices` gives the positions in the full dataset.
traindataset, _ = torch.utils.data.random_split(traindataset_full, [0.1, 0.9], generator=torch.Generator().manual_seed(42))
validationdataset, _ = torch.utils.data.random_split(validationdataset_full, [0.1, 0.9], generator=torch.Generator().manual_seed(42))

In [None]:
# Number of samples in the 10% training subset.
len(traindataset)

9417

### Start Training

In [None]:
import gc

# Collect unreachable Python objects first so any CUDA tensors they hold are
# released, then return the now-unused cached blocks to the driver. Emptying
# the cache before collecting would leave that memory in the cache.
gc.collect()
torch.cuda.empty_cache()

0

In [None]:
# Train the ensemble on the 10% subsets.
# NOTE: `model` is bound to the first return value of train_ensemble, which is
# the DataFrame of per-model metadata, not a network. t_err / v_err are the
# per-model error arrays for the training and validation subsets.
model, t_err, v_err= train_ensemble(traindataset, validationdataset, num_models=40, hidden_layers=8, epochs=100, learning_rate=0.05, patience=5, skip_max=30)

Model 0
Epoch 10:
LOSS train 20770946.0 valid 4724215.0
Epoch 20:
LOSS train 20310460.0 valid 4622130.0
Epoch 30:
LOSS train 19830232.0 valid 4517402.5
Epoch 40:
LOSS train 19364304.0 valid 4417818.5
Epoch 50:
LOSS train 18905528.0 valid 4313165.0
Epoch 60:
LOSS train 18442224.0 valid 4211115.5
Epoch 70:
LOSS train 17978710.0 valid 4106047.5
Epoch 80:
LOSS train 17499148.0 valid 4000321.75
Epoch 90:
LOSS train 17023444.0 valid 3893281.25
Epoch 100:
LOSS train 16544711.0 valid 3785521.5
Model 1
Epoch 10:
LOSS train 14610750.0 valid 3364433.5
Epoch 20:
LOSS train 14340751.0 valid 3304504.5
Epoch 30:
LOSS train 14069499.0 valid 3244272.75
Epoch 40:
LOSS train 13786525.0 valid 3181127.0
Epoch 50:
LOSS train 13495844.0 valid 3114405.5
Epoch 60:
LOSS train 13174641.0 valid 3045903.5
Epoch 70:
LOSS train 12861974.0 valid 2971654.0
Epoch 80:
LOSS train 12510668.0 valid 2895586.75
Epoch 90:
LOSS train 12158105.0 valid 2814548.25
Epoch 100:
LOSS train 11786142.0 valid 2725050.75
Model 2
Epoch 10

In [None]:
# Destination of the CSV training report on the mounted Drive.
trainreport_name = './drive/MyDrive/CHB_trainreport.csv'

In [None]:
# `model` is the metadata DataFrame returned by train_ensemble.
# NOTE(review): several columns hold NumPy arrays; to_csv presumably writes
# their string representation (possibly abbreviated), so confirm the file can
# be read back before relying on it.
model.to_csv(trainreport_name)

### Evaluate model

In [None]:
# Per-sample outlier score for the training subset: take the median of the
# errors across the ensemble (axis 0), sum over the last axis, then
# standardise with Z_score.
score=Z_score(np.sum(np.median(t_err, axis=0), axis=-1))

In [None]:
# `score` has one entry per sample of the `traindataset` subset, ordered like
# `traindataset.indices`, while `trainlabel` holds the labels of the full
# training set. Select the matching labels so lengths and order agree.
# assumes trainlabel holds one label per sample along its first axis — TODO confirm
evaluate(trainlabel[traindataset.indices].numpy(), score)