In [13]:
import numpy as np
import pandas as pd
import os
import os.path as op

import argparse
import time
import yaml
import shutil

import tsaug as ts
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

import sys
sys.path.append(os.path.dirname(os.getcwd()))

from utils.train_utils import evaluate_model, epoch_trainer, epoch_validation
from utils.models import LSTMNet
from utils.pytorchtools import EarlyStopping
from utils.mySummary import SummaryLogger
from utils.augmentation import * 

In [14]:
run_path = os.path.dirname(os.getcwd()) # 'experiment directory'
output_dir = run_path+'\\output'

In [15]:
da_methods_mapping = {
    'convolve': ts.Convolve(window="hann"),
    'pool': ts.Pool(size=3),
    'jitter': ts.AddNoise(scale=0.05),
    'quantize': ts.Quantize(n_levels=17),
    'reverse': ts.Reverse(),
    'timewarp': ts.TimeWarp(n_speed_change=4, max_speed_ratio=1.5),
    'spawner': spawner,
    'scaling': scaling,
    'magnitude_warp': magnitude_warp,
    'window_warp': window_warp
}

In [16]:
is_cuda = torch.cuda.is_available()
if is_cuda:
    device = torch.device("cuda")
    torch.cuda.set_device(args.gpu_number)
    print(torch.cuda.current_device())
else:
    device = torch.device("cpu")
print(device)

cpu


In [17]:
def create_directory(logdir):
    try:
        os.makedirs(logdir)
    except FileExistsError:
        pass
    
create_directory(output_dir) 

In [18]:
def build_dataloader(x_data, y_data, batch_size, shuffle=True):
    train_data = TensorDataset(torch.from_numpy(x_data).float(), torch.from_numpy(y_data))
    train_loader = DataLoader(train_data, shuffle=shuffle, batch_size=batch_size, drop_last=False)
    return train_loader

In [19]:
def augment_dataset(i_sp, batch_size, da_method, augment_times=1):
    data_dir = run_path + '\\data'
    train_x = np.load(op.join(data_dir, 'study_period_X_'+str(i_sp)+'_train.npy'))
    train_y = np.load(op.join(data_dir, 'study_period_Y_'+str(i_sp)+'_train.npy'))

    validation_split = 0.2
    dataset_size=train_x.shape[0]
    indices = list(range(dataset_size))
    split = dataset_size - int(np.floor(validation_split*dataset_size))

    trainX, trainY = train_x[:split], train_y[:split]
    if da_method in ['convolve', 'pool', 'jitter', 'quantize', 'reverse', 'timewarp']:
        trainX = np.concatenate([trainX, *[da_methods_mapping[da_method].augment(trainX) for i in range(augment_times)]])
        trainY = np.concatenate([trainY, *[trainY for i in range(augment_times)]])
    elif da_method in ['magnitude_warp', 'window_warp', 'scaling']:
        trainX = np.concatenate([trainX, *[da_methods_mapping[da_method](trainX) for i in range(augment_times)]])
        trainY = np.concatenate([trainY, *[trainY for i in range(augment_times)]])
    train_loader = build_dataloader(trainX, trainY, batch_size=batch_size)
    valid_loader = build_dataloader(train_x[split:], train_y[split:], batch_size=batch_size)
    return train_loader, valid_loader

In [20]:
def train_eval_single_model(model, train_loader, valid_loader, n_epochs, path, i_sp, device, patience):
    logger = SummaryLogger(path)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)
    early_stopping = EarlyStopping(patience=patience, verbose=True, path=path)
    print('Start training')
    for epoch in range(n_epochs):
        counter = 0
        loss, acc = epoch_trainer(model, train_loader, optimizer, criterion, logger, device)
        valid_loss, valid_acc = epoch_validation(model, valid_loader, logger, device)
        print(epoch, loss, acc, valid_loss, valid_acc)
        early_stopping(valid_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break       
    logger.close()
    model_file_name = os.path.join(path, 'checkpoint.pt')
    model.load_state_dict(torch.load(model_file_name))
    metrics = evaluate_model(model, path, i_sp, device)
    return metrics

In [25]:
def run_for_all_study_periods(da_method,batch_size,hidden_dim,n_layers,n_epochs,init_sp,end_sp,patience):
    for i in range(init_sp, end_sp+1):
        path = op.join(run_path, 'output/study_period_'+str(i).zfill(2))
        create_directory(path)
        train_loader, valid_loader = augment_dataset(i, batch_size=batch_size, da_method=da_method)
        model = LSTMNet(1, hidden_dim=hidden_dim, output_dim=2, n_layers=n_layers, device=device)
        model.to(device)
        metrics = train_eval_single_model(model, train_loader, valid_loader, n_epochs, path, i, device, patience)
        print(metrics)

#### Listing different augmentation methods

In [22]:
da_method_list = list(da_methods_mapping.keys())+['None'] # 'augmentation methods'
da_method_list

['convolve',
 'pool',
 'jitter',
 'quantize',
 'reverse',
 'timewarp',
 'spawner',
 'scaling',
 'magnitude_warp',
 'window_warp',
 'None']

#### Method 1: Convolve

In [26]:
batch_size = 128 # 'batch size'
hidden_dim = 25 # 'hidden dimension of LSTM'
n_layers = 1 # 'number of layers in the LSTM'
n_epochs = 200 # 'number of epochs for training'
init_sp = 0 # 'initial data split'
end_sp = 29 # 'final data split'
patience = 10 # 'patience for early stopping'
da_method = 'convolve'

In [None]:
run_for_all_study_periods(da_method,batch_size,hidden_dim,n_layers,n_epochs,init_sp,end_sp,patience)

#### Method 2: Pool

In [None]:
batch_size = 128 # 'batch size'
hidden_dim = 25 # 'hidden dimension of LSTM'
n_layers = 1 # 'number of layers in the LSTM'
n_epochs = 200 # 'number of epochs for training'
init_sp = 0 # 'initial data split'
end_sp = 29 # 'final data split'
patience = 10 # 'patience for early stopping'
da_method = 'pool'

In [None]:
run_for_all_study_periods(da_method,batch_size,hidden_dim,n_layers,n_epochs,init_sp,end_sp,patience)

#### Method 3: Jitter

In [28]:
batch_size = 128 # 'batch size'
hidden_dim = 25 # 'hidden dimension of LSTM'
n_layers = 1 # 'number of layers in the LSTM'
n_epochs = 200 # 'number of epochs for training'
init_sp = 0 # 'initial data split'
end_sp = 29 # 'final data split'
patience = 10 # 'patience for early stopping'
da_method = 'jitter'

In [None]:
run_for_all_study_periods(da_method,batch_size,hidden_dim,n_layers,n_epochs,init_sp,end_sp,patience)

#### Method 4: Quantize

In [None]:
batch_size = 128 # 'batch size'
hidden_dim = 25 # 'hidden dimension of LSTM'
n_layers = 1 # 'number of layers in the LSTM'
n_epochs = 200 # 'number of epochs for training'
init_sp = 0 # 'initial data split'
end_sp = 29 # 'final data split'
patience = 10 # 'patience for early stopping'
da_method = 'quantize'

In [None]:
run_for_all_study_periods(da_method,batch_size,hidden_dim,n_layers,n_epochs,init_sp,end_sp,patience)

#### Method 5: Reverse

In [None]:
batch_size = 128 # 'batch size'
hidden_dim = 25 # 'hidden dimension of LSTM'
n_layers = 1 # 'number of layers in the LSTM'
n_epochs = 200 # 'number of epochs for training'
init_sp = 0 # 'initial data split'
end_sp = 29 # 'final data split'
patience = 10 # 'patience for early stopping'
da_method = 'reverse'

In [None]:
run_for_all_study_periods(da_method,batch_size,hidden_dim,n_layers,n_epochs,init_sp,end_sp,patience)

#### Method 6: Timewarp

In [None]:
batch_size = 128 # 'batch size'
hidden_dim = 25 # 'hidden dimension of LSTM'
n_layers = 1 # 'number of layers in the LSTM'
n_epochs = 200 # 'number of epochs for training'
init_sp = 0 # 'initial data split'
end_sp = 29 # 'final data split'
patience = 10 # 'patience for early stopping'
da_method = 'timewarp'

In [None]:
run_for_all_study_periods(da_method,batch_size,hidden_dim,n_layers,n_epochs,init_sp,end_sp,patience)

#### Method 7: Spawner

In [None]:
batch_size = 128 # 'batch size'
hidden_dim = 25 # 'hidden dimension of LSTM'
n_layers = 1 # 'number of layers in the LSTM'
n_epochs = 200 # 'number of epochs for training'
init_sp = 0 # 'initial data split'
end_sp = 29 # 'final data split'
patience = 10 # 'patience for early stopping'
da_method = 'spawner'

In [None]:
run_for_all_study_periods(da_method,batch_size,hidden_dim,n_layers,n_epochs,init_sp,end_sp,patience)

#### Method 8: scaling

In [None]:
batch_size = 128 # 'batch size'
hidden_dim = 25 # 'hidden dimension of LSTM'
n_layers = 1 # 'number of layers in the LSTM'
n_epochs = 200 # 'number of epochs for training'
init_sp = 0 # 'initial data split'
end_sp = 29 # 'final data split'
patience = 10 # 'patience for early stopping'
da_method = 'scaling'

In [None]:
run_for_all_study_periods(da_method,batch_size,hidden_dim,n_layers,n_epochs,init_sp,end_sp,patience)

#### Method 9: magnitude_warp

In [None]:
batch_size = 128 # 'batch size'
hidden_dim = 25 # 'hidden dimension of LSTM'
n_layers = 1 # 'number of layers in the LSTM'
n_epochs = 200 # 'number of epochs for training'
init_sp = 0 # 'initial data split'
end_sp = 29 # 'final data split'
patience = 10 # 'patience for early stopping'
da_method = 'magnitude_warp'

In [None]:
run_for_all_study_periods(da_method,batch_size,hidden_dim,n_layers,n_epochs,init_sp,end_sp,patience)

#### Method 10: window_warp

In [None]:
batch_size = 128 # 'batch size'
hidden_dim = 25 # 'hidden dimension of LSTM'
n_layers = 1 # 'number of layers in the LSTM'
n_epochs = 200 # 'number of epochs for training'
init_sp = 0 # 'initial data split'
end_sp = 29 # 'final data split'
patience = 10 # 'patience for early stopping'
da_method = 'window_warp'

In [None]:
run_for_all_study_periods(da_method,batch_size,hidden_dim,n_layers,n_epochs,init_sp,end_sp,patience)

#### Method 11: None

In [None]:
batch_size = 128 # 'batch size'
hidden_dim = 25 # 'hidden dimension of LSTM'
n_layers = 1 # 'number of layers in the LSTM'
n_epochs = 200 # 'number of epochs for training'
init_sp = 0 # 'initial data split'
end_sp = 29 # 'final data split'
patience = 10 # 'patience for early stopping'
da_method = None

In [None]:
run_for_all_study_periods(da_method,batch_size,hidden_dim,n_layers,n_epochs,init_sp,end_sp,patience)