### Setup

In [2]:
import warnings
warnings.filterwarnings('ignore')

import torch
from torch import nn
import torch.nn.functional as F

import numpy as np
import pickle
import pandas as pd

import sklearn.metrics as metrics

In [3]:
import os
import sys

# add the 'src' directory as one where we can import modules
src_dir = os.path.join(os.getcwd(), os.pardir, 'src')
sys.path.append(src_dir)

from data.dataset import loadingDB

In [4]:
#create results folder
!mkdir -p ../models/results

### Download Datasets

In [5]:
# run below commands to download datasets from google drive using Gdown tool
# Alternatively you can manually download datasets from following url and put them in the data folder
# https://goo.gl/wgEuhu

!pip install gdown
!mkdir -p ../data/processed
!gdown https://drive.google.com/uc?id=1nkAwjp1TRB-wnOYBvlRJS_srv2c6Spz7 -O ../data/processed/opp.mat
!gdown https://drive.google.com/uc?id=1KJ04DWE7nt_PB0Zm9ZaN-Wh-ZYgvBOj- -O ../data/processed/pamap2.mat
!gdown https://drive.google.com/uc?id=15Q8oV02h2_e94IWJ9rnKLrSCKPCTW5FS -O ../data/processed/skoda.mat

[33mYou are using pip version 18.1, however version 19.0.3 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Downloading...
From: https://drive.google.com/uc?id=1nkAwjp1TRB-wnOYBvlRJS_srv2c6Spz7
To: /data/home/ml/notebooks/ensemblelstm_pytorch/data/processed/opp.mat
177MB [00:01, 121MB/s]  
Downloading...
From: https://drive.google.com/uc?id=1KJ04DWE7nt_PB0Zm9ZaN-Wh-ZYgvBOj-
To: /data/home/ml/notebooks/ensemblelstm_pytorch/data/processed/pamap2.mat
140MB [00:01, 92.5MB/s] 
Downloading...
From: https://drive.google.com/uc?id=15Q8oV02h2_e94IWJ9rnKLrSCKPCTW5FS
To: /data/home/ml/notebooks/ensemblelstm_pytorch/data/processed/skoda.mat
114MB [00:02, 54.4MB/s] 


### Choose Dataset

In [6]:
#1 is Opportunity , 2 is PAMAP2, 3 is Skoda
dataset = 1

if dataset == 1:
	train_x, valid_x, test_x, train_y, valid_y, test_y = loadingDB('../data/processed/', 79)
	n_classes = 18
	DB = 79
if dataset == 2:
	train_x, valid_x, test_x, train_y, valid_y, test_y = loadingDB('../data/processed/', 52)
	n_classes = 12
	DB = 52
if dataset == 3:
	train_x, valid_x, test_x, train_y, valid_y, test_y = loadingDB('../data/processed/', 60)
	n_classes = 11
	DB = 60
    
print("\nTrain data shape: inputs{0}, targets {1}".format(train_x.shape, train_y.shape))
print("Valid data shape: inputs{0}, targets {1}".format(valid_x.shape, valid_y.shape))
print("Test data shape: inputs{0}, targets {1}".format(test_x.shape ,test_y.shape))

../data/processed/opp.mat
normalising... zero mean, unit variance
normalising...X_train, X_valid, X_test... done
loading the 79-dim matData successfully . . .

Train data shape: inputs(650972, 79), targets (650972,)
Valid data shape: inputs(32224, 79), targets (32224,)
Test data shape: inputs(118750, 79), targets (118750,)


### Reshape Validation and Test Data

In [17]:
DIM = len(train_x[0])
TEST_WIN = 5000

valid_bt = 1
valid_se = len(valid_x)//valid_bt
valid_x = valid_x[:valid_se*valid_bt,]
valid_y = np.array(valid_y)
valid_y = valid_y[:valid_se*valid_bt,]
valid_x = np.reshape(valid_x, (valid_bt, -1, DB))
valid_y = np.reshape(valid_y, (valid_bt,-1))
print("Valid data shape: inputs{0}, targets {1}".format(valid_x.shape, valid_y.shape))

test_bt = 1
test_se = len(test_x)//test_bt
test_x = test_x[:test_se*test_bt,]
test_y = np.array(test_y)
test_y = test_y[:test_se*test_bt,]
test_x = np.reshape(test_x, (test_bt, -1, DB))
test_y = np.reshape(test_y, (test_bt,-1))
print("Test data shape: inputs{0}, targets {1}".format(test_x.shape ,test_y.shape))

Valid data shape: inputs(1, 32200, 79), targets (1, 32200)
Test data shape: inputs(1, 118700, 79), targets (1, 118700)


In [8]:
def making_training_set(train_x, train_y, batch_size):
    
    seqence_len = len(train_x)//batch_size
    
    # generate random initial position of sampling for each epoch
    indices_start = np.random.randint(low=0, high=len(train_x)-seqence_len, size=(batch_size,))
    
    indices_all_2d = np.zeros((batch_size, seqence_len))
    for i in range(batch_size):
        indices_all_2d[i,:] = np.arange(indices_start[i],indices_start[i]+seqence_len)
    indices_all = np.reshape(indices_all_2d, (-1))

    X_train = np.zeros((batch_size, seqence_len, DIM), dtype=np.float32)
    y_train = np.zeros((batch_size, seqence_len), dtype=np.uint8) 
    for i in range(batch_size):
        idx_start = indices_start[i]
        idx_end = idx_start+seqence_len
        X_train[i,:,:] = train_x[idx_start:idx_end, :]
        y_train[i,:] = train_y[idx_start:idx_end]
    return X_train, y_train

### Define the Model

In [9]:
class SingleModel(nn.Module):
    
    def __init__(self, n_channels=DB, n_hidden=256, n_layers=2, 
                 n_classes=n_classes, drop_prob=0.5):
        super(SingleModel, self).__init__()
        
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.drop_prob = drop_prob
        self.n_channels = n_channels
        
        self.lstm  = nn.LSTM(n_channels, n_hidden, n_layers, dropout=self.drop_prob)
        self.fc = nn.Linear(n_hidden, n_classes)
        self.dropout = nn.Dropout(drop_prob)
        
    def forward(self, x, hidden, batch_size):
        
        x = x.permute(1, 0, 2)
        x, hidden = self.lstm(x, hidden)
        x = self.dropout(x)    
        x = x.contiguous().view(-1, self.n_hidden)
        out = self.fc(x)
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        
        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        
        return hidden
    
net = SingleModel()

## Initialize Model Weights

In [10]:
def init_weights(m):
    if type(m) == nn.LSTM:
        for name, param in m.named_parameters():
            if 'weight_ih' in name:
                torch.nn.init.orthogonal_(param.data)
            elif 'weight_hh' in name:
                torch.nn.init.orthogonal_(param.data)
            elif 'bias' in name:
                param.data.fill_(0)
    elif type(m) == nn.Linear:
        torch.nn.init.orthogonal_(m.weight)
        m.bias.data.fill_(0)
net.apply(init_weights)    

SingleModel(
  (lstm): LSTM(79, 256, num_layers=2, dropout=0.5)
  (fc): Linear(in_features=256, out_features=18, bias=True)
  (dropout): Dropout(p=0.5)
)

In [11]:
# check if GPU is available
train_on_gpu = torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU!')
else: 
    print('No GPU available, training on CPU; consider making n_epochs very small.')

Training on GPU!


## Validate the Model

In [12]:
def validation(criterion):
    
    val_accuracy=0
    val_f1score=0
    val_losses = []
    num_val_process = valid_se//TEST_WIN + 1
    val_h = net.init_hidden(valid_bt)
    net.eval()

    for j in range(num_val_process):
        start = j*TEST_WIN
        end = np.min((valid_se, start+TEST_WIN))
        
        x = valid_x[:,start:end,:]
        y = valid_y[:,start:end]

        inputs, targets = torch.from_numpy(x), torch.from_numpy(y.flatten('F'))
        if(train_on_gpu):
            inputs, targets = inputs.cuda(), targets.cuda()
        
        val_h = tuple([each.data for each in val_h])
        
        output, val_h = net(inputs, val_h, valid_bt)

        val_loss = criterion(output, targets.long())
        val_losses.append(val_loss.item())
        
        top_p, top_class = output.topk(1, dim=1)
        equals = top_class == targets.view(*top_class.shape).long()
        val_accuracy += torch.mean(equals.type(torch.FloatTensor))
        val_f1score += metrics.f1_score(top_class.cpu(), targets.view(*top_class.shape).long().cpu(), average='macro')
            
    test_accuracy=0
    test_f1score=0
    test_losses = []
    num_test_process = test_se//TEST_WIN + 1
    test_h = net.init_hidden(test_bt)
    
    for j in range(num_test_process):
        start = j*TEST_WIN
        end = np.min((test_se, start+TEST_WIN))
        
        x = test_x[:,start:end,:]
        y = test_y[:,start:end]

        inputs, targets = torch.from_numpy(x), torch.from_numpy(y.flatten('F'))
        if(train_on_gpu):
            inputs, targets = inputs.cuda(), targets.cuda()
        
        test_h = tuple([each.data for each in test_h])
        
        output, test_h = net(inputs, test_h, test_bt)

        test_loss = criterion(output, targets.long())
        test_losses.append(test_loss.item())
        
        top_p, top_class = output.topk(1, dim=1)
        equals = top_class == targets.view(*top_class.shape).long()
        test_accuracy += torch.mean(equals.type(torch.FloatTensor))
        test_f1score += metrics.f1_score(top_class.cpu(), targets.view(*top_class.shape).long().cpu(), average='macro')
        
    valid_losses_avg = np.mean(val_losses)
    valid_f1_avg = val_f1score/num_val_process
    print(' '*16 +"Val   Loss: {:.4f}...".format(valid_losses_avg),
    "Val  Acc: {:.4f}...".format(val_accuracy/num_val_process),
    "Val  F1: {:.4f}...".format(valid_f1_avg))
          
    test_losses_avg = np.mean(test_losses)
    test_f1_avg = test_f1score/num_test_process
    print(' '*16 +"Test  Loss: {:.4f}...".format(test_losses_avg),
    "Test Acc: {:.4f}...".format(test_accuracy/num_test_process),
    "Test F1: {:.4f}...".format(test_f1_avg))
    
    net.train() # reset to train mode after iterationg through validation data
    
    return valid_losses_avg, test_losses_avg, valid_f1_avg, test_f1_avg

## Train the Model

In [13]:
def train(net, epochs=100, lr=0.001):
    
    opt = torch.optim.Adam(net.parameters(), lr=lr) 
    criterion = nn.CrossEntropyLoss()
    
    if(train_on_gpu):
        net.cuda()
     
    train_losses = []    
    results = np.empty([0, 5], dtype=np.float32)
    net.train()
    
    for epoch in range(epochs):
        epoch_loss = 0
        train_loss = 0
        train_sz = 0
        
        #generate random batch size for each epoch
        batch_size = np.random.randint(low=128, high=256, size=1)[0]
        
        # initialize hidden state
        h = net.init_hidden(batch_size)      
        
        x_train, y_train = making_training_set(train_x, train_y, batch_size)
        train_len = len(train_x)//batch_size # train_x3D shape: [batch_size,train_len,dim]

        pos_start = 0
        pos_end = 0
        while pos_end < train_len:

             # generate a random window length in each training process
            curr_win_len = np.random.randint(low=16, high=32, size=1)[0]
            
            pos_start = pos_end
            pos_end += curr_win_len

            x = x_train[:,pos_start:pos_end,:]
            y = y_train[:,pos_start:pos_end]
                        
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y.flatten('F'))
            if(train_on_gpu):
                inputs, targets = inputs.cuda(), targets.cuda()
            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            h = tuple([each.data for each in h])
            
            # zero accumulated gradients
            opt.zero_grad()   
            
            # get the output from the model
            output, h = net(inputs, h, batch_size)
            #output = net(inputs, batch_size)
            loss = criterion(output, targets.long())
            
            epoch_loss += loss.item()
            sample_sz = batch_size*curr_win_len
            train_loss += loss.item()*sample_sz
            train_sz += sample_sz
                      
            loss.backward()
            opt.step()
            
        #saving the models
        PATH = '../models/'+str(DB)+'_'+str(epoch)+'.pth'
        torch.save(net.state_dict(), PATH)
        
        train_loss_avg = train_loss/train_sz
        print("Epoch: {}/{}..".format(epoch+1, epochs),
        "Train Loss: {:.4f}".format(train_loss_avg))
        
        valid_loss, test_loss, valid_f1, test_f1 = validation(criterion)
        
        #saving the results
        epoch_results = np.zeros(5)
        
        epoch_results[0] = train_loss_avg
        epoch_results[1] = valid_loss
        epoch_results[2] = test_loss
        epoch_results[3] = valid_f1
        epoch_results[4] = test_f1
        
        results = np.float32(np.vstack((results, epoch_results)))
        
        PATH = '../models/results/'+str(DB)+'.npy'
        np.save(PATH, results)

###  LSTM Ensemble

In [14]:
def lstmEnsemble(n_bestM=20):

    PATH = '../models/results/'+str(DB)+'.npy'
    results = np.load(PATH)

    valid_col = 3 #third column of results is validation f1 
    idx_set = np.argsort(results[:,valid_col])[::-1] # sort results based on validation f1

    best_models = []
    best_models.append(idx_set[:n_bestM]) # store the epoch number of top n models

    prob_M = np.zeros((n_bestM, test_y.size, n_classes))
    
    for i in range(n_bestM):
        idx = best_models[0][i]

        model = '../models/'+str(DB)+'_'+str(idx)+".pth"
        net.load_state_dict(torch.load(model))
       
        if(train_on_gpu):
            net.cuda()

        num_test_process = test_se//TEST_WIN + 1
        test_accuracy=0
        test_f1score=0
        test_losses = []
        test_h = net.init_hidden(test_bt)
        prob_2d = np.zeros((test_y.size, n_classes))

        net.eval()
        for j in range(num_test_process):
            start = j*TEST_WIN
            end = np.min((test_se, start+TEST_WIN))

            x = test_x[:,start:end,:]
            y = test_y[:,start:end]

            inputs, targets = torch.from_numpy(x), torch.from_numpy(y.flatten('F'))
            if(train_on_gpu):
                inputs, targets = inputs.cuda(), targets.cuda()

            test_h = tuple([each.data for each in test_h])
            output, test_h = net(inputs, test_h, test_bt)

            prob_2d[start*test_bt:end*test_bt,:] = F.softmax(output).cpu().detach().numpy()

        prob_M[i,:,:] = prob_2d #store predictions of each of the top n models

    prob_avg = np.mean(prob_M[:,:,:], axis=0) #model fusion by calculating the average of probabilities 
    fused_pred = np.argmax(prob_avg, axis=1)

    f1_fused = metrics.f1_score(test_y.flatten("F"), fused_pred, average='macro')

    print("Ensemble of LSTMs F1-score: {:.4f}".format(f1_fused))

In [15]:
train(net) # train and save results & models

Epoch: 1/100.. Train Loss: 0.7529
                Val   Loss: 0.6732... Val  Acc: 0.8028... Val  F1: 0.2242...
                Test  Loss: 0.4668... Test Acc: 0.8637... Test F1: 0.3522...
Epoch: 2/100.. Train Loss: 0.4088
                Val   Loss: 0.4614... Val  Acc: 0.8593... Val  F1: 0.3592...
                Test  Loss: 0.4220... Test Acc: 0.8702... Test F1: 0.4104...
Epoch: 3/100.. Train Loss: 0.3351
                Val   Loss: 0.4478... Val  Acc: 0.8626... Val  F1: 0.3781...
                Test  Loss: 0.3628... Test Acc: 0.8865... Test F1: 0.5320...
Epoch: 4/100.. Train Loss: 0.2858
                Val   Loss: 0.4571... Val  Acc: 0.8690... Val  F1: 0.4760...
                Test  Loss: 0.3709... Test Acc: 0.8865... Test F1: 0.5302...
Epoch: 5/100.. Train Loss: 0.2912
                Val   Loss: 0.3862... Val  Acc: 0.8919... Val  F1: 0.4290...
                Test  Loss: 0.4079... Test Acc: 0.8818... Test F1: 0.4932...
Epoch: 6/100.. Train Loss: 0.2474
                Val   Loss

Epoch: 45/100.. Train Loss: 0.0241
                Val   Loss: 0.5806... Val  Acc: 0.8994... Val  F1: 0.5436...
                Test  Loss: 0.6001... Test Acc: 0.9138... Test F1: 0.6940...
Epoch: 46/100.. Train Loss: 0.0202
                Val   Loss: 0.6588... Val  Acc: 0.8906... Val  F1: 0.5139...
                Test  Loss: 0.6355... Test Acc: 0.9140... Test F1: 0.6830...
Epoch: 47/100.. Train Loss: 0.0400
                Val   Loss: 0.6402... Val  Acc: 0.8820... Val  F1: 0.4858...
                Test  Loss: 0.5809... Test Acc: 0.9037... Test F1: 0.6322...
Epoch: 48/100.. Train Loss: 0.0397
                Val   Loss: 0.5564... Val  Acc: 0.8975... Val  F1: 0.5311...
                Test  Loss: 0.5925... Test Acc: 0.9091... Test F1: 0.6737...
Epoch: 49/100.. Train Loss: 0.0281
                Val   Loss: 0.5990... Val  Acc: 0.8913... Val  F1: 0.5058...
                Test  Loss: 0.5834... Test Acc: 0.9091... Test F1: 0.6899...
Epoch: 50/100.. Train Loss: 0.0251
                Val 

Epoch: 89/100.. Train Loss: 0.0153
                Val   Loss: 0.7071... Val  Acc: 0.8890... Val  F1: 0.5434...
                Test  Loss: 0.7218... Test Acc: 0.9068... Test F1: 0.6526...
Epoch: 90/100.. Train Loss: 0.0111
                Val   Loss: 0.7257... Val  Acc: 0.8895... Val  F1: 0.5592...
                Test  Loss: 0.7419... Test Acc: 0.9077... Test F1: 0.6682...
Epoch: 91/100.. Train Loss: 0.0150
                Val   Loss: 0.6786... Val  Acc: 0.8943... Val  F1: 0.5399...
                Test  Loss: 0.7365... Test Acc: 0.9084... Test F1: 0.6496...
Epoch: 92/100.. Train Loss: 0.0123
                Val   Loss: 0.6969... Val  Acc: 0.8952... Val  F1: 0.5620...
                Test  Loss: 0.7607... Test Acc: 0.9067... Test F1: 0.6539...
Epoch: 93/100.. Train Loss: 0.0101
                Val   Loss: 0.7272... Val  Acc: 0.8939... Val  F1: 0.5364...
                Test  Loss: 0.7924... Test Acc: 0.9030... Test F1: 0.6513...
Epoch: 94/100.. Train Loss: 0.0223
                Val 

In [16]:
lstmEnsemble(n_bestM=20)

Ensemble of LSTMs F1-score: 0.7179
