# LSTM (PyTorch)



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [64]:

%matplotlib inline

In [65]:
from multiprocessing import cpu_count
from pathlib import Path

In [66]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import TensorDataset, DataLoader
from torch.optim.lr_scheduler import _LRScheduler

In [67]:
seed = 1
np.random.seed(seed)
#torch.cuda.set_device(0)  # if you have more than one CUDA device

## Pre-processing

In [7]:
x_trn = pd.read_csv("/content/drive/MyDrive/PAM_AP2_Dataset/x_trn.csv")
x_tst = pd.read_csv("/content/drive/MyDrive/PAM_AP2_Dataset/x_tst.csv")
y_trn = pd.read_csv("/content/drive/MyDrive/PAM_AP2_Dataset/y_trn.csv")

In [8]:
x_trn.shape

(77484, 11)

In [9]:
x_tst.shape

(30000, 11)

In [10]:
y_trn.shape

(107484, 4)

In [89]:
#y_tst = y_trn.tail(30000)

In [11]:
y_trn.drop(x_trn.columns[[0]], axis = 1, inplace = True)

In [90]:
y_tst = y_trn.tail(30000)

In [91]:
y_tst.shape

(30000, 3)

In [68]:
y_trn.head()

Unnamed: 0,time_stamp,activity_id,id
0,1551.19,12,101
1,1551.2,12,101
2,1551.21,12,101
3,1551.22,12,101
4,1551.23,12,101


In [None]:
#y_trn.drop(y_trn.columns[[2]], axis = 1, inplace = True)

In [None]:
#y_trn

In [None]:
#y_trn.drop(y_trn.columns[[0]], axis = 1, inplace = True)

In [None]:
#y_trn

In [13]:
x_trn['Address'] = None
#x_trn['Address_id'] = None


In [14]:
x_tst['Address'] = None
#x_tst['Address_id'] = None

In [15]:
x_trn.shape

(77484, 12)

In [None]:
x_trn

In [17]:
x_trn.drop(x_trn.columns[[0]], axis = 1, inplace = True)

In [None]:
x_trn

In [19]:
x_tst.shape

(30000, 12)

In [21]:
x_tst.drop(x_tst.columns[[0]], axis = 1, inplace = True)

In [None]:
x_tst

## Loading The Data


In [None]:
ID_COLS = ['Address']

In [70]:
x_trn.shape

(77484, 11)

In [None]:
x_trn.head()

In [47]:
len(x_trn)

77484

In [None]:
#x_trn['series_id'].value_counts()

In [48]:
x_tst.shape

(30000, 11)

In [49]:
len(x_tst)

30000

In [None]:
x_tst.head()

Unnamed: 0.1,Unnamed: 0,time_stamp,ankle_3D_acceleration_16_x,ankle_3D_acceleration_16_y,ankle_3D_acceleration_16_z,ankle_3D_acceleration_6_x,ankle_3D_acceleration_6_y,ankle_3D_acceleration_6_z,ankle_3D_gyroscope_x,ankle_3D_gyroscope_y,ankle_3D_gyroscope_z,id
0,240454,2412.92,19.8924,-4.75177,-5.41677,19.002,-3.10183,-4.10839,-1.1816,1.50162,-2.91111,101
1,240455,2412.93,18.4496,-7.00762,-7.03833,19.6417,-5.4326,-5.46646,-1.51205,1.52989,-2.89438,101
2,240456,2412.94,18.3991,-7.38385,-7.42241,18.3141,-7.29476,-6.67937,-1.73398,1.38642,-2.91837,101
3,240457,2412.95,20.6787,-6.37059,-7.71403,19.2584,-6.97758,-7.21514,-2.03219,1.19972,-2.91066,101
4,240458,2412.96,20.3223,-3.98849,-6.22748,20.4468,-6.26602,-6.87563,-2.30671,1.04641,-3.03571,101


In [71]:
y_trn.shape

(77484, 3)

In [51]:
y_trn = y_trn.head(77484)

In [72]:
len(y_trn)

77484

In [73]:
y_trn

Unnamed: 0,time_stamp,activity_id,id
0,1551.19,12,101
1,1551.20,12,101
2,1551.21,12,101
3,1551.22,12,101
4,1551.23,12,101
...,...,...,...
77479,3177.52,5,101
77480,3177.53,5,101
77481,3177.54,5,101
77482,3177.55,5,101


In [None]:
#y_trn['surface'].value_counts

## PyTorch 

In [74]:
def create_datasets(X, y, test_size=0.2, dropcols=ID_COLS, time_dim_first=False):
    enc = LabelEncoder()
    y_enc = enc.fit_transform(y)
    X_grouped = create_grouped_array(X)
    if time_dim_first:
        X_grouped = X_grouped.transpose(0, 2, 1)
    X_train, X_valid, y_train, y_valid = train_test_split(X_grouped, y_enc, test_size=0.1)
    X_train, X_valid = [torch.tensor(arr, dtype=torch.float32) for arr in (X_train, X_valid)]
    y_train, y_valid = [torch.tensor(arr, dtype=torch.long) for arr in (y_train, y_valid)]
    train_ds = TensorDataset(X_train, y_train)
    valid_ds = TensorDataset(X_valid, y_valid)
    return train_ds, valid_ds, enc


def create_grouped_array(data, group_col='time_stamp', drop_cols=ID_COLS):
    X_grouped = np.row_stack([
        group.drop(columns=drop_cols).values[None]
        for _, group in data.groupby(group_col)])
    return X_grouped


def create_test_dataset(X, drop_cols=ID_COLS):
    X_grouped = np.row_stack([
        group.drop(columns=drop_cols).values[None]
        for _, group in X.groupby('time_stamp')])
    X_grouped = torch.tensor(X_grouped.transpose(0, 2, 1)).float()
    y_fake = torch.tensor([0] * len(X_grouped)).long()
    return TensorDataset(X_grouped, y_fake)


def create_loaders(train_ds, valid_ds, bs=512, jobs=0):
    train_dl = DataLoader(train_ds, bs, shuffle=True, num_workers=jobs)
    valid_dl = DataLoader(valid_ds, bs, shuffle=False, num_workers=jobs)
    return train_dl, valid_dl


def accuracy(output, target):
    return (output.argmax(dim=1) == target).float().mean().item()

## Cyclic Learning Rate

In [75]:
class CyclicLR(_LRScheduler):
    
    def __init__(self, optimizer, schedule, last_epoch=-1):
        assert callable(schedule)
        self.schedule = schedule
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        return [self.schedule(self.last_epoch, lr) for lr in self.base_lrs]

In [76]:
def cosine(t_max, eta_min=0):
    
    def scheduler(epoch, base_lr):
        t = epoch % t_max
        return eta_min + (base_lr - eta_min)*(1 + np.cos(np.pi*t/t_max))/2
    
    return scheduler

In [None]:
n = 100
sched = cosine(n)
lrs = [sched(t, 1) for t in range(n * 4)]
plt.plot(lrs)

## The LSTM Model

In [78]:
class LSTMClassifier(nn.Module):
    
    
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.batch_size = None
        self.hidden = None
    
    def forward(self, x):
        h0, c0 = self.init_hidden(x)
        out, (hn, cn) = self.rnn(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out
    
    def init_hidden(self, x):
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        return [t.cuda() for t in (h0, c0)]

In [79]:
print('Preparing datasets')
trn_ds, val_ds, enc = create_datasets(x_trn, y_trn['activity_id'])

Preparing datasets


In [80]:
bs = 128
print(f'Creating data loaders with batch size: {bs}')
trn_dl, val_dl = create_loaders(trn_ds, val_ds, bs, jobs=cpu_count())

Creating data loaders with batch size: 128


## Training Loop

Finally, we are ready to bring everything together and train the model.

In [81]:
len(trn_dl)

545

In [82]:
input_dim = 10    
hidden_dim = 256
layer_dim = 3
output_dim = 6
seq_dim = 128

lr = 0.0005
n_epochs = 1000
iterations_per_epoch = len(trn_dl)
best_acc = 0
patience, trials = 100, 0

model = LSTMClassifier(input_dim, hidden_dim, layer_dim, output_dim)
model = model.cuda()
criterion = nn.CrossEntropyLoss()
opt = torch.optim.RMSprop(model.parameters(), lr=lr)
sched = CyclicLR(opt, cosine(t_max=iterations_per_epoch * 2, eta_min=lr/100))

print('Start model training')

for epoch in range(1, n_epochs + 1):
    
    for i, (x_batch, y_batch) in enumerate(trn_dl):
        model.train()
        x_batch = x_batch.cuda()
        y_batch = y_batch.cuda()
        sched.step()
        opt.zero_grad()
        out = model(x_batch)
        loss = criterion(out, y_batch)
        loss.backward()
        opt.step()
    
    model.eval()
    correct, total = 0, 0
    for x_val, y_val in val_dl:
        x_val, y_val = [t.cuda() for t in (x_val, y_val)]
        out = model(x_val)
        preds = F.log_softmax(out, dim=1).argmax(dim=1)
        total += y_val.size(0)
        correct += (preds == y_val).sum().item()
    
    acc = correct / total

    if epoch % 5 == 0:
        print(f'Epoch: {epoch:3d}. Loss: {loss.item():.4f}. Acc.: {acc:2.2%}')

    if acc > best_acc:
        trials = 0
        best_acc = acc
        torch.save(model.state_dict(), 'best.pth')
        print(f'Epoch {epoch} best model saved with accuracy: {best_acc:2.2%}')
    else:
        trials += 1
        if trials >= patience:
            print(f'Early stopping on epoch {epoch}')
            break

Start model training




Epoch 1 best model saved with accuracy: 28.49%
Epoch:   5. Loss: nan. Acc.: 28.49%
Epoch:  10. Loss: nan. Acc.: 28.49%
Epoch:  15. Loss: nan. Acc.: 28.49%
Epoch:  20. Loss: nan. Acc.: 28.49%
Epoch:  25. Loss: nan. Acc.: 28.49%
Epoch:  30. Loss: nan. Acc.: 28.49%
Epoch:  35. Loss: nan. Acc.: 28.49%
Epoch:  40. Loss: nan. Acc.: 28.49%
Epoch:  45. Loss: nan. Acc.: 28.49%
Epoch:  50. Loss: nan. Acc.: 28.49%
Epoch:  55. Loss: nan. Acc.: 28.49%
Epoch:  60. Loss: nan. Acc.: 28.49%
Epoch:  65. Loss: nan. Acc.: 28.49%
Epoch:  70. Loss: nan. Acc.: 28.49%
Epoch:  75. Loss: nan. Acc.: 28.49%
Epoch:  80. Loss: nan. Acc.: 28.49%
Epoch:  85. Loss: nan. Acc.: 28.49%
Epoch:  90. Loss: nan. Acc.: 28.49%
Epoch:  95. Loss: nan. Acc.: 28.49%
Epoch: 100. Loss: nan. Acc.: 28.49%
Early stopping on epoch 101


## Eval

In [None]:
model.load_state_dict(torch.load('best.pth'))

<All keys matched successfully>

In [83]:
model.eval()

LSTMClassifier(
  (rnn): LSTM(10, 256, num_layers=3, batch_first=True)
  (fc): Linear(in_features=256, out_features=6, bias=True)
)

In [95]:
test_dl = DataLoader(create_test_dataset(x_tst), batch_size=64, shuffle=False)

In [96]:
test = []
print('Predicting on test dataset')
for batch, _ in test_dl:
    batch = batch.permute(0, 2, 1)
    out = model(batch.cuda())
    y_hat = F.log_softmax(out, dim=1).argmax(dim=1)
    test += y_hat.tolist()

Predicting on test dataset


In [99]:
len(test)

30000