In [2]:

import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch
from torch import nn
from torch.nn import functional as F
from torch.optim.lr_scheduler import _LRScheduler
from tqdm import tqdm
import pandas as pd
import os

In [3]:
data = dt.fread('./dataset/H1_nav.csv')
data = data.to_pandas()

In [4]:
ignore_columns = ['Participant', 'Dataset']
features = [col for col in data.columns if col not in ignore_columns]

In [4]:
device = torch.device('cuda')
device

device(type='cuda')

In [6]:
data = data.fillna(method='ffill').fillna(method='bfill')
# data['action'] = (data['resp'] > 0).astype('int')
data = data.drop(columns=ignore_columns)

In [7]:
class Timeseries_Dataset(Dataset):
    def __init__(self, X: np.ndarray, y: np.ndarray, seq_len: int = 32):
        self.X = torch.tensor(X).float()
        self.y = torch.tensor(y).float()
        self.seq_len = seq_len

    def __len__(self):
        return self.X.__len__() - (self.seq_len - 1)

    def __getitem__(self, index):
        return {'x': torch.tensor(self.X[index:index + self.seq_len], dtype=torch.float),
                'y': torch.tensor(self.y[index + self.seq_len - 1], dtype=torch.long)}


In [8]:
class LSTMClassifier(nn.Module):

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        h0, c0 = self.init_hidden(x)
        out, (hn, cn) = self.rnn(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out
    
    def init_hidden(self, x):
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        return [t.to(device) for t in (h0, c0)]



In [11]:
batch_size = 200
lr = 0.0005
input_dim = 7
hidden_dim = 256
layer_dim = 5
output_dim = 2
seq_dim = 32
target_column = 'x'

feature_columns = data.columns[~data.columns.isin([target_column])]
train, validation = data[:int(len(data) * 0.8)], data[int(len(data) * 0.2):]
train_features, train_target = train[feature_columns], train[[target_column]]
validation_features, validation_target = validation[feature_columns], validation[[target_column]]
train_dataset = Timeseries_Dataset(X=train_features.values, y=train_target.values, seq_len=seq_dim)
validation_dataset = Timeseries_Dataset(X=validation_features.values, y=validation_target.values, seq_len=seq_dim)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)



In [12]:
weight = '/kaggle/input/weight-lstm/best_30.pth'

phase_training = True
if os.path.exists(weight):
    phase_training = False

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = LSTMClassifier(input_dim, hidden_dim, layer_dim, output_dim)
model = model.to(device)
if phase_training:
    iterations_per_epoch = len(train_loader)
    num_epochs = 30
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
    print('Start model training ...')
    best_acc = 0.0
    patience, trials = 100, 0
    for epoch in range(1, num_epochs + 1):
        for i, train_batch in enumerate(validation_loader):
            model.train()
            features = train_batch['x'].to(device)
            targets = train_batch['y'].to(device)
            targets = torch.squeeze(targets)
            preds = model(features)
            loss = criterion(preds, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f'Epoch {epoch} best model saved with loss: {loss:2.2}')

        model.eval()
        correct, total = 0, 0
        for valid_batch in validation_loader:
            features = valid_batch['x'].to(device)
            targets = valid_batch['y'].to(device)
            targets = torch.squeeze(targets)
            preds = model(features)
            preds = F.log_softmax(preds, dim=1).argmax(dim=1)
            total += targets.size(0)
            correct += (preds == targets).sum().item()

        acc = correct / total

        if epoch % 5 == 0:
            print(f'Epoch: {epoch:3d}. Loss: {loss.item():.4f}. Acc.: {acc:2.2%}')

        if acc > best_acc:
            trials = 0
            best_acc = acc
            torch.save(model.state_dict(), 'best.pth')
            print(f'Epoch {epoch} best model saved with accuracy: {best_acc:2.2%}')
        else:
            trials += 1
            if trials >= patience:
                print(f'Early stopping on epoch {epoch}')
                break
    print('Training Complete !!!')


Start model training ...


  return {'x': torch.tensor(self.X[index:index + self.seq_len], dtype=torch.float),
  'y': torch.tensor(self.y[index + self.seq_len - 1], dtype=torch.long)}


Epoch 1 best model saved with loss: 0.062
Epoch 1 best model saved with accuracy: 95.42%
Epoch 2 best model saved with loss: 0.063
Epoch 3 best model saved with loss: 0.066
Epoch 4 best model saved with loss: 0.068
Epoch 5 best model saved with loss: 0.067
Epoch:   5. Loss: 0.0671. Acc.: 95.42%
Epoch 6 best model saved with loss: 0.066
Epoch 7 best model saved with loss: 0.061
Epoch 8 best model saved with loss: 0.036
Epoch 9 best model saved with loss: 0.043
Epoch 10 best model saved with loss: 0.059
Epoch:  10. Loss: 0.0594. Acc.: 95.42%
Epoch 11 best model saved with loss: 0.063
Epoch 12 best model saved with loss: 0.063
Epoch 13 best model saved with loss: 0.058
Epoch 14 best model saved with loss: 0.043
Epoch 15 best model saved with loss: 0.023
Epoch:  15. Loss: 0.0231. Acc.: 95.42%
Epoch 16 best model saved with loss: 0.03
Epoch 17 best model saved with loss: 0.02
Epoch 18 best model saved with loss: 0.0083
Epoch 19 best model saved with loss: 0.0057
Epoch 20 best model saved wi