In [1]:
import torch
from torch import nn
from torchinfo import summary

from sklearn import tree
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import train_test_split

import seaborn as sns
import matplotlib.pyplot as plt

import pandas as pd
import numpy  as np

import json
import os
import glob

import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


## Read Raw data

In [2]:
save_folder = "../data/jikken2/"

feature_save_file = os.path.join(save_folder, "features.npy")
label_save_file = os.path.join(save_folder, "label.npy")
label_name_save_file = os.path.join(save_folder, "label_name.json")

kfold_split_save_file = os.path.join(save_folder, "kfold_train_val_test.npy")

In [3]:
features = np.load(feature_save_file, allow_pickle=True).astype(np.float)
labels = np.load(label_save_file, allow_pickle=True)

kfold_train_test_index_list = np.load(kfold_split_save_file, allow_pickle=True)

In [4]:
# Normalize over the feature data

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

data_num, window_size, feature_num = features.shape
features_reshape = features.reshape(-1, feature_num)
features_norm = sc.fit_transform(features_reshape)

# convert back t feature size
features = features_norm.reshape(data_num, window_size, feature_num)

In [5]:
with open(label_name_save_file) as f:
    label_list = json.load(f)

In [6]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class CustomDataset(Dataset):
    def __init__(self, all_feature_list, y_list, data_index_list):

        self.all_feature_list = all_feature_list
        self.y_list = y_list
        self.data_index_list = data_index_list
        
    def __len__(self):
        return len(self.data_index_list)
    
    def __getitem__(self, idx):
        index = self.data_index_list[idx]
        x = self.all_feature_list[index]
        label = self.y_list[index]
        
        return x, label

In [7]:
## test

# Create train dataset and test dataset for the first activity in label_list
train_data_df_index_list, val_data_df_index_list, test_data_df_index_list = kfold_train_test_index_list[0]

train_dataset = CustomDataset(features, labels, train_data_df_index_list)
val_dataset = CustomDataset(features, labels, val_data_df_index_list)
test_dataset = CustomDataset(features, labels, test_data_df_index_list)

train_dataloader = DataLoader(
    train_dataset, 
    batch_size=8,
    num_workers=0, # number of subprocesses to use for data loading
    shuffle=True)

val_dataloader = DataLoader(
    train_dataset, 
    batch_size=8,
    num_workers=0, # number of subprocesses to use for data loading
    shuffle=False)

test_dataloader = DataLoader(
    test_dataset,
    batch_size=1,
    num_workers=0, # number of subprocesses to use for data loading
    shuffle=False)

next(iter(train_dataloader))

[tensor([[[ 2.3015e+00,  1.6927e+00,  4.2375e-01,  ...,  2.0343e+00,
            2.8641e-01, -3.3243e-01],
          [ 2.4050e+00,  2.4260e+00,  4.5213e-01,  ...,  1.7951e+00,
            7.6658e-01, -5.5451e-01],
          [ 2.4537e+00,  3.1704e+00,  3.7554e-01,  ...,  1.4671e+00,
           -2.0232e-01, -6.0877e-01],
          ...,
          [ 2.7687e+00,  3.4738e-01, -1.2136e+00,  ...,  1.6880e+00,
            1.2917e+00, -1.0755e-01],
          [ 2.9732e+00,  1.3122e-01, -1.0464e+00,  ...,  1.7342e+00,
            1.7392e+00, -2.0992e-01],
          [ 3.0616e+00,  3.8998e-02, -7.8318e-01,  ...,  1.7242e+00,
            2.0684e+00, -3.4977e-01]],
 
         [[ 3.4493e+00, -7.3613e-01, -1.1183e+00,  ...,  9.1317e-01,
            1.8859e+00, -1.2840e+00],
          [ 3.7051e+00, -8.1678e-01, -1.1483e+00,  ...,  9.5291e-01,
            1.9282e+00, -1.3354e+00],
          [ 3.8829e+00, -8.4837e-01, -1.1935e+00,  ...,  1.1538e+00,
            2.1926e+00, -1.0384e+00],
          ...,
    

## Model definition

In [8]:
import pytorch_lightning as pl
import torch.nn.functional as F

class LSTMModel(pl.LightningModule):
    def __init__(self, hidden_size=128, input_size=30, output_size=6):
        super().__init__()
        self.rnn = nn.LSTM(input_size=input_size, 
                          hidden_size=hidden_size,
                          num_layers=2,
                          batch_first=True)
        
        self.seq_1 = nn.Sequential(
            nn.Linear(in_features=hidden_size, out_features=hidden_size),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.Dropout1d(p=0.2),
            nn.ReLU(),
            nn.Linear(in_features=hidden_size, out_features=hidden_size),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.Dropout1d(p=0.2),
            nn.ReLU(),
        )
        
        self.seq_2 = nn.Sequential(
            nn.Linear(in_features=hidden_size, out_features=hidden_size),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.Dropout1d(p=0.2),
            nn.ReLU(),
            nn.Linear(in_features=hidden_size, out_features=hidden_size),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.Dropout1d(p=0.2),
            nn.ReLU(),
        )
        
        self.classifier = nn.Linear(in_features=3 * hidden_size, out_features=output_size)
        
    def forward(self, x):
        activation, _ = self.rnn(x)
        
        b, _, _ = activation.size()
        lstm_output = activation[:,-1,:].view(b,-1)
        seq_1_output = self.seq_1(lstm_output)
        seq_2_output = self.seq_2(lstm_output)
        
        output = torch.concat([lstm_output, seq_1_output, seq_2_output], dim=1)
        output = self.classifier(output)
        
        return output
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(params=self.parameters(), lr=0.0005)
        return optimizer
    
    def training_step(self, batch, batch_idx):
        X, y = batch
        X = X.float()
        # 1. Forward pass
        y_pred = self.forward(X)
        # 2. Calculate  and accumulate loss
        loss = F.cross_entropy(y_pred, y)
        
        self.log("train_loss", loss)
        
        return loss
    
    def test_step(self, batch, batch_idx):
        # this is the test loop
        X, y = batch
        X = X.float()
    
        # 1. Forward pass
        test_pred_logits = self.forward(X)

        # Calculate and accumulate accuracy
        test_pred_labels = test_pred_logits.argmax(dim=1)
        test_acc = ((test_pred_labels == y).sum().item()/len(test_pred_labels))
        self.log("test_acc", test_acc)

    def validation_step(self, batch, batch_idx):
        # this is the validation loop
        X, y = batch
        X = X.float()
        
        y_pred = self.forward(X)
        # 2. Calculate  and accumulate loss
        loss = F.cross_entropy(y_pred, y)
        
        self.log("val_loss", loss)
        

In [9]:
model = LSTMModel()
summary(model)

Layer (type:depth-idx)                   Param #
LSTMModel                                --
├─LSTM: 1-1                              214,016
├─Sequential: 1-2                        --
│    └─Linear: 2-1                       16,512
│    └─BatchNorm1d: 2-2                  256
│    └─Dropout1d: 2-3                    --
│    └─ReLU: 2-4                         --
│    └─Linear: 2-5                       16,512
│    └─BatchNorm1d: 2-6                  256
│    └─Dropout1d: 2-7                    --
│    └─ReLU: 2-8                         --
├─Sequential: 1-3                        --
│    └─Linear: 2-9                       16,512
│    └─BatchNorm1d: 2-10                 256
│    └─Dropout1d: 2-11                   --
│    └─ReLU: 2-12                        --
│    └─Linear: 2-13                      16,512
│    └─BatchNorm1d: 2-14                 256
│    └─Dropout1d: 2-15                   --
│    └─ReLU: 2-16                        --
├─Linear: 1-4                            2,310

## Train step setup

In [11]:
from tqdm.auto import tqdm
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
# from lightning.pytorch.callbacks.early_stopping import EarlyStopping

batch_size = 1024
patience = 20

all_test = []
all_pred = []
loss_all_folds = []

# kfold_train_test_index_list = [kfold_train_test_index_list[0]]

for i, (train_index, val_index, test_index) in enumerate(kfold_train_test_index_list):
    print(f"\n*************KFOLD {i + 1}*************")
    
    train_dataset = CustomDataset(features, labels, train_index)
    val_dataset = CustomDataset(features, labels, val_index)
    test_dataset = CustomDataset(features, labels, test_index)

    train_dataloader = DataLoader(
        train_dataset, 
        batch_size=batch_size,
        num_workers=4, # number of subprocesses to use for data loading
        shuffle=True)
    
    val_dataloader = DataLoader(
        val_dataset, 
        batch_size=batch_size,
        num_workers=2, # number of subprocesses to use for data loading
        shuffle=False)
    
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        num_workers=2, # number of subprocesses to use for data loading
        shuffle=False)
    
    model = LSTMModel(hidden_size=64, input_size=feature_num, output_size=len(label_list))

    trainer = pl.Trainer(callbacks=[EarlyStopping(monitor="val_loss", patience=patience, mode="min")])
    trainer.fit(model, train_dataloader, val_dataloader)
    trainer.test(model, test_dataloader)
    

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /Users/tranhoang/jikken/acttivity_recognition/notebooks/lightning_logs

  | Name       | Type       | Params
------------------------------------------
0 | rnn        | LSTM       | 57.9 K
1 | seq_1      | Sequential | 8.6 K 
2 | seq_2      | Sequential | 8.6 K 
3 | classifier | Linear     | 1.2 K 
------------------------------------------
76.2 K    Trainable params
0         Non-trainable params
76.2 K    Total params
0.305     Total estimated model params size (MB)



*************KFOLD 1*************
Epoch 0:  84%|█████████████████████████████████████████████████████████████████████████▏             | 132/157 [00:13<00:02,  9.91it/s, loss=1.36, v_num=0]
Validation: 0it [00:00, ?it/s][A
Validation DataLoader 0:   0%|                                                                                                      | 0/25 [00:00<?, ?it/s][A
Epoch 0:  85%|█████████████████████████████████████████████████████████████████████████▋             | 133/157 [00:13<00:02,  9.96it/s, loss=1.36, v_num=0][A
Epoch 0:  85%|██████████████████████████████████████████████████████████████████████████▎            | 134/157 [00:13<00:02, 10.03it/s, loss=1.36, v_num=0][A
Epoch 0:  86%|██████████████████████████████████████████████████████████████████████████▊            | 135/157 [00:13<00:02, 10.09it/s, loss=1.36, v_num=0][A
Epoch 0:  87%|███████████████████████████████████████████████████████████████████████████▎           | 136/157 [00:13<00:02, 10.15it/s, los

Epoch 1:  98%|█████████████████████████████████████████████████████████████████████████████████████▎ | 154/157 [00:24<00:00,  6.36it/s, loss=1.19, v_num=0][A
Epoch 1:  99%|█████████████████████████████████████████████████████████████████████████████████████▉ | 155/157 [00:24<00:00,  6.39it/s, loss=1.19, v_num=0][A
Epoch 1:  99%|██████████████████████████████████████████████████████████████████████████████████████▍| 156/157 [00:24<00:00,  6.43it/s, loss=1.19, v_num=0][A
Epoch 1: 100%|███████████████████████████████████████████████████████████████████████████████████████| 157/157 [00:24<00:00,  6.47it/s, loss=1.19, v_num=0][A
Testing DataLoader 0: 100%|████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:00<00:00, 61.15it/s][A


GPU available: False, used: False
TPU available: False, using: 0 TPU cores



*************KFOLD 2*************


IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type       | Params
------------------------------------------
0 | rnn        | LSTM       | 57.9 K
1 | seq_1      | Sequential | 8.6 K 
2 | seq_2      | Sequential | 8.6 K 
3 | classifier | Linear     | 1.2 K 
------------------------------------------
76.2 K    Trainable params
0         Non-trainable params
76.2 K    Total params
0.305     Total estimated model params size (MB)


Testing DataLoader 0: 100%|████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:00<00:00, 61.22it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type       | Params
------------------------------------------
0 | rnn        | LSTM       | 57.9 K
1 | seq_1      | Sequential | 8.6 K 
2 | seq_2      | Sequential | 8.6 K 
3 | classifier | Linear     | 1.2 K 
------------------------------------------
76.2 K    Trainable params
0         Non-trainable params
76.2 K    Total params
0.305     Total estimated model params size (MB)



*************KFOLD 3*************
Epoch 0:  79%|████████████████████████████████████████████████████████████████████▋                  | 124/157 [00:20<00:05,  6.18it/s, loss=1.48, v_num=1]



KeyboardInterrupt

