In [1]:
import torch
from torch import nn
from torchinfo import summary

from sklearn import tree
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import train_test_split

import pytorch_lightning as pl
import torch.nn.functional as F

import seaborn as sns
import matplotlib.pyplot as plt

import pandas as pd
import numpy  as np

import json
import os
import glob

import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


## Read Raw data

In [2]:
save_folder = "../data/jikken1/"

feature_save_file = os.path.join(save_folder, "features.npy")
label_save_file = os.path.join(save_folder, "label.npy")
label_name_save_file = os.path.join(save_folder, "label_name.json")

kfold_split_save_file = os.path.join(save_folder, "kfold_train_val_test.npy")

In [3]:
features = np.load(feature_save_file, allow_pickle=True).astype(np.float)
labels = np.load(label_save_file, allow_pickle=True)

kfold_train_test_index_list = np.load(kfold_split_save_file, allow_pickle=True)

In [4]:
# Normalize over the feature data

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

data_num, window_size, feature_num = features.shape
features_reshape = features.reshape(-1, feature_num)
features_norm = sc.fit_transform(features_reshape)

# convert back t feature size
features = features_norm.reshape(data_num, window_size, feature_num)

In [5]:
with open(label_name_save_file) as f:
    label_list = json.load(f)

In [6]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class CustomDataset(Dataset):
    def __init__(self, all_feature_list, y_list, data_index_list):

        self.all_feature_list = all_feature_list
        self.y_list = y_list
        self.data_index_list = data_index_list
        
    def __len__(self):
        return len(self.data_index_list)
    
    def __getitem__(self, idx):
        index = self.data_index_list[idx]
        x = self.all_feature_list[index]
        label = self.y_list[index]
        
        return x, label

In [7]:
## test

# Create train dataset and test dataset for the first activity in label_list
train_data_df_index_list, val_data_df_index_list, test_data_df_index_list = kfold_train_test_index_list[0]

train_dataset = CustomDataset(features, labels, train_data_df_index_list)
val_dataset = CustomDataset(features, labels, val_data_df_index_list)
test_dataset = CustomDataset(features, labels, test_data_df_index_list)

train_dataloader = DataLoader(
    train_dataset, 
    batch_size=8,
    num_workers=0, # number of subprocesses to use for data loading
    shuffle=True)

val_dataloader = DataLoader(
    train_dataset, 
    batch_size=8,
    num_workers=0, # number of subprocesses to use for data loading
    shuffle=False)

test_dataloader = DataLoader(
    test_dataset,
    batch_size=1,
    num_workers=0, # number of subprocesses to use for data loading
    shuffle=False)

next(iter(train_dataloader))

[tensor([[[-7.8359e-01, -3.1282e-01, -4.9598e-01,  ..., -4.3510e-01,
            7.3018e-01, -1.6694e+00],
          [-7.0130e-01, -3.3016e-01, -4.7102e-01,  ..., -4.9755e-01,
            6.7183e-01, -1.6151e+00],
          [-6.3043e-01, -3.3355e-01, -4.5598e-01,  ..., -4.1698e-01,
            6.7504e-01, -1.4171e+00],
          ...,
          [ 6.4485e-01, -8.5654e-02,  1.9672e-01,  ...,  8.8857e-01,
            3.1264e-01, -6.0877e-01],
          [ 6.2727e-01, -8.0859e-02,  1.3176e-01,  ...,  9.0993e-01,
            7.1091e-01, -7.1450e-01],
          [ 6.0153e-01, -7.0566e-02,  1.1672e-01,  ...,  9.3804e-01,
            6.2258e-01, -7.3855e-01]],
 
         [[-4.4537e-01, -3.7345e-01, -6.9121e-01,  ..., -1.6314e-02,
           -7.3120e-01, -6.4625e-01],
          [-3.9589e-01, -3.7966e-01, -7.2130e-01,  ...,  1.7995e-03,
           -8.0989e-01, -5.8472e-01],
          [-3.5257e-01, -3.7754e-01, -7.1788e-01,  ..., -1.4448e-03,
           -8.8163e-01, -5.4388e-01],
          ...,
    

## Model definition

In [8]:
class SelfAttention(nn.Module):
    def __init__(self, input_size=128, attention_mat_size=64, output_kernel=1):
        """
        output_kernel(int): the kernel of attention matrix
        to the input shape (B, Sequence_number, Feature),
        the size of output shape will be (B, Sequence_number, attention_mat_size x output_kernel )
        """
        super(SelfAttention, self).__init__()
        self.lstm_dim = input_size
        self.da = attention_mat_size
        self.r = output_kernel
        
        self.main = nn.Sequential(
            # Bidirectionalなので各隠れ層のベクトルの次元は２倍のサイズになってます。
            nn.Linear(input_size, attention_mat_size), 
            nn.Tanh(),
            nn.Linear(attention_mat_size, output_kernel)
        )
        
    def forward(self, out):
        attention_weight = F.softmax(self.main(out), dim=1) # calculate the attention maxtrix
        
        # attention is changed for matmul with lstm output
        attention_weight = attention_weight.unsqueeze(dim=3)
        
        mul = torch.matmul(attention_weight, out.unsqueeze(dim=2))
        mul = mul.view(mul.shape[0], mul.shape[1], -1)
    
        return mul
        

In [9]:
model = SelfAttention()
summary(model)

Layer (type:depth-idx)                   Param #
SelfAttention                            --
├─Sequential: 1-1                        --
│    └─Linear: 2-1                       8,256
│    └─Tanh: 2-2                         --
│    └─Linear: 2-3                       65
Total params: 8,321
Trainable params: 8,321
Non-trainable params: 0

In [10]:
def test():
    rnn = nn.LSTM(
        input_size=10, 
        hidden_size=64,
        num_layers=1,
        batch_first=True)

    attention = SelfAttention(
        input_size=64, 
        attention_mat_size=64, 
        output_kernel=4
    )
    
    x = torch.rand(2, 6, 10)
    rnn_out, _ = rnn(x)
    print("rnn_out.shape:", rnn_out.shape)
    attention_out = attention(rnn_out)
    
    print(attention_out.shape)


In [11]:
class LSTMModel(pl.LightningModule):
    def __init__(self, hidden_size=128, input_size=30, output_size=6):
        super().__init__()
        self.rnn1 = nn.LSTM(input_size=input_size, 
                          hidden_size=hidden_size,
                          num_layers=1,
                          batch_first=True)
        
        self.attention1 = SelfAttention(
            input_size=hidden_size, 
            attention_mat_size=hidden_size, 
            output_kernel=1)

        self.rnn2 = nn.LSTM(input_size=hidden_size, 
                          hidden_size=hidden_size,
                          num_layers=1,
                          batch_first=True)
        
        self.attention2 = SelfAttention(
            input_size=hidden_size, 
            attention_mat_size=hidden_size, 
            output_kernel=1
        )
        
        self.seq_1 = nn.Sequential(
            nn.Linear(in_features=hidden_size, out_features=hidden_size),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.Dropout1d(p=0.2),
            nn.ReLU(),
            nn.Linear(in_features=hidden_size, out_features=hidden_size),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.Dropout1d(p=0.2),
            nn.ReLU(),
        )
        
        self.seq_2 = nn.Sequential(
            nn.Linear(in_features=hidden_size, out_features=hidden_size),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.Dropout1d(p=0.2),
            nn.ReLU(),
            nn.Linear(in_features=hidden_size, out_features=hidden_size),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.Dropout1d(p=0.2),
            nn.ReLU(),
        )
        
        self.classifier = nn.Linear(in_features=3 * hidden_size, out_features=output_size)
        
    def forward(self, x):
        activation, _ = self.rnn1(x)
        activation = self.attention1(activation)
        activation, _ = self.rnn2(activation)
        activation = self.attention2(activation)
        
        b, _, _ = activation.size()
        lstm_output = activation[:,-1,:].view(b,-1)
        seq_1_output = self.seq_1(lstm_output)
        seq_2_output = self.seq_2(lstm_output)
        
        output = torch.concat([lstm_output, seq_1_output, seq_2_output], dim=1)
        output = self.classifier(output)
        
        return output
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(params=self.parameters(), lr=0.0005)
        return optimizer
    
    def training_step(self, batch, batch_idx):
        X, y = batch
        X = X.float()
        # 1. Forward pass
        y_pred = self.forward(X)
        # 2. Calculate  and accumulate loss
        loss = F.cross_entropy(y_pred, y)
        
        self.log("train_loss", loss)
        
        return loss
    
    def test_step(self, batch, batch_idx):
        # this is the test loop
        X, y = batch
        X = X.float()
    
        # 1. Forward pass
        test_pred_logits = self.forward(X)

        # Calculate and accumulate accuracy
        test_pred_labels = test_pred_logits.argmax(dim=1)
        test_acc = ((test_pred_labels == y).sum().item()/len(test_pred_labels))
        self.log("test_acc", test_acc)

    def validation_step(self, batch, batch_idx):
        # this is the validation loop
        X, y = batch
        X = X.float()
        
        y_pred = self.forward(X)
        # 2. Calculate  and accumulate loss
        loss = F.cross_entropy(y_pred, y)
        
        self.log("val_loss", loss)
        

In [12]:
model = LSTMModel()
summary(model)

Layer (type:depth-idx)                   Param #
LSTMModel                                --
├─LSTM: 1-1                              81,920
├─SelfAttention: 1-2                     --
│    └─Sequential: 2-1                   --
│    │    └─Linear: 3-1                  16,512
│    │    └─Tanh: 3-2                    --
│    │    └─Linear: 3-3                  129
├─LSTM: 1-3                              132,096
├─SelfAttention: 1-4                     --
│    └─Sequential: 2-2                   --
│    │    └─Linear: 3-4                  16,512
│    │    └─Tanh: 3-5                    --
│    │    └─Linear: 3-6                  129
├─Sequential: 1-5                        --
│    └─Linear: 2-3                       16,512
│    └─BatchNorm1d: 2-4                  256
│    └─Dropout1d: 2-5                    --
│    └─ReLU: 2-6                         --
│    └─Linear: 2-7                       16,512
│    └─BatchNorm1d: 2-8                  256
│    └─Dropout1d: 2-9                    -

## Train step setup

In [13]:
from tqdm.auto import tqdm
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
# from lightning.pytorch.callbacks.early_stopping import EarlyStopping

batch_size = 4
patience = 20

all_test = []
all_pred = []
loss_all_folds = []

# kfold_train_test_index_list = [kfold_train_test_index_list[0]]

for i, (train_index, val_index, test_index) in enumerate(kfold_train_test_index_list):
    print(f"\n*************KFOLD {i + 1}*************")
    
    train_dataset = CustomDataset(features, labels, train_index)
    val_dataset = CustomDataset(features, labels, val_index)
    test_dataset = CustomDataset(features, labels, test_index)

    train_dataloader = DataLoader(
        train_dataset, 
        batch_size=batch_size,
        num_workers=0, # number of subprocesses to use for data loading
        shuffle=True)
    
    val_dataloader = DataLoader(
        val_dataset, 
        batch_size=batch_size,
        num_workers=0, # number of subprocesses to use for data loading
        shuffle=False)
    
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        num_workers=0, # number of subprocesses to use for data loading
        shuffle=False)
    
    model = LSTMModel(hidden_size=64, input_size=feature_num, output_size=len(label_list))

    trainer = pl.Trainer(callbacks=[EarlyStopping(monitor="val_loss", patience=patience, mode="min")])
    trainer.fit(model, train_dataloader, val_dataloader)
    trainer.test(model, test_dataloader)
    

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type          | Params
---------------------------------------------
0 | rnn1       | LSTM          | 24.6 K
1 | attention1 | SelfAttention | 4.2 K 
2 | rnn2       | LSTM          | 33.3 K
3 | attention2 | SelfAttention | 4.2 K 
4 | seq_1      | Sequential    | 8.6 K 
5 | seq_2      | Sequential    | 8.6 K 
6 | classifier | Linear        | 1.2 K 
---------------------------------------------
84.6 K    Trainable params
0         Non-trainable params
84.6 K    Total params
0.338     Total estimated model params size (MB)



*************KFOLD 1*************
Epoch 0:  84%|█████████████████████████████████████████████████████████████████████████▏             | 132/157 [00:13<00:02,  9.59it/s, loss=1.44, v_num=3]
Validation: 0it [00:00, ?it/s][A
Validation DataLoader 0:   0%|                                                                                                      | 0/25 [00:00<?, ?it/s][A
Epoch 0:  85%|█████████████████████████████████████████████████████████████████████████▋             | 133/157 [00:13<00:02,  9.64it/s, loss=1.44, v_num=3][A
Epoch 0:  85%|██████████████████████████████████████████████████████████████████████████▎            | 134/157 [00:13<00:02,  9.70it/s, loss=1.44, v_num=3][A
Epoch 0:  86%|██████████████████████████████████████████████████████████████████████████▊            | 135/157 [00:13<00:02,  9.76it/s, loss=1.44, v_num=3][A
Epoch 0:  87%|███████████████████████████████████████████████████████████████████████████▎           | 136/157 [00:13<00:02,  9.82it/s, los

Epoch 1:  98%|█████████████████████████████████████████████████████████████████████████████████████▎ | 154/157 [00:25<00:00,  6.12it/s, loss=1.16, v_num=3][A
Epoch 1:  99%|█████████████████████████████████████████████████████████████████████████████████████▉ | 155/157 [00:25<00:00,  6.15it/s, loss=1.16, v_num=3][A
Epoch 1:  99%|██████████████████████████████████████████████████████████████████████████████████████▍| 156/157 [00:25<00:00,  6.19it/s, loss=1.16, v_num=3][A
Epoch 1: 100%|███████████████████████████████████████████████████████████████████████████████████████| 157/157 [00:25<00:00,  6.22it/s, loss=1.16, v_num=3][A
Epoch 2:  84%|████████████████████████████████████████████████████████████████████████▎             | 132/157 [00:35<00:06,  3.72it/s, loss=0.907, v_num=3][A
Validation: 0it [00:00, ?it/s][A
Validation DataLoader 0:   0%|                                                                                                      | 0/25 [00:00<?, ?it/s][A
Epoch 2:  85

Epoch 3:  96%|██████████████████████████████████████████████████████████████████████████████████▋   | 151/157 [00:46<00:01,  3.22it/s, loss=0.919, v_num=3][A
Epoch 3:  97%|███████████████████████████████████████████████████████████████████████████████████▎  | 152/157 [00:46<00:01,  3.24it/s, loss=0.919, v_num=3][A
Epoch 3:  97%|███████████████████████████████████████████████████████████████████████████████████▊  | 153/157 [00:46<00:01,  3.26it/s, loss=0.919, v_num=3][A
Epoch 3:  98%|████████████████████████████████████████████████████████████████████████████████████▎ | 154/157 [00:46<00:00,  3.28it/s, loss=0.919, v_num=3][A
Epoch 3:  99%|████████████████████████████████████████████████████████████████████████████████████▉ | 155/157 [00:47<00:00,  3.30it/s, loss=0.919, v_num=3][A
Epoch 3:  99%|█████████████████████████████████████████████████████████████████████████████████████▍| 156/157 [00:47<00:00,  3.32it/s, loss=0.919, v_num=3][A
Epoch 3: 100%|████████████████████████████████


*************KFOLD 2*************


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type          | Params
---------------------------------------------
0 | rnn1       | LSTM          | 24.6 K
1 | attention1 | SelfAttention | 4.2 K 
2 | rnn2       | LSTM          | 33.3 K
3 | attention2 | SelfAttention | 4.2 K 
4 | seq_1      | Sequential    | 8.6 K 
5 | seq_2      | Sequential    | 8.6 K 
6 | classifier | Linear        | 1.2 K 
---------------------------------------------
84.6 K    Trainable params
0         Non-trainable params
84.6 K    Total params
0.338     Total estimated model params size (MB)


Testing DataLoader 0: 100%|████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:00<00:00, 59.72it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type          | Params
---------------------------------------------
0 | rnn1       | LSTM          | 24.6 K
1 | attention1 | SelfAttention | 4.2 K 
2 | rnn2       | LSTM          | 33.3 K
3 | attention2 | SelfAttention | 4.2 K 
4 | seq_1      | Sequential    | 8.6 K 
5 | seq_2      | Sequential    | 8.6 K 
6 | classifier | Linear        | 1.2 K 
---------------------------------------------
84.6 K    Trainable params
0         Non-trainable params
84.6 K    Total params
0.338     Total estimated model params size (MB)




*************KFOLD 3*************
Testing DataLoader 0:  50%|████████████████████████████████████████████████▌                                                | 9/18 [00:00<00:00, 55.92it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type          | Params
---------------------------------------------
0 | rnn1       | LSTM          | 24.6 K
1 | attention1 | SelfAttention | 4.2 K 
2 | rnn2       | LSTM          | 33.3 K
3 | attention2 | SelfAttention | 4.2 K 
4 | seq_1      | Sequential    | 8.6 K 
5 | seq_2      | Sequential    | 8.6 K 
6 | classifier | Linear        | 1.2 K 
---------------------------------------------
84.6 K    Trainable params
0         Non-trainable params
84.6 K    Total params
0.338     Total estimated model params size (MB)



*************KFOLD 4*************
Testing DataLoader 0: 100%|████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 55.23it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type          | Params
---------------------------------------------
0 | rnn1       | LSTM          | 24.6 K
1 | attention1 | SelfAttention | 4.2 K 
2 | rnn2       | LSTM          | 33.3 K
3 | attention2 | SelfAttention | 4.2 K 
4 | seq_1      | Sequential    | 8.6 K 
5 | seq_2      | Sequential    | 8.6 K 
6 | classifier | Linear        | 1.2 K 
---------------------------------------------
84.6 K    Trainable params
0         Non-trainable params
84.6 K    Total params
0.338     Total estimated model params size (MB)



*************KFOLD 5*************
Testing DataLoader 0: 100%|████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 59.87it/s]



*************KFOLD 6*************
Epoch 0:  20%|█████████████████▉                                                                      | 32/157 [00:04<00:18,  6.79it/s, loss=1.79, v_num=4]
Epoch 0:   1%|█▏                                                                                       | 2/158 [00:01<02:10,  1.20it/s, loss=1.79, v_num=5]
Testing DataLoader 0:  50%|████████████████████████████████████████████████▌                                                | 9/18 [00:01<00:01,  6.08it/s]
Epoch 0:   3%|██▎                                                                                       | 4/158 [00:01<00:48,  3.19it/s, loss=1.8, v_num=5]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs






  | Name       | Type          | Params
---------------------------------------------
0 | rnn1       | LSTM          | 24.6 K
1 | attention1 | SelfAttention | 4.2 K 
2 | rnn2       | LSTM          | 33.3 K
3 | attention2 | SelfAttention | 4.2 K 
4 | seq_1      | Sequential    | 8.6 K 
5 | seq_2      | Sequential    | 8.6 K 
6 | classifier | Linear        | 1.2 K 
---------------------------------------------
84.6 K    Trainable params
0         Non-trainable params
84.6 K    Total params
0.338     Total estimated model params size (MB)


Testing DataLoader 0: 100%|████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 58.87it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



*************KFOLD 7*************



  | Name       | Type          | Params
---------------------------------------------
0 | rnn1       | LSTM          | 24.6 K
1 | attention1 | SelfAttention | 4.2 K 
2 | rnn2       | LSTM          | 33.3 K
3 | attention2 | SelfAttention | 4.2 K 
4 | seq_1      | Sequential    | 8.6 K 
5 | seq_2      | Sequential    | 8.6 K 
6 | classifier | Linear        | 1.2 K 
---------------------------------------------
84.6 K    Trainable params
0         Non-trainable params
84.6 K    Total params
0.338     Total estimated model params size (MB)


Testing DataLoader 0: 100%|████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 58.14it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type          | Params
---------------------------------------------
0 | rnn1       | LSTM          | 24.6 K
1 | attention1 | SelfAttention | 4.2 K 
2 | rnn2       | LSTM          | 33.3 K
3 | attention2 | SelfAttention | 4.2 K 
4 | seq_1      | Sequential    | 8.6 K 
5 | seq_2      | Sequential    | 8.6 K 
6 | classifier | Linear        | 1.2 K 
---------------------------------------------
84.6 K    Trainable params
0         Non-trainable params
84.6 K    Total params
0.338     Total estimated model params size (MB)



*************KFOLD 8*************
Testing DataLoader 0: 100%|████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 60.06it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type          | Params
---------------------------------------------
0 | rnn1       | LSTM          | 24.6 K
1 | attention1 | SelfAttention | 4.2 K 
2 | rnn2       | LSTM          | 33.3 K
3 | attention2 | SelfAttention | 4.2 K 
4 | seq_1      | Sequential    | 8.6 K 
5 | seq_2      | Sequential    | 8.6 K 
6 | classifier | Linear        | 1.2 K 
---------------------------------------------
84.6 K    Trainable params
0         Non-trainable params
84.6 K    Total params
0.338     Total estimated model params size (MB)



*************KFOLD 9*************
Testing DataLoader 0:  53%|███████████████████████████████████████████████████▎                                             | 9/17 [00:00<00:00, 54.78it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type          | Params
---------------------------------------------
0 | rnn1       | LSTM          | 24.6 K
1 | attention1 | SelfAttention | 4.2 K 
2 | rnn2       | LSTM          | 33.3 K
3 | attention2 | SelfAttention | 4.2 K 
4 | seq_1      | Sequential    | 8.6 K 
5 | seq_2      | Sequential    | 8.6 K 
6 | classifier | Linear        | 1.2 K 
---------------------------------------------
84.6 K    Trainable params
0         Non-trainable params
84.6 K    Total params
0.338     Total estimated model params size (MB)



*************KFOLD 10*************
Testing DataLoader 0:  47%|█████████████████████████████████████████████▋                                                   | 8/17 [00:00<00:00, 57.15it/s]