In [1]:
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
import torch.nn as nn
import torch

from lightning.pytorch.utilities.types import STEP_OUTPUT, OptimizerLRScheduler
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.loggers import TensorBoardLogger
import lightning.pytorch as pl

from torchmetrics.functional import accuracy


from sklearn.model_selection import train_test_split
import numpy as np



In [2]:
X = np.load("X.npy")
y = np.load("y.npy")

X.shape, y.shape

((774, 84, 16), (774,))

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


X_train.shape, X_test.shape, y_train.shape, y_test.shape

((619, 84, 16), (155, 84, 16), (619,), (155,))

In [4]:
print(np.unique(y_train, return_counts=True))
print(np.unique(y_test, return_counts=True))


(array([0, 1, 2], dtype=int64), array([207, 205, 207], dtype=int64))
(array([0, 1, 2], dtype=int64), array([51, 53, 51], dtype=int64))


In [5]:
X_train_t = torch.from_numpy(X_train)
y_train_t = torch.from_numpy(y_train)
X_test_t = torch.from_numpy(X_test)
y_test_t = torch.from_numpy(y_test)

# Create the dataset object
train_ds = TensorDataset(X_train_t, y_train_t)
test_ds = TensorDataset(X_test_t, y_test_t)

train_loader = DataLoader(train_ds, batch_size=32, num_workers=1, persistent_workers=True)
test_loader = DataLoader(test_ds, batch_size=32, num_workers=1, persistent_workers=True)


In [88]:
class LSTM_Classifer(pl.LightningModule):
    def __init__(self, n_features = 16, n_classes = 3, n_hidden = 64, n_layers = 3, dropout = 0.6) -> None:
        super().__init__()
        
        self.lstm = nn.LSTM(
            input_size  = n_features,
            hidden_size = n_hidden,
            num_layers  = n_layers,
            dropout=dropout,
            batch_first=True
        )
        
        self.classifier = nn.Linear(n_hidden, n_classes)
        
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        _ , (out, _) = self.lstm(x)
        return self.classifier(out[-1])
    
    def training_step(self, batch, batch_idx) -> STEP_OUTPUT:
        X, y = batch
        y_pred = self(X)
        loss = F.cross_entropy(y_pred, y)
        
        y_lab = torch.argmax(F.softmax(y_pred, dim=1), dim=1)
        acc = accuracy(y_lab, y, task="multiclass", num_classes=3)
        
        self.log("acc", acc, prog_bar=True, on_epoch=True, on_step=False)
        self.log("loss", loss, prog_bar=True, on_epoch=True, on_step=False)
        return loss
    
    def validation_step(self, batch, batch_idx) -> STEP_OUTPUT:
        X, y = batch
        y_pred = self(X)
        y_lab = torch.argmax(F.softmax(y_pred, dim=1), dim=1)
        loss = F.cross_entropy(y_pred, y)
        
        acc = accuracy(y_lab,y, task="multiclass", num_classes=3)
        
        self.log("val_loss", loss, prog_bar=True, on_epoch=True, on_step=False)
        self.log("val_acc", acc, prog_bar=True, on_epoch=True, on_step=False)
        return loss
    
    def configure_optimizers(self) -> OptimizerLRScheduler:
        return torch.optim.Adam(self.parameters(), lr = 1e-3)

In [97]:
model = LSTM_Classifer(n_layers= 5, n_hidden=64, dropout=0.2)
es_callback = EarlyStopping(monitor="val_loss", patience=15, strict=False, mode="min")
torch.set_float32_matmul_precision('medium')

logger = TensorBoardLogger("tb_logs", name="Lstm_v2")

pl.seed_everything(42)

trainer = pl.Trainer(accelerator="gpu", 
                     #callbacks=[es_callback], 
                     min_epochs=50,
                     logger=logger)

trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=test_loader)

Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
c:\Users\DELL\AppData\Local\Programs\Python\Python311\Lib\site-packages\lightning\pytorch\loops\utilities.py:72: `max_epochs` was not set. Setting it to 1000 epochs. To train without an epoch limit, set `max_epochs=-1`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type   | Params
--------------------------------------
0 | lstm       | LSTM   | 154 K 
1 | classifier | Linear | 195   
--------------------------------------
154 K     Trainable params
0         Non-trainable params
154 K     Total params
0.617     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\DELL\AppData\Local\Programs\Python\Python311\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


                                                                            

c:\Users\DELL\AppData\Local\Programs\Python\Python311\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
c:\Users\DELL\AppData\Local\Programs\Python\Python311\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:293: The number of training batches (20) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 999: 100%|██████████| 20/20 [00:00<00:00, 110.90it/s, v_num=53, val_loss=0.803, val_acc=0.761, acc=0.863, loss=0.351]

`Trainer.fit` stopped: `max_epochs=1000` reached.


Epoch 999: 100%|██████████| 20/20 [00:00<00:00, 102.39it/s, v_num=53, val_loss=0.803, val_acc=0.761, acc=0.863, loss=0.351]


In [101]:
from sklearn.metrics import classification_report, roc_auc_score, precision_recall_curve, balanced_accuracy_score

In [99]:
model.eval()

all_pred = []
all_true = []

for batch in test_loader:
    cX, cy = batch
    
    pred = torch.argmax(F.softmax(model(cX), dim=1), dim=1).detach().cpu().tolist()
    true = cy.detach().cpu().tolist()
    
    all_pred.extend(pred)
    all_true.extend(true)


len(all_true) , len(all_pred)

(155, 155)

In [100]:
print(classification_report(all_true, all_pred))

              precision    recall  f1-score   support

           0       0.78      0.78      0.78        51
           1       0.65      0.85      0.74        53
           2       0.94      0.65      0.77        51

    accuracy                           0.76       155
   macro avg       0.79      0.76      0.76       155
weighted avg       0.79      0.76      0.76       155



In [102]:
balanced_accuracy_score(all_true, all_pred)

0.7601430509310642