In [1]:
import torch.nn.functional as F
import torch.nn as nn
import torch
import lightning.pytorch as pl
import torchmetrics

import pandas as pd
import numpy as np

  warn(f"Failed to load image Python extension: {e}")


In [2]:
RANDOM_STATE=42
np.random.seed(RANDOM_STATE)
torch.manual_seed(RANDOM_STATE)

<torch._C.Generator at 0x7f5240c8aa10>

Extracted beats, as explained in
Section III-A, are used as inputs. Here, all convolution layers
are applying 1-D convolution through time and each have 32
kernels of size 5. We also use max pooling of size 6 and stride
2 in all pooling layers. The predictor network consists of five
residual blocks followed by two fully-connected layers with
32 neurons each and a softmax layer to predict output class probabilities. Each residual block contains two convolutional
layers, two ReLU nonlinearities [19], a residual skip connec-
tion [20], and a pooling layer. In total, the resulting network
is a deep network consisting of 13 weight layers.

In [3]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, 32, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm1d(32)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv1d(32, 32, kernel_size=5, padding=2)
        self.bn2 = nn.BatchNorm1d(32)
        self.relu2 = nn.ReLU()
        self.pool = nn.MaxPool1d(kernel_size=5, stride=2)

    def forward(self , x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = out + residual  # Residual connection
        out = self.relu2(out)
        out = self.pool(out)
        return out

class Baseline(nn.Module):
    def __init__(self, sequence_len, n_classes, n_blocks=5):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=12, out_channels=32, kernel_size=5,stride=1, padding=0)
        self.bn1 = nn.BatchNorm1d(32)
        self.residual_blocks = nn.Sequential(
            ResidualBlock(32),
            ResidualBlock(32),
            ResidualBlock(32),
            ResidualBlock(32),
            ResidualBlock(32)
        )
        self.classifier = nn.Sequential(
            nn.Linear(sequence_len, 32), # 20
            nn.ReLU(),
            nn.Linear(32, n_classes),
        )

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.residual_blocks(out)
        out = out.view(out.size(0), -1)  # Flatten to [batch_size, channels * sequence_length]

        return self.classifier(out)
        
 

In [4]:
test_beat = np.load('./transformed_train/00034_hr.npy')
print(test_beat.shape)
model = Baseline(test_beat.shape[1]-120, 1)


test_beat = test_beat.reshape((1,12,-1))
test_y = torch.tensor([[1.]])
criterion = nn.BCEWithLogitsLoss()
print("test beat shape", test_beat.shape)
res= model(torch.from_numpy(test_beat).float())
print(res.shape)
criterion(res, test_y)

(12, 600)
test beat shape (1, 12, 600)
torch.Size([1, 1])


tensor(1.4940, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

In [5]:
import os
import pandas as pd
from torch.utils.data import Dataset

class DatasetECG(Dataset):
    def __init__(self, annotations_file, signals_dir):
        """
        annotantions_file - path to the annotations dataframe. 
                            First column should be name of the record, second - strat_fold then labels 
        
        signals_dir - path to the directory with transformed signals
        """
        self.signals_labels = pd.read_csv(annotations_file)
        self.signals_dir = signals_dir 

    def __len__(self):
        return len(self.signals_labels)
    def __getitem__(self, idx):
        signals_path = os.path.join(self.signals_dir, self.signals_labels.iloc[idx, 0]+ ".npy")
        signal = np.load(signals_path).astype(np.float32)
        labels = torch.from_numpy(self.signals_labels.iloc[idx, 2:].values.astype(int)).float()
        return signal, labels


In [6]:
train_dataset = DatasetECG("./train_annotations.csv", "transformed_train")
val_dataset = DatasetECG("./val_annotations.csv", "transformed_train")

In [7]:
from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False, num_workers=16)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=16)

In [16]:
from torchmetrics.classification import F1Score
class LitBaseline(pl.LightningModule):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.criterion = nn.BCEWithLogitsLoss()
        self.train_score = F1Score()
        self.val_score = F1Score()

    def training_step(self, batch, batch_idx):
        # training_step defines the train loop.
        x, y = batch
        x = x.view(x.size(0),12, -1)
        pred = self.model(x)
        loss = self.criterion(pred, y)
        self.train_score(pred,y.to(torch.int))
        self.log("f1_score", self.train_score)
        self.log("loss", loss, prog_bar=True, logger=True, on_epoch=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        # this is the validation loop
        x, y = batch
        x = x.view(x.size(0),12, -1)
        pred = self.model(x)
        val_loss = self.criterion(pred, y)
        self.val_score(pred,y.to(torch.int))
        self.log("val_f1_score", self.val_score)
        self.log("val_loss", val_loss)


    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer
    


Если один батч - лосс converges в 0, если 5 - в пизде какой то. Если менять lr сильно ниче не меняется, если поменять sequence_len в препроцессинге(transforming.ipynb, BEAT_LENGTH) тоже особо не меняется. Когда sequence_len была 1400 и 5 батчей лосс конвержился в 19 хз.

In [17]:
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks.model_checkpoint import ModelCheckpoint

# model
autoencoder = LitBaseline(Baseline(sequence_len=600-120,n_classes=1,n_blocks=5))

checkpoint_callback =  ModelCheckpoint(dirpath="./lightning_logs/best_run1/", save_top_k=2, monitor="val_f1_score")
# train model
trainer = pl.Trainer(max_epochs=100, check_val_every_n_epoch=5,enable_checkpointing=True,
                     callbacks=[EarlyStopping(monitor="val_f1_score", mode='max', patience=15), checkpoint_callback])
trainer.fit(model=autoencoder, train_dataloaders=train_loader, val_dataloaders=val_loader,)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type              | Params
--------------------------------------------------
0 | model       | Baseline          | 69.6 K
1 | criterion   | BCEWithLogitsLoss | 0     
2 | train_score | F1Score           | 0     
3 | val_score   | F1Score           | 0     
--------------------------------------------------
69.6 K    Trainable params
0         Non-trainable params
69.6 K    Total params
0.278     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [None]:
trainer.validate(autoencoder, val_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation: 0it [00:00, ?it/s]

[{'val_f1_score': 0.06461086869239807, 'val_loss': 2.1510236263275146}]

In [None]:
overfit_logs = "./lightning_logs/version_52/metrics.csv"
df2 = pd.read_csv(overfit_logs)
df2

FileNotFoundError: [Errno 2] No such file or directory: './lightning_logs/version_52/metrics.csv'