In [1]:
import torch.nn.functional as F
import torch.nn as nn
import torch
import lightning.pytorch as pl
import torchmetrics

import pandas as pd
import numpy as np

from IPython.display import HTML, display
import os
from types import SimpleNamespace

from torchmetrics.classification import BinaryF1Score
from torchmetrics.classification.accuracy import BinaryAccuracy
import lightning as L
import matplotlib
import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline
import numpy as np
import seaborn as sns
import tabulate
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision
%matplotlib inline



from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks.lr_monitor import LearningRateMonitor
from lightning.pytorch.callbacks.model_checkpoint import ModelCheckpoint
from resnet1d import ResNet1D

  warn(f"Failed to load image Python extension: {e}")


In [2]:
torch.cuda.empty_cache()

In [3]:
matplotlib_inline.backend_inline.set_matplotlib_formats(
    "svg", "pdf")  # For export
matplotlib.rcParams["lines.linewidth"] = 2.0
sns.reset_orig()


RANDOM_STATE = 42
# Path to the folder where the pretrained models are saved
CHECKPOINT_PATH = "./saved_models/ConvNets/"


# Function for setting the seed
L.seed_everything(42)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = torch.device(
    "cuda:0") if torch.cuda.is_available() else torch.device("cpu")



Global seed set to 42


Прошлая архитектура модели для предсказания бита.

In [4]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, 32, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm1d(32)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv1d(32, 32, kernel_size=5, padding=2)
        self.bn2 = nn.BatchNorm1d(32)
        self.relu2 = nn.ReLU()
        self.pool = nn.MaxPool1d(kernel_size=5, stride=2)

    def forward(self , x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = out + residual  # Residual connection
        out = self.relu2(out)
        out = self.pool(out)
        return out

class Baseline(nn.Module):
    def __init__(self, sequence_len, n_classes, n_blocks=5):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=12, out_channels=32, kernel_size=5,stride=1, padding=0)
        self.bn1 = nn.BatchNorm1d(32)
        self.residual_blocks = nn.Sequential(
            ResidualBlock(32),
            ResidualBlock(32),
            ResidualBlock(32),
            ResidualBlock(32),
            ResidualBlock(32)
        )
        self.classifier = nn.Sequential(
            nn.Linear(sequence_len, 32), # 20
            nn.ReLU(),
            nn.Linear(32, n_classes),
        )

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.residual_blocks(out)
        out = out.view(out.size(0), -1)  # Flatten to [batch_size, channels * sequence_length]

        return self.classifier(out)
        
 

Тест на работоспособность модели.

In [5]:
test_beat = np.load('./transformed_train/00269_hr_n1.npy')
print(test_beat.shape)
kernel_size = 16
stride = 2
n_block = 48
downsample_gap = 6
increasefilter_gap = 12
model = ResNet1D(
    in_channels=12, 
    base_filters=16, # 64 for ResNet1D, 352 for ResNeXt1D
    kernel_size=16, 
    stride=2, 
    groups=1, 
    n_block=12, 
    n_classes=1) 


test_beat = test_beat.reshape((1,12,-1))
test_y = torch.tensor([[1.]])
criterion = nn.BCEWithLogitsLoss()
print("test beat shape", test_beat.shape)
res= model(torch.from_numpy(test_beat).float())
print(res.shape)
criterion(res, test_y)

(12, 500)
test beat shape (1, 12, 500)
torch.Size([1, 1])


tensor(0.8083, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

In [6]:
import os
import pandas as pd
from torch.utils.data import Dataset

class DatasetECG(Dataset):
    def __init__(self, annotations_file, signals_dir):
        """
        annotantions_file - path to the annotations dataframe. 
                            First column should be name of the record, second - strat_fold then labels 
        
        signals_dir - path to the directory with transformed signals
        """
        self.signals_labels = pd.read_csv(annotations_file)
        self.signals_dir = signals_dir 

    def __len__(self):
        return len(self.signals_labels)

    def __getitem__(self, idx):
        signals_path = os.path.join(self.signals_dir, self.signals_labels.iloc[idx, 0]+ ".npy")
        signal = np.load(signals_path).astype(np.float32)        

        # iloc[idx, 2:] 2 is because first column is a record name
        labels = torch.from_numpy(self.signals_labels.iloc[idx, 2:].values.astype(int)).float()
        return signal, labels


In [7]:
train_dataset = DatasetECG("./train_annotations.csv", "transformed_train")
val_dataset = DatasetECG("./val_annotations.csv", "transformed_train")

In [8]:
from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=False, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4)

In [9]:
class LitBaseline(pl.LightningModule):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.criterion = nn.BCEWithLogitsLoss()
        self.train_score = F1Score()
        self.accuracy = BinaryAccuracy()
        self.val_score = F1Score()

    def training_step(self, batch, batch_idx):
        # training_step defines the train loop.
        x, y = batch
        x = x.view(x.size(0),12, -1)
        pred = self.model(x)
        loss = self.criterion(pred, y)
        self.train_score(pred,y.to(torch.int))
        self.log("f1_score", self.train_score)
        self.log("loss", loss, prog_bar=True, logger=True, on_epoch=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        # this is the validation loop
        x, y = batch
        x = x.view(x.size(0),12, -1)
        pred = self.model(x)
        val_loss = self.criterion(pred, y)
        self.val_score(pred,y.to(torch.int))
        self.log("val_f1_score", self.val_score)
        self.log("val_loss", val_loss)


    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer
    


In [10]:
class CWT_ResNet(L.LightningModule):
    def __init__(self, model_name, model_hparams, optimizer_name, optimizer_hparams):
        """
        Inputs:
            model_name - Name of the model/CNN to run. Used for creating the model (see function below)
            model_hparams - Hyperparameters for the model, as dictionary.
            optimizer_name - Name of the optimizer to use. Currently supported: Adam, SGD
            optimizer_hparams - Hyperparameters for the optimizer, as dictionary. This includes learning rate, weight decay, etc.
        """
        super().__init__()
        # Exports the hyperparameters to a YAML file, and create "self.hparams" namespace
        self.save_hyperparameters()
        # Create model
        self.model = ResNet1D(**model_hparams)
        # Create loss module
        self.loss_module = nn.BCEWithLogitsLoss()
        self.train_score = BinaryF1Score()
        self.val_score = BinaryF1Score()
        self.test_score = BinaryF1Score()
        self.val_acc = BinaryAccuracy()
        self.train_acc = BinaryAccuracy()
        # Example input for visualizing the graph in Tensorboard
        self.example_input_array = torch.zeros((1, 12, 32, 32), dtype=torch.float32)

    def forward(self, imgs):
        # Forward function that is run when visualizing the graph
        return self.model(imgs)

    def configure_optimizers(self):
        # We will support Adam or SGD as optimizers.
        if self.hparams.optimizer_name == "Adam":
            # AdamW is Adam with a correct implementation of weight decay (see here
            # for details: https://arxiv.org/pdf/1711.05101.pdf)
            optimizer = optim.AdamW(self.parameters(), **self.hparams.optimizer_hparams)
        elif self.hparams.optimizer_name == "SGD":
            optimizer = optim.SGD(self.parameters(), **self.hparams.optimizer_hparams)
        else:
            assert False, f'Unknown optimizer: "{self.hparams.optimizer_name}"'

        # We will reduce the learning rate by 0.1 every milestone
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[65, 115, 150], gamma=0.1)
        return [optimizer], [scheduler]

    def training_step(self, batch, batch_idx):
        # "batch" is the output of the training data loader.
        imgs, labels = batch
        labels = np.squeeze(labels)
        preds = np.squeeze(self.model(imgs))
        loss = self.loss_module(preds, labels)
        self.train_acc(preds, np.squeeze(labels).to(torch.int))

        # Logs the accuracy per epoch to tensorboard (weighted average over batches)
        self.train_score(preds, labels.to(torch.int))
        self.log("train_f1_score", self.train_score)
        self.log("train_acc", self.train_acc, on_step=False, on_epoch=True)
        self.log("train_loss", loss)
        return loss  # Return tensor to call ".backward" on

    def validation_step(self, batch, batch_idx):
        imgs, labels = batch
        labels = np.squeeze(labels)
        preds = np.squeeze(self.model(imgs))
        self.val_acc(preds, labels.to(torch.int))
        # By default logs it per epoch (weighted average over batches)
        self.val_score(preds, labels.to(torch.int))
        self.log("val_f1_score", self.val_score)
        self.log("val_acc", self.val_acc)


In [11]:
class Lightning_ResNet1D(L.LightningModule):
    def __init__(self, model_name, model_hparams, optimizer_name, optimizer_hparams):
        """
        Inputs:
            model_name - Name of the model/CNN to run. Used for creating the model (see function below)
            model_hparams - Hyperparameters for the model, as dictionary.
            optimizer_name - Name of the optimizer to use. Currently supported: Adam, SGD
            optimizer_hparams - Hyperparameters for the optimizer, as dictionary. This includes learning rate, weight decay, etc.
        """
        super().__init__()
        # Exports the hyperparameters to a YAML file, and create "self.hparams" namespace
        self.save_hyperparameters()
        # Create model
        self.model = ResNet1D(**model_hparams)
        # Create loss module
        self.loss_module = nn.BCEWithLogitsLoss()
        self.train_score = BinaryF1Score()
        self.val_score = BinaryF1Score()
        self.test_score = BinaryF1Score()
        self.val_acc = BinaryAccuracy()
        self.train_acc = BinaryAccuracy()
        # Example input for visualizing the graph in Tensorboard
        self.example_input_array = torch.zeros((1, 12, 500), dtype=torch.float32)

    def forward(self, imgs):
        # Forward function that is run when visualizing the graph
        return self.model(imgs)

    def configure_optimizers(self):
        # We will support Adam or SGD as optimizers.
        if self.hparams.optimizer_name == "Adam":
            # AdamW is Adam with a correct implementation of weight decay (see here
            # for details: https://arxiv.org/pdf/1711.05101.pdf)
            optimizer = optim.AdamW(self.parameters(), **self.hparams.optimizer_hparams)
        elif self.hparams.optimizer_name == "SGD":
            optimizer = optim.SGD(self.parameters(), **self.hparams.optimizer_hparams)
        else:
            assert False, f'Unknown optimizer: "{self.hparams.optimizer_name}"'

        # We will reduce the learning rate by 0.1 every milestone
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[35,65, 115, 150], gamma=0.1)
        return [optimizer], [scheduler]

    def training_step(self, batch, batch_idx):
        # "batch" is the output of the training data loader.
        imgs, labels = batch
        labels = np.squeeze(labels)
        preds = np.squeeze(self.model(imgs))
        loss = self.loss_module(preds, labels)
        self.train_acc(preds, np.squeeze(labels).to(torch.int))

        self.train_score(preds, labels.to(torch.int))
        self.log("train_f1_score", self.train_score)
        self.log("train_acc", self.train_acc, on_step=False, on_epoch=True)
        self.log("train_loss", loss)
        return loss  

    def validation_step(self, batch, batch_idx):
        imgs, labels = batch
        labels = np.squeeze(labels)
        preds = np.squeeze(self.model(imgs))
        self.val_acc(preds, labels.to(torch.int))
        self.val_score(preds, labels.to(torch.int))
        self.log("val_f1_score", self.val_score)
        self.log("val_acc", self.val_acc)
        
    def predict_step(self, batch, batch_idx):
        preds = np.squeeze(self(batch))
        return preds


In [29]:
def train_model(model_name, train_continue = True, save_name=None, pretrained_filename="", **kwargs):
    """
    Inputs:
        model_name - Name of the model you want to run. Is used to look up the class in "model_dict"
        save_name (optional) - If specified, this name will be used for creating the checkpoint and logging directory.
    """
    if save_name is None:
        save_name = model_name

    # Create a PyTorch Lightning trainer with the generation callback
    curr_model_save_path = os.path.join(CHECKPOINT_PATH, save_name)
    trainer = L.Trainer(
        check_val_every_n_epoch=2,
        default_root_dir=os.path.join(CHECKPOINT_PATH, save_name),  # Where to save models
        # We run on a single GPU (if possible)
        accelerator="auto",
        devices=1,
        # How many epochs to train for if no patience is set
        max_epochs=180,
        callbacks=[
            ModelCheckpoint(
                mode="max", monitor="val_f1_score", save_top_k=2,
            ), 
            EarlyStopping(monitor="val_f1_score", mode="max", patience=10),
            LearningRateMonitor("epoch"),
        ],  # Log learning rate every epoch
    ) 
    trainer.logger._log_graph = True  # If True, we plot the computation graph in tensorboard
    trainer.logger._default_hp_metric = None  # Optional logging argument that we don't need

    # Check whether pretrained model exists. If yes, load it and skip training
    if os.path.isfile(pretrained_filename):
        print(f"Found pretrained model at {pretrained_filename}, loading...")
        # Automatically loads the model with the saved hyperparameters
        model = Lightning_ResNet1D.load_from_checkpoint(pretrained_filename)
    else:
        if(pretrained_filename!=""):
            print("FAILED TO LOAD A MODEL")
            return
        L.seed_everything(42)  # To be reproducable
        if train_continue:
            default_root_dir = os.path.join(CHECKPOINT_PATH, save_name) 
            default_root_dir = os.path.join(default_root_dir, "lightning_logs")
            continue_path = os.path.join(default_root_dir, os.listdir(default_root_dir)[-1])
            continue_path = os.path.join(continue_path, "checkpoints")
            continue_path = os.path.join(continue_path, os.listdir(continue_path)[-1])


        model = Lightning_ResNet1D(model_name=model_name, **kwargs)
        if train_continue:
            trainer.fit(model, train_loader, val_loader, ckpt_path=continue_path)
        else:
            trainer.fit(model, train_loader, val_loader,)
        model = Lightning_ResNet1D.load_from_checkpoint(
            trainer.checkpoint_callback.best_model_path
        )  # Load best checkpoint after training

    # Test best model on validation and test set
    val_result = trainer.validate(model, dataloaders=val_loader, verbose=False)
    result = {"val_acc": val_result[0]["val_acc"], "val_f1_score": val_result[0]["val_f1_score"]}
    
    
    return model, result, curr_model_save_path

In [31]:

resnet_model, resnet_results, curr_model_save_path = train_model(
    train_continue=False,
    pretrained_filename="",
    model_name="ResNet1D",
    save_name="ResNet1D_denoising_level3", 
    model_hparams={"n_classes": 1, "base_filters": 16, "kernel_size":16, "stride":2, "groups":1, "n_block":12, "in_channels":12},
    optimizer_name="Adam",
    optimizer_hparams={"lr": 0.0001,  "weight_decay": 1e-4},
) 


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Global seed set to 42
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type              | Params | In sizes     | Out sizes
-----------------------------------------------------------------------------
0 | model       | ResNet1D          | 653 K  | [1, 12, 500] | [1, 1]   
1 | loss_module | BCEWithLogitsLoss | 0      | ?            | ?        
2 | train_score | BinaryF1Score     | 0      | ?            | ?        
3 | val_score   | BinaryF1Score     | 0      | ?            | ?        
4 | test_score  | BinaryF1Score     | 0      | ?            | ?        
5 | val_acc     | BinaryAccuracy    | 0      | ?            | ?        
6 | train_acc   | BinaryAccuracy    | 0      | ?            | ?        
-----------------------------------------------------------------------------
653 K     Trainable params
0         Non-trainable 

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation: 0it [00:00, ?it/s]

In [14]:
resnet_results

{'val_acc': 0.771377444267273, 'val_f1_score': 0.8122245073318481}

Predict тренировочных данных и тестовых данных.


In [32]:
from tqdm.notebook import tqdm
def predict(beat_filename='transformed_train/17302_hr_n0.npy'):
    test_beat = np.load(beat_filename).astype(np.float32) 
    test_beat = test_beat.reshape((1,12,-1))
    res = resnet_model(torch.from_numpy(test_beat).float())
    return torch.sigmoid(res).item()

def run_test_predicts(curr_model_save_path):
    df_og = pd.read_csv("./test/test_meta.csv")
    test_annotations = pd.read_csv("./transformed_test_df.csv")
    preds = {}
    for name in tqdm(test_annotations["new_name"].values):
        record_name = name[:name.rfind("_")]
        if record_name not in preds:
            preds[record_name] = []
        preds[record_name].append(predict("./transformed_test/"+name+".npy"))
    preds_mean = {k:round((sum(v)/len(v))) for k,v in preds.items()}
    df_og["predict"] = df_og["record_name"].map(preds_mean)

    save_path = os.path.join(curr_model_save_path, "predicted_test.csv")
    df_og.to_csv(save_path)
    df_og.to_csv("./predicted_test.csv")
    print("Соотношение предсказанных классов:")
    display(df_og['predict'].value_counts(normalize=True))
    return df_og

def preds_train_df(curr_model_save_path):
    df = pd.read_csv("./transformed_df.csv")
    preds = {}
    for name in tqdm(df["new_name"].values):
        record_name = name[:name.rfind("_")]
        if record_name not in preds:
            preds[record_name] = []
        preds[record_name].append(predict("./transformed_train/"+name+".npy"))
    
    df_og = pd.read_csv("./train/train_gts.csv")
    
    preds_median = {k:np.median(np.array(v)) for k,v in preds.items()}
    df_og['predict'] = df_og["record_name"].map(preds_median)
    save_path = os.path.join(curr_model_save_path, "predicted_train.csv")
    df_og.to_csv(save_path)
    df_og.to_csv("./predicted_train.csv")
    print("Соотношение предсказанных классов:")
    display(df_og['predict'].value_counts(normalize=True))
    return preds, df_og


In [33]:
test_res = run_test_predicts(curr_model_save_path)
preds, df_og = preds_train_df(curr_model_save_path)

  0%|          | 0/4517 [00:00<?, ?it/s]

Соотношение предсказанных классов:


0    1.0
Name: predict, dtype: float64

  0%|          | 0/21595 [00:00<?, ?it/s]

Соотношение предсказанных классов:


0.465597    0.000952
0.465355    0.000952
0.469544    0.000952
0.468178    0.000952
0.467098    0.000952
              ...   
0.455319    0.000476
0.466875    0.000476
0.466798    0.000476
0.467700    0.000476
0.464812    0.000476
Name: predict, Length: 2095, dtype: float64

In [17]:
preds_mean = {k:(sum(v)/len(v)) for k,v in preds.items()}
preds_median = {k:np.median(np.array(v)) for k,v in preds.items()}
preds_max= {k:np.argmax(np.array(v)) for k,v in preds.items()}

In [18]:
def check(preds, df_og):
    tp,tn,fp,fn = 0,0,0,0

    for k, v in preds.items():
        predicted_class = round(v)
        actual_class = df_og[df_og["record_name"] == k]["myocard"].values[0]
        
        if actual_class == 1 and predicted_class == 1:
            tp += 1
        elif actual_class == 0 and predicted_class == 0:
            tn += 1
        elif actual_class == 0 and predicted_class == 1:
            fp += 1
        elif actual_class == 1 and predicted_class == 0:
            fn += 1
    precision = tp/(tp+fp)
    recall = tp/(tp+fn)
    tnr = tn/(tn+fp)
    print("accuracy", (tp + tn)/len(preds))
    print("true positive rate, recall", (tp/(tp+fn)))
    print("true negative rate", tnr)
    print("f1_score", 2*(precision*recall)/(precision+recall))

In [19]:
check(preds_mean, df_og)

ZeroDivisionError: division by zero

In [None]:
check(preds_median, df_og)

accuracy 0.8448357924797716
true positive rate, recall 0.9662650602409638
true negative rate 0.8149466192170819
f1_score 0.7109929078014183


In [None]:
check(preds_max, df_og)

accuracy 0.24083769633507854
true positive rate, recall 0.4107142857142857
true negative rate 0.6764705882352942
f1_score 0.24338624338624337
