In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import torch.nn.functional as F
import torch.nn as nn
import torch
#import pytorch_lightning as pl
import lightning.pytorch as pl
import torchmetrics

import pandas as pd
import numpy as np

from IPython.display import HTML, display
import os
from types import SimpleNamespace

from torchmetrics.classification import F1Score, BinaryF1Score, MulticlassF1Score
from torchmetrics.classification.accuracy import Accuracy, BinaryAccuracy
import matplotlib
import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline
import numpy as np
import seaborn as sns
import tabulate
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
%matplotlib inline



#import pytorch_lightning.callbacks.
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks.lr_monitor import LearningRateMonitor
from lightning.pytorch.callbacks import LearningRateMonitor
from lightning.pytorch.callbacks import ModelCheckpoint

from resnet1d import ResNet1D

In [3]:
torch.cuda.empty_cache()

In [4]:
matplotlib_inline.backend_inline.set_matplotlib_formats(
    "svg", "pdf")  # For export
matplotlib.rcParams["lines.linewidth"] = 2.0
sns.reset_orig()


RANDOM_STATE = 42
# Path to the folder where the pretrained models are saved
CHECKPOINT_PATH = "./saved_models/ConvNets/"


# Function for setting the seed
pl.seed_everything(42)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = torch.device(
    "cuda:0") if torch.cuda.is_available() else torch.device("cpu")
task = 'multiclass'


Seed set to 42


Тест на работоспособность модели.

In [5]:
test_beat = np.load('./transformed_train/00269_hr_n1.npy')
kernel_size = 16
stride = 2
n_block = 48
downsample_gap = 6
increasefilter_gap = 12
model = ResNet1D(
    in_channels=12, 
    base_filters=16, # 64 for ResNet1D, 352 for ResNeXt1D
    kernel_size=16, 
    stride=2, 
    groups=1, 
    n_block=12, 
    n_classes=1) 


test_beat = test_beat.reshape((1,12,-1))
test_y = torch.tensor([[1.]])
criterion = nn.BCELoss()
print("test beat shape", test_beat.shape)
res= model(torch.from_numpy(test_beat).float())
print(res.shape)
criterion(res, test_y)

test beat shape (1, 12, 500)
torch.Size([1, 1])


tensor(0., grad_fn=<BinaryCrossEntropyBackward0>)

In [6]:
import os
import pandas as pd
from torch.utils.data import Dataset

class DatasetECG(Dataset):
    def __init__(self, annotations_file, signals_dir):
        """
        annotantions_file - path to the annotations dataframe. 
                            First column should be name of the record, second - strat_fold then labels 
        
        signals_dir - path to the directory with transformed signals
        """
        self.signals_labels = pd.read_csv(annotations_file)
        #self.signals_labels = self.signals_labels[self.signals_labels["норма"] != 1]
        self.signals_dir = signals_dir 

    def __len__(self):
        return len(self.signals_labels)

    def __getitem__(self, idx):
        signals_path = os.path.join(self.signals_dir, self.signals_labels.iloc[idx, 0]+ ".npy")
        signal = np.load(signals_path).astype(np.float32)        

        # iloc[idx, 2:] 2 is because first column is a record name
        # label = "".join(self.signals_labels.iloc[idx, 2:].values.astype(str).tolist())
        labels = torch.from_numpy(self.signals_labels.iloc[idx, 2:].values.astype(int)).float()
        if(task=='multiclass'):
            labels = labels.argmax()
        # label = self.targets.index(label)
        # encoded_label = torch.zeros(13)
        # encoded_label[label] = 1
        return signal, labels


In [7]:
PREFIX_TRAIN = './transformed_train2/'
PREFIX_VAL ='./transformed_train/'

train_dataset = DatasetECG("./train_annotations.csv", PREFIX_TRAIN)
val_dataset = DatasetECG("./val_annotations.csv", PREFIX_VAL)

In [8]:
val_dataset[0]

(array([[-0.01595599, -0.01452019, -0.01285886, ..., -0.05533369,
         -0.05917533, -0.05846827],
        [ 0.00149945,  0.0019128 ,  0.0026792 , ..., -0.03774664,
         -0.03252999, -0.02398809],
        [ 0.00999824,  0.00886223,  0.00809438, ...,  0.00065544,
         -0.00274447, -0.00584236],
        ...,
        [-0.1298285 , -0.12982513, -0.12974711, ..., -0.04617563,
         -0.0457119 , -0.04531983],
        [-0.10032634, -0.10036746, -0.10019438, ..., -0.04906217,
         -0.04845833, -0.04787983],
        [-0.06980778, -0.07076729, -0.07136829, ..., -0.0260726 ,
         -0.02434688, -0.02278278]], dtype=float32),
 tensor(6))

In [9]:
from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True) #, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False) #, num_workers=4)

### init trainer class

In [10]:

class Lightning_ResNet1D(pl.LightningModule):
    def __init__(self, model_name, model_hparams, optimizer_name, optimizer_hparams, model_class=ResNet1D, task="MULTILABEL"):
        """
        Inputs:
            model_name - Name of the model/CNN to run. Used for creating the model (see function below)
            model_hparams - Hyperparameters for the model, as dictionary.
            optimizer_name - Name of the optimizer to use. Currently supported: Adam, SGD
            optimizer_hparams - Hyperparameters for the optimizer, as dictionary. This includes learning rate, weight decay, etc.
        """
        super().__init__()
        # Exports the hyperparameters to a YAML file, and create "self.hparams" namespace
        self.save_hyperparameters()
        # Create model
        self.model = model_class(**model_hparams)
        # Create loss module
        self.loss_module = nn.CrossEntropyLoss()
        self.train_score = F1Score(task=task, num_classes=model_hparams["n_classes"], top_k=1)
        self.val_score = MulticlassF1Score(task=task, num_classes=model_hparams["n_classes"], top_k=1)
        self.test_score = F1Score(task=task, num_labels=model_hparams["n_classes"], num_classes=model_hparams["n_classes"], top_k=1)
        self.val_acc = Accuracy(task=task, num_classes=model_hparams["n_classes"], top_k=1)
        self.train_acc = Accuracy(task=task, num_labels=model_hparams["n_classes"], num_classes=model_hparams["n_classes"], top_k=1)
        

        # Example input for visualizing the graph in Tensorboard
        self.example_input_array = torch.zeros((1, 12, 500), dtype=torch.float32)

    def forward(self, imgs):
        # Forward function that is run when visualizing the graph
        return self.model(imgs)

    def configure_optimizers(self):
        # We will support Adam or SGD as optimizers.
        if self.hparams.optimizer_name == "Adam":
            # AdamW is Adam with a correct implementation of weight decay (see here
            # for details: https://arxiv.org/pdf/1711.05101.pdf)
            optimizer = optim.AdamW(self.parameters(), **self.hparams.optimizer_hparams)
        elif self.hparams.optimizer_name == "SGD":
            optimizer = optim.SGD(self.parameters(), **self.hparams.optimizer_hparams)
        else:
            assert False, f'Unknown optimizer: "{self.hparams.optimizer_name}"'

        # We will reduce the learning rate by 0.1 every milestone
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[35,65, 115, 150], gamma=0.1)
        return [optimizer], [scheduler]

    def training_step(self, batch, batch_idx):
        # "batch" is the output of the training data loader.
        self.model.train()
        imgs, labels = batch
        labels = np.squeeze(labels)
        preds = np.squeeze(self.model(imgs))
        loss = self.loss_module(preds, labels)
        #if len(preds.shape) < 2:
        # print(preds.shape, labels.shape)
        if False:
            preds = preds.argmax(axis=1)
            labels = labels.to(torch.int).argmax(axis=1)
            
        self.train_acc(preds, labels.to(torch.int))

        self.train_score(preds, labels.to(torch.int))
        # print(preds, labels.to(torch.int))
        # print(preds.argmax(axis=1), labels.to(torch.int).argmax(axis=1))
        self.log("train_f1_score", self.train_score)
        self.log("train_acc", self.train_acc, on_step=False, on_epoch=True)
        self.log("train_loss", loss)
        return loss  

    def validation_step(self, batch, batch_idx):
        self.model.eval()
        imgs, labels = batch
        labels = np.squeeze(labels)
        preds = np.squeeze(self.model(imgs))

        #preds = preds.view(1, -1)
        #labels = labels.view(1, -1)
        # print(torch.round(preds, decimals=2))
        #if len(preds.shape) < 2:
        #    preds = preds.view(-1, 1, -1)
        #    labels = labels.view(1, -1)
        # print(torch.round(preds, decimals=2))
        # print(torch.round(labels, decimals=2))
        # print(preds)
        # print(labels)
        self.val_acc(preds, labels.to(torch.int))
        # print(preds)
        # print(labels)
        # print(preds.shape, labels.shape)
        # print(preds.argmax(axis=1), labels.to(torch.int).argmax(axis=1))
        self.val_score(preds, labels.to(torch.int))
        self.log("val_f1_score", self.val_score)
        self.log("val_acc", self.val_acc)
        
    def predict_step(self, batch, batch_idx):
        preds = np.squeeze(self(batch))
        return preds


In [11]:
def train_model(model_name, model_class=ResNet1D,num_epochs=None, train_continue = True, save_name=None, pretrained_filename="", model_hparams=None, optimizer_hparams=None, optimizer_name=None, task=None):
    """
    Inputs:
        model_name - Name of the model you want to run. Is used to look up the class in "model_dict"
        save_name (optional) - If specified, this name will be used for creating the checkpoint and logging directory.
    """
    if save_name is None:
        save_name = model_name

    curr_model_save_path = os.path.join(CHECKPOINT_PATH, save_name)
    trainer = pl.Trainer(
        check_val_every_n_epoch=2,
        default_root_dir=os.path.join(CHECKPOINT_PATH, save_name),  # Where to save models
        # We run on a single GPU (if possible)
        accelerator="auto",
        devices=1,
        # How many epochs to train for if no patience is set
        max_epochs=num_epochs,
        callbacks=[
            ModelCheckpoint(
                mode="max", monitor="val_f1_score", save_top_k=2,
            ), 
            EarlyStopping(monitor="val_f1_score", mode="max", patience=15),
            LearningRateMonitor("epoch"),
        ],  # Log learning rate every epoch
    ) 
    trainer.logger._log_graph = True  # If True, we plot the computation graph in tensorboard
    trainer.logger._default_hp_metric = None  # Optional logging argument that we don't need

    # Check whether pretrained model exists. If yes, load it and skip training
    if os.path.isfile(pretrained_filename):
        print(f"Found pretrained model at {pretrained_filename}, loading...")
        # Automatically loads the model with the saved hyperparameters
        model = Lightning_ResNet1D.load_from_checkpoint(pretrained_filename)
    else:
        if(pretrained_filename!=""):
            print("FAILED TO LOAD A MODEL")
            return
        # L.seed_everything(42)  # To be reproducable
        if train_continue:
            default_root_dir = os.path.join(CHECKPOINT_PATH, save_name) 
            default_root_dir = os.path.join(default_root_dir, "lightning_logs")
            continue_path = os.path.join(default_root_dir, os.listdir(default_root_dir)[-1])
            continue_path = os.path.join(continue_path, "checkpoints")
            continue_path = os.path.join(continue_path, os.listdir(continue_path)[-1])


        model = Lightning_ResNet1D(model_name=model_name, model_class=model_class,model_hparams=model_hparams, 
                                   optimizer_hparams=optimizer_hparams, optimizer_name=optimizer_name, task=task)
        if train_continue:
            trainer.fit(model, train_loader, val_loader, ckpt_path=continue_path)
        else:
            trainer.fit(model, train_loader, val_loader,)
        model = Lightning_ResNet1D.load_from_checkpoint(
            trainer.checkpoint_callback.best_model_path
        )  # Load best checkpoint after training

    # Test best model on validation and test set
    val_result = trainer.validate(model, dataloaders=val_loader, verbose=False)
    result = {"val_acc": val_result[0]["val_acc"], "val_f1_score": val_result[0]["val_f1_score"]}
    
    
    return model, result, curr_model_save_path

In [12]:

# pretrained_filename="saved_models/ConvNets/ResNet1D_denoising_level2/lightning_logs/version_3/checkpoints/epoch=63-step=23104.ckpt",     
#удалить аргумент pretrained filename, для тренировки заново
resnet_model, resnet_results, curr_model_save_path = train_model(
    train_continue=False,
    num_epochs=50,
    #pretrained_filename="saved_models/ConvNets/Final_fixed_val_added_norm/lightning_logs/version_0/checkpoints/epoch=31-step=65824.ckpt",     
    model_name="ResNet1D_v2",
    model_class=ResNet1D,
    save_name="Final_added_augs", 
    model_hparams={"n_classes": 7, "base_filters": 16, "kernel_size": 16, "stride": 2, "groups": 1, "n_block": 12, "in_channels": 12},
    optimizer_name="Adam",
    optimizer_hparams={"lr": 0.0001,  "weight_decay": 1e-4},
    task="multiclass"
) 


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type               | Params | In sizes     | Out sizes
------------------------------------------------------------------------------
0 | model       | ResNet1D           | 653 K  | [1, 12, 500] | [1, 7]   
1 | loss_module | CrossEntropyLoss   | 0      | ?            | ?        
2 | train_score | MulticlassF1Score  | 0      | ?            | ?        
3 | val_score   | MulticlassF1Score  | 0      | ?            | ?        
4 | test_score  | MulticlassF1Score  | 0      | ?            | ?        
5 | val_acc     | MulticlassAccuracy | 0      | ?            | ?        
6 | train_acc   | MulticlassAccuracy | 0      | ?            | ?        
------------------------------------------------------------------------------
653 K     Trainable params
0         Non-trainable params
653 K

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation: |          | 0/? [00:00<?, ?it/s]

In [25]:
resnet_results

{'val_acc': 0.6364899277687073, 'val_f1_score': 0.2566266357898712}

In [14]:
from tqdm.notebook import tqdm
from scipy.stats import mode

resnet_model.eval()

def predict(beat_filename):
    test_beat = np.load(beat_filename).astype(np.float32) 
    test_beat = test_beat.reshape((1,12,-1))
    res = resnet_model(torch.from_numpy(test_beat).float())
    return res.detach().numpy()

def run_test_predicts(curr_model_save_path, treshhold=0.5):
    df_og = pd.read_csv("./train/train_meta.csv")
    test_annotations = pd.read_csv("./val_annotations.csv")
    test_annotations.drop_duplicates(inplace=True)
    preds = {}
    for name in tqdm(test_annotations["new_name"]):
        record_name = name[:name.rfind("_")]
        if record_name not in preds:
            preds[record_name] = []
        pred = np.argmax(predict("./transformed_train/"+name+".npy"))
        preds[record_name].append(pred)

    preds_most_freq = {}
    for name, preds in preds.items(): 
        most_freq_pred = np.argmax(np.bincount(preds))   
        preds_most_freq[name] = most_freq_pred

    df_og["predict"] = df_og["record_name"].map(preds_most_freq)
    return df_og, preds_most_freq


In [15]:
test_res, d = run_test_predicts(curr_model_save_path)

  0%|          | 0/4319 [00:00<?, ?it/s]

In [16]:
test_res = test_res[~test_res.predict.isna()]
test_res

Unnamed: 0,patient_id,age,sex,height,weight,record_name,predict
0,18792.0,55.0,0,,70.0,00009_hr,6.0
3,18153.0,35.0,0,,82.0,00052_hr,6.0
4,16063.0,26.0,0,,93.0,00057_hr,6.0
23,14751.0,37.0,0,,,00209_hr,6.0
27,15351.0,25.0,0,,75.0,00247_hr,6.0
...,...,...,...,...,...,...,...
2091,20990.0,37.0,1,,,21744_hr,0.0
2094,9993.0,74.0,0,,,21774_hr,6.0
2097,10162.0,68.0,0,,,21795_hr,6.0
2098,11197.0,59.0,0,,,21825_hr,6.0


In [17]:
predicts = pd.DataFrame(test_res.predict.tolist())

label2pred = {"перегородочный":0, "передний":1, "боковой":2, "передне-боковой":3, "передне-перегородочный":4, "нижний":5, "норма":6}
pred2label = {v:k for k, v in label2pred.items()}

unique_values = predicts[0].unique() 
unique_values = np.append(unique_values, 2)
for value in unique_values:
    column_name = pred2label[value]
    predicts[column_name] = (predicts[0] == value).astype(int)
predicts = predicts.drop(columns=[0])
predicts

Unnamed: 0,норма,передний,перегородочный,нижний,передне-перегородочный,передне-боковой,боковой
0,1,0,0,0,0,0,0
1,1,0,0,0,0,0,0
2,1,0,0,0,0,0,0
3,1,0,0,0,0,0,0
4,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...
415,0,0,1,0,0,0,0
416,1,0,0,0,0,0,0
417,1,0,0,0,0,0,0
418,1,0,0,0,0,0,0


In [18]:
val_annotations = pd.read_csv("./train/train_gts_final.csv")

In [19]:
test_res2 = test_res.merge(val_annotations, on="record_name", how="left")

In [20]:
test_res2

Unnamed: 0,patient_id,age,sex,height,weight,record_name,predict,перегородочный,передний,боковой,передне-боковой,передне-перегородочный,нижний,норма
0,18792.0,55.0,0,,70.0,00009_hr,6.0,0,0,0,0,0,0,1
1,18153.0,35.0,0,,82.0,00052_hr,6.0,0,0,0,0,0,0,1
2,16063.0,26.0,0,,93.0,00057_hr,6.0,0,0,0,0,0,0,1
3,14751.0,37.0,0,,,00209_hr,6.0,0,0,0,0,0,0,1
4,15351.0,25.0,0,,75.0,00247_hr,6.0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415,20990.0,37.0,1,,,21744_hr,0.0,0,0,0,0,0,0,1
416,9993.0,74.0,0,,,21774_hr,6.0,0,0,0,0,0,0,1
417,10162.0,68.0,0,,,21795_hr,6.0,0,0,0,0,0,0,1
418,11197.0,59.0,0,,,21825_hr,6.0,0,0,0,0,0,0,1


In [21]:
from sklearn.metrics import f1_score

summs = 0
for i, column in enumerate(["перегородочный", "передний", "боковой", "передне-боковой", 
                             "передне-перегородочный", "нижний", "норма"]):
    print(column, "F1", f1_score(test_res2[column], predicts[column]))
    summs += f1_score(test_res2[column], predicts[column])
"TOTAL", summs / 7

перегородочный F1 0.10810810810810811
передний F1 0.06666666666666667
боковой F1 0.0
передне-боковой F1 0.16666666666666666
передне-перегородочный F1 0.23376623376623373
нижний F1 0.40449438202247195
норма F1 0.8157894736842105


('TOTAL', 0.2564987901306225)

In [22]:
pd.read_csv("./train_annotations.csv").sum()

new_name                  00269_hr_n000269_hr_n100269_hr_n200269_hr_n300...
target                                                                97088
перегородочный                                                       2624.0
передний                                                             2624.0
боковой                                                                 0.0
передне-боковой                                                      5248.0
передне-перегородочный                                               2624.0
нижний                                                               1312.0
норма                                                                  1312
dtype: object

## PREDICT

In [23]:
# test_res[~test_res.predict.isna()]["predict"].to_list()

Predict тренировочных данных и тестовых данных.


Класс еще не доделал

In [24]:
#from augmentation import 
class Predictor():
    def __init__(self,curr_model_save_path, test_df_path="./test/test_meta.csv",  tta=True) -> None:
        """
        curr_model_save_path: example: 
        """

        resnet_model.eval()
        self.curr_model_save_path = curr_model_save_path
        self.test_df_path = test_df_path
    
    def TTA():

        pass

    def _predict(self, beat_filename):
        test_beat = np.load(beat_filename).astype(np.float32) 
        test_beat = test_beat.reshape((1,12,-1))
        res = resnet_model(torch.from_numpy(test_beat).float())
        return res.detach().numpy()

    def run_test_predicts_multiclass(self):
        df_og = pd.read_csv(self.test_df_path)
        test_annotations = pd.read_csv("./transformed_test_df.csv")
        preds = {}
        for name in tqdm(test_annotations["new_name"].values):
            record_name = name[:name.rfind("_")]
            if record_name not in preds:
                preds[record_name] = []
            pred = np.argmax(predict("./transformed_test/"+name+".npy"))
            preds[record_name].append(pred)
        
        preds_most_freq = {}
        for name, preds in preds.items(): 
            most_freq_pred = np.argmax(np.bincount(preds))   
            preds_most_freq[name] = most_freq_pred

        df_og["predict"] = df_og["record_name"].map(preds_most_freq)

        label2pred = {"перегородочный":0, "передний":1, "боковой":2, "передне-боковой":3, "передне-перегородочный":4, "нижний":5, "норма":6}
        pred2label = {v:k for k, v in label2pred.items()}
        
        unique_values = df_og['predict'].unique() 
        unique_values = np.append(unique_values, 2)
        for value in unique_values:
            column_name = pred2label[value]
            df_og[column_name] = (df_og['predict'] == value).astype(int)

        df_og["target"] = df_og["record_name"].map(preds_most_freq)
        
        save_path = os.path.join(curr_model_save_path, "predicted_test.csv")
        df_og.to_csv(save_path)
        df_og.to_csv("./predicted_test.csv")
        print("Соотношение предсказанных классов:")
        display(df_og['target'].apply(round).value_counts(normalize=True))
        return df_og

    def run_test_predicts_multilabel(self):
        df_og = pd.read_csv(self.test_df_path)
        test_annotations = pd.read_csv("./transformed_test_df.csv")
        preds = {}
        for name in tqdm(test_annotations["new_name"].values):
            record_name = name[:name.rfind("_")]
            if record_name not in preds:
                preds[record_name] = []
            pred = np.argmax(predict("./transformed_test/"+name+".npy"))
            preds[record_name].append(pred)

        #TODO: dopisat'

        preds_median = {}
        df_og["target"] = df_og["record_name"].map(preds_median)
        
        save_path = os.path.join(curr_model_save_path, "predicted_test.csv")
        df_og.to_csv(save_path)
        df_og.to_csv("./predicted_test.csv")
        print("Соотношение предсказанных классов:")
        display(df_og['target'].apply(round).value_counts(normalize=True))
        return df_og



SyntaxError: invalid syntax (275528591.py, line 1)

In [26]:
from tqdm.notebook import tqdm
from scipy.stats import mode

resnet_model.eval()

def predict(beat_filename):
    test_beat = np.load(beat_filename).astype(np.float32) 
    test_beat = test_beat.reshape((1,12,-1))
    res = resnet_model(torch.from_numpy(test_beat).float())
    return res.detach().numpy()

def run_test_predicts(curr_model_save_path):
    df_og = pd.read_csv("./test/test_meta.csv")
    test_annotations = pd.read_csv("./transformed_test_df.csv")
    preds = {}
    for name in tqdm(test_annotations["new_name"].values):
        record_name = name[:name.rfind("_")]
        if record_name not in preds:
            preds[record_name] = []
        pred = np.argmax(predict("./transformed_test/"+name+".npy"))
        preds[record_name].append(pred)
    
    preds_most_freq = {}
    for name, preds in preds.items(): 
        most_freq_pred = np.argmax(np.bincount(preds))   
        preds_most_freq[name] = most_freq_pred

    df_og["predict"] = df_og["record_name"].map(preds_most_freq)

    #print(np.round(np.array(preds["00127_hr"]).mean(axis=0), 2))
    #preds_median = {k: np.array(v).mean(axis=0) > 0.5 for k,v in preds.items()}
    #df_og["predict"] = df_og["record_name"].map(preds_median)
    label2pred = {"перегородочный":0, "передний":1, "боковой":2, "передне-боковой":3, "передне-перегородочный":4, "нижний":5, "норма":6}
    pred2label = {v:k for k, v in label2pred.items()}
    
    unique_values = df_og['predict'].unique() 
    unique_values = np.append(unique_values, 2)
    for value in unique_values:
        column_name = pred2label[value]
        df_og[column_name] = (df_og['predict'] == value).astype(int)

    #preds_median = {k: np.array(v).mean(axis=0).argmax() for k,v in preds.items()}
    df_og["target"] = df_og["record_name"].map(preds_most_freq)
    
    save_path = os.path.join(curr_model_save_path, "predicted_test.csv")
    df_og.to_csv(save_path)
    df_og.to_csv("./predicted_test.csv")
    print("Соотношение предсказанных классов:")
    display(df_og['target'].apply(round).value_counts(normalize=True))
    return df_og

def preds_train_df(curr_model_save_path):
    df = pd.read_csv("./transformed_df.csv")
    preds = {}
    for name in tqdm(df["new_name"].values):
        record_name = name[:name.rfind("_")]
        if record_name not in preds:
            preds[record_name] = []
        preds[record_name].append(predict("./transformed_train/"+name+".npy"))
    
    df_og = pd.read_csv("../task_final/train/train_gts_final.csv")
    
    preds_median = {k:np.median(np.array(v)) for k,v in preds.items()}
    df_og['predict'] = df_og["record_name"].map(preds_median)
    save_path = os.path.join(curr_model_save_path, "predicted_train.csv")
    df_og.to_csv(save_path)
    df_og.to_csv("./predicted_train.csv")
    print("Соотношение предсказанных классов:")
    display(df_og['predict'].apply(round).value_counts(normalize=True))
    return preds, df_og


In [None]:
old_submit = pd.read_csv("old_submit.csv")

In [27]:
test_res = run_test_predicts(curr_model_save_path)
# preds, df_og = preds_train_df(curr_model_save_path)

one_hot_enc = np.zeros((449, 6))
for i in range(449):
    one_hot_enc[i] = test_res["predict"].iloc[i] * 1 + [0]

  0%|          | 0/4533 [00:00<?, ?it/s]

Соотношение предсказанных классов:


6    0.625835
5    0.120267
4    0.109131
0    0.084633
1    0.035635
3    0.024499
Name: target, dtype: float64

In [28]:
one_hot_enc = np.zeros((449, 6))
for i in range(449):
    one_hot_enc[i] = test_res["predict"].iloc[i] * 1 + [0]

In [None]:
one_hot_enc

array([[6., 6., 6., 6., 6., 6.],
       [6., 6., 6., 6., 6., 6.],
       [0., 0., 0., 0., 0., 0.],
       ...,
       [3., 3., 3., 3., 3., 3.],
       [6., 6., 6., 6., 6., 6.],
       [6., 6., 6., 6., 6., 6.]])

In [31]:
submit = test_res[["перегородочный","передний","боковой","передне-боковой","передне-перегородочный","нижний","норма","record_name"]]
#submit = pd.read_csv('saved_models/ConvNets/two_models/submit.csv')
sample = pd.read_csv("./sample.csv")
submit = sample.merge(submit, on="record_name", how="left")
submit = submit[["record_name", "перегородочный","передний","боковой","передне-боковой","передне-перегородочный","нижний","норма"]]
submit.to_csv(f"{curr_model_save_path}/submit.csv", index=False)

In [None]:
onehot_df = pd.DataFrame(one_hot_enc, columns=["перегородочный", "передний", "боковой", "передне-боковой", 
                             "передне-перегородочный", "нижний"])
onehot_df["record_name"] = test_res["record_name"]
subm_df = test_res.merge(onehot_df, on="record_name", how="left")
subm_df = subm_df.merge(old_submit, on="record_name", how="left")
subm_df["норма"] = subm_df["myocard"].map(lambda x: 0 if x == 1 else 1)
for column in ["перегородочный", "передний", "боковой", "передне-боковой", 
                             "передне-перегородочный", "нижний"]:
    subm_df[column] = subm_df['myocard'] & subm_df[column].astype(int)

NameError: name 'old_submit' is not defined

In [None]:
subm_df.head()

In [None]:
subm_df[["record_name", "перегородочный", "передний", "боковой", "передне-боковой", 
                             "передне-перегородочный", "нижний", "норма"]].to_csv("submit.csv", index=False)
subm_df[["record_name", "перегородочный", "передний", "боковой", "передне-боковой", 
                             "передне-перегородочный", "нижний", "норма"]]

Предсказание всех тренировочных данных

In [None]:
preds_mean = {k:(sum(v)/len(v)) for k,v in preds.items()}
preds_median = {k:np.median(np.array(v)) for k,v in preds.items()}
preds_max= {k:np.argmax(np.array(v)) for k,v in preds.items()}

In [None]:
def check(preds, df_og):
    tp,tn,fp,fn = 0,0,0,0

    for k, v in preds.items():
        predicted_class = round(v)
        actual_class = df_og[df_og["record_name"] == k]["myocard"].values[0]
        
        if actual_class == 1 and predicted_class == 1:
            tp += 1
        elif actual_class == 0 and predicted_class == 0:
            tn += 1
        elif actual_class == 0 and predicted_class == 1:
            fp += 1
        elif actual_class == 1 and predicted_class == 0:
            fn += 1
    precision = tp/(tp+fp)
    recall = tp/(tp+fn)
    tnr = tn/(tn+fp)
    print("accuracy", (tp + tn)/len(preds))
    print("true positive rate, recall", (tp/(tp+fn)))
    print("true negative rate", tnr)
    print("f1_score", 2*(precision*recall)/(precision+recall))

In [None]:
check(preds_mean, df_og)

In [None]:
check(preds_median, df_og)

In [None]:
check(preds_max, df_og)