In [None]:
%%capture
# Install the extra packages this notebook imports; %%capture hides pip's output.
!pip install torchmetrics wandb

In [None]:
# Загрузка датасета
%%capture
!gdown 1UNqS5EORC5r38PY8OgoKvzLPQMBzXhRh
!unzip "R_peaks_train.zip"

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as ptl
import torchmetrics
from tqdm.auto import tqdm
from IPython.display import clear_output

import torch
from torch.utils.data import DataLoader, Dataset, random_split
from torch.optim import AdamW, lr_scheduler
from torchvision import models

from google.colab import drive
# Mount Google Drive at /content/drive (checkpoints are written there later).
drive.mount('/content/drive')

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Bare expression so the notebook displays which device was chosen.
device

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


device(type='cuda')

In [None]:
# Authenticate with Weights & Biases WITHOUT embedding the API key in the
# notebook. wandb.login() picks the key up from the WANDB_API_KEY environment
# variable or a previously cached credential, and otherwise asks for it
# interactively.
# SECURITY: a key that was ever stored in a shared notebook should be revoked
# and regenerated in the W&B account settings.
import wandb

wandb.login()

# Start a run in the "AIIJC" project and record the basic experiment settings.
wandb.init(
    project="AIIJC",
    config={
        "architecture": "regnet",
        "dataset": "R_peaks",
    },
)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


[34m[1mwandb[0m: Currently logged in as: [33mnotdiff[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Tracking run with wandb version 0.15.10
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/content/wandb/run-20230920_072719-x653r869[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mcrimson-sea-112[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/notdiff/AIIJC[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/notdiff/AIIJC/runs/x653r869[0m


In [None]:
# Read the label table and build the working set: the first 7000 rows (at
# most) with myocard == 0 plus every row with myocard == 1.
data = pd.read_csv('/content/R_peaks_train/labels.csv')

normals = data.loc[data['myocard'] == 0].iloc[:7000]
myocard = data.loc[data['myocard'] == 1]

# sample(frac=1) returns all rows in a random order.
data = pd.concat([normals, myocard]).sample(frac=1)

In [None]:
class SignalsDataset(Dataset):
    """Dataset of signals stored as individual ``.npy`` files.

    Every item is a ``(signal, target)`` pair: the array loaded from
    ``path + <name> + '.npy'`` with a leading axis of size 1 prepended, and
    the label stored next to that name in the label table.

    Args:
        labels: DataFrame whose first column holds the file names (without
            the ``.npy`` extension) and whose second column holds the targets.
        path: Prefix prepended verbatim to every file name, so a directory
            must include its trailing separator.
    """

    def __init__(self, labels, path='/'):
        # Extract whole columns at once instead of one ``iloc`` lookup per row.
        self.x_paths = labels.iloc[:, 0].tolist()
        self.labels = labels.iloc[:, 1].tolist()
        self.path = path

    def __len__(self):
        """Return the number of samples."""
        return len(self.x_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` from disk and return ``(signal, target)``."""
        signal = np.load(self.path + self.x_paths[idx] + '.npy')
        # The stored array needs at least two dimensions; indexing with None
        # prepends an axis of size 1 in front of them.
        hr = torch.tensor(signal)[None, :, :]

        return hr, self.labels[idx]

In [None]:
# Wrap the label table in a dataset and hold out 10% of it for validation.
dataset = SignalsDataset(data, '/content/R_peaks_train/signals/')
train_data, val_data = random_split(dataset, [0.9, 0.1])

batch_size = 64

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
# Validation is NOT shuffled: a fixed batch composition keeps batch-dependent
# validation metrics identical between passes over the same weights.
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

In [None]:
# Start from the ImageNet-pretrained RegNetY-3.2GF. The explicit weights enum
# replaces the deprecated ``pretrained=True`` flag and selects the same
# checkpoint.
model = models.regnet_y_3_2gf(weights=models.RegNet_Y_3_2GF_Weights.IMAGENET1K_V1)

# Swap the first stem convolution for one that takes a single input channel.
# This layer is newly created, so it does not carry pretrained weights.
model.stem[0] = torch.nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

# Replace the classification head with a two-class one.
model.fc = torch.nn.Sequential(
    torch.nn.Dropout(p=0.1, inplace=True),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=model.fc.in_features, out_features=2, bias=True),
)

# Fine-tune the whole network: make sure every parameter is trainable.
for param in model.parameters():
    param.requires_grad = True

# Assigning the result keeps the cell from echoing the model repr.
model = model.to(device)

Downloading: "https://download.pytorch.org/models/regnet_y_3_2gf-b5a9779c.pth" to /root/.cache/torch/hub/checkpoints/regnet_y_3_2gf-b5a9779c.pth
100%|██████████| 74.6M/74.6M [00:00<00:00, 88.1MB/s]


In [None]:
# Objective, optimiser and learning-rate schedule used by the training loop.
learning_rate = 3e-4

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
# With its default settings the scheduler lowers the learning rate once the
# value passed to scheduler.step() stops decreasing.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)

In [None]:
def train(model, loss_fn, scheduler, optimizer, n_epoch=3, device='cuda'):
    """Train ``model`` and checkpoint it whenever the validation F1 improves.

    Batches come from the notebook-level ``train_loader`` and ``val_loader``.
    Validation metrics are logged to wandb once before training (baseline)
    and once after every epoch.

    Args:
        model: Network to train; updated in place.
        loss_fn: Callable taking ``(logits, int64 targets)`` and returning the loss.
        scheduler: LR scheduler whose ``step`` receives the validation loss.
        optimizer: Optimiser bound to ``model``'s parameters.
        n_epoch: Number of passes over ``train_loader``.
        device: Device the batches are moved to.

    Returns:
        None.
    """
    import os  # local import: only needed to create the checkpoint directory

    max_f1 = 0

    # Baseline before any update. Switch to eval mode first so the Dropout
    # layer in the head is inactive while the untrained model is scored.
    model.eval()
    val_accuracy, val_loss, val_f1_score = evaluate(model, val_loader, loss_fn=loss_fn, device=device)
    wandb.log({"F1": val_f1_score, "Acc": val_accuracy, 'loss': val_loss})

    for epoch in range(n_epoch):
        print("Epoch:", epoch+1)

        model.train()
        for X_batch, y_batch in tqdm(train_loader):
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            logits = model(X_batch.float())
            loss = loss_fn(logits, y_batch.to(torch.int64))

            loss.backward()
            optimizer.step()
            # Clear gradients right after the step so the next batch starts clean.
            optimizer.zero_grad()

        model.eval()

        val_accuracy, val_loss, val_f1_score = evaluate(model, val_loader, loss_fn=loss_fn, device=device)
        wandb.log({"F1": val_f1_score, "Acc": val_accuracy, 'loss': val_loss})
        clear_output(wait=True)

        # Keep a checkpoint every time the validation F1 sets a new best.
        if max_f1 < val_f1_score:
            max_f1 = val_f1_score
            checkpoint_path = f'/content/drive/MyDrive/models/regnet_y_3_2gf_Rp_resampled_model_f1:{val_f1_score:.3f}_7000.pth'
            # Create the target directory if needed so the save cannot fail
            # (and waste the epoch) just because it does not exist yet.
            os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
            torch.save(model.state_dict(), checkpoint_path)

        scheduler.step(val_loss)

def evaluate(model, dataloader, loss_fn, device):
    """Score ``model`` on every batch of ``dataloader`` without gradients.

    The model is put into eval mode for the duration of the call and its
    previous train/eval mode is restored afterwards.

    Args:
        model: Network producing per-class logits of shape ``(batch, classes)``.
        dataloader: Iterable of ``(inputs, targets)`` batches.
        loss_fn: Callable taking ``(logits, int64 targets)`` and returning the loss.
        device: Device the batches are moved to.

    Returns:
        tuple: ``(accuracy, loss, f1)`` where ``accuracy`` is the fraction of
        correct predictions over all samples (float), ``loss`` is the mean of
        the per-batch losses (``np.float64``), and ``f1`` is the binary F1
        score computed over all samples together (float).
    """
    f1 = torchmetrics.F1Score(task='binary')
    losses = []
    num_correct = 0
    num_elements = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for X_batch, y_batch in tqdm(dataloader):
                X_batch = X_batch.to(device).float()
                y_batch = y_batch.to(device).to(torch.int64)

                logits = model(X_batch)
                losses.append(loss_fn(logits, y_batch).item())

                y_pred = torch.argmax(logits, dim=1)

                # Accumulate the metric state across batches; averaging
                # per-batch F1 values would not give the F1 of the whole set.
                f1.update(y_pred.cpu(), y_batch.cpu())

                num_elements += len(y_batch)
                num_correct += torch.sum(y_pred == y_batch).item()
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    accuracy = num_correct / num_elements

    return accuracy, np.mean(losses), f1.compute().item()

In [None]:
# Run the training loop for 40 epochs on the selected device.
train(model, loss_fn, scheduler, optimizer, n_epoch=40, device=device)

Epoch: 5


  0%|          | 0/158 [00:00<?, ?it/s]