# 1. Setup and Importing Libraries

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset # wraps an iterable around the dataset
from torchvision import datasets    # stores the samples and their corresponding labels
from torchvision.transforms import transforms  # transformations we can perform on our dataset
from torchvision.transforms import ToTensor
import pandas as pd
import numpy as np
import os
#import wandb
import matplotlib.pyplot as plt

from torch.utils.tensorboard import SummaryWriter


import torch.optim as optim
import torch.nn.functional as F
# q: what is the difference between torch.nn.functional and torch.nn
# a: https://discuss.pytorch.org/t/what-is-the-difference-between-torch-nn-and-torch-nn-functional/33597/2

In [3]:
# Set API Key
os.environ["WANDB_API_KEY"] = "cf61e02cee13abdd3d8a232d29df527bd6cc7f89"

# Set the WANDB_NOTEBOOK_NAME environment variable to the name of your notebook (manually)
os.environ["WANDB_NOTEBOOK_NAME"] = "DataLoader.ipynb"

# set the WANDB_TEMP environment variable to a directory where we have write permissions
os.environ["WANDB_TEMP"] = os.getcwd()
os.environ["WANDB_DIR"] = os.getcwd()
os.environ["WANDB_CONFIG_DIR"] = os.getcwd()

In [3]:
wandb.init(project='ECG-analysis-with-Deep-Learning-on-GPU-accelerators')

AttributeError: module 'wandb' has no attribute 'init'

In [2]:
# Get cpu, gpu or mps device for training 
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

In [3]:
torch.cuda.get_device_name(0)

'NVIDIA RTX A6000'

# 2. Data Loader

In [26]:
class ECGDataSet(Dataset):
    
    def __init__(self, split='train'):

        self.split = split

        # data loading
        current_directory = os.getcwd()
        self.parent_directory = os.path.dirname(current_directory)
        train_small_path = os.path.join(self.parent_directory, 'data/data', 'deepfake-ecg-small', str(self.split) + '.csv')
        self.df = pd.read_csv(train_small_path)  # Skip the header row
        
        # Avg RR interval
        # in milli seconds
        RR = torch.tensor(self.df['pr'].values, dtype=torch.float32)
        # calculate HR
        self.y = 60 * 1000/RR

        # Size of the dataset
        self.samples = self.df.shape[0]

    def __getitem__(self, index):
        
        # file path
        filename= self.df['patid'].values[index]
        asc_path = os.path.join(self.parent_directory, 'data/data', 'deepfake-ecg-small', str(self.split), str(filename) + '.asc')
        #print(asc_path)
        
        ecg_signals = pd.read_csv( asc_path, header=None, sep=" ") # read into dataframe
        ecg_signals = torch.tensor(ecg_signals.values) # convert dataframe values to tensor
        
        ecg_signals = ecg_signals.float()
        
        # Transposing the ecg signals
        ecg_signals = ecg_signals/6000 # normalization
        ecg_signals = ecg_signals.t() 
        
        qt = self.y[index]
        # Retrieve a sample from x and y based on the index
        return ecg_signals, qt

    def __len__(self):
        # Return the total number of samples in the dataset
        return self.samples
    

In [27]:
# ECG dataset
train_dataset = ECGDataSet(split='train')
validate_dataset = ECGDataSet(split='validate')

In [28]:
# first data
first_data = train_dataset[2]
x, y = first_data

In [29]:
x

tensor([[-0.0123, -0.0147, -0.0088,  ..., -0.0090, -0.0063, -0.0090],
        [-0.0053, -0.0075, -0.0115,  ..., -0.0048, -0.0043, -0.0097],
        [-0.0030,  0.0000, -0.0017,  ...,  0.0068,  0.0083,  0.0085],
        ...,
        [-0.0110, -0.0078, -0.0065,  ..., -0.0052, -0.0030, -0.0037],
        [-0.0127, -0.0162, -0.0115,  ..., -0.0115, -0.0108, -0.0167],
        [-0.0055, -0.0072, -0.0018,  ..., -0.0060, -0.0045, -0.0068]])

In [30]:
y

tensor(389.6104)

In [58]:
x.shape

torch.Size([8, 5000])

In [59]:
y.shape

torch.Size([])

# 3. Residual Convoluted Neural Network

In [31]:
# data loader
# It allows you to efficiently load and iterate over batches of data during the training or evaluation process.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True, num_workers=20)
validate_dataloader = DataLoader(dataset=validate_dataset, batch_size=8, shuffle=False, num_workers=20)

# q: what is num_workers?
# A: num_workers (int, optional) – how many subprocesses to use for data loading. 0 means that the data will be loaded in the main process. (default: 0)

In [7]:
for x,y in train_dataloader:
    print(x.shape, y.shape)
    print(x.dtype, y.dtype)
    break

torch.Size([4, 8, 5000]) torch.Size([4])
torch.float32 torch.float32


## ResNet of the paper reimplementation with pytorch

### 1st implementation

In [16]:
class KanResWide_X(nn.Module):

    def __init__(self, input_size, output_size):

        super(KanResWide_X, self).__init__()
        #q: what does super(KanResWide_X, self) do?
        #a: it returns a proxy object that delegates method calls to a parent or sibling class of type.
        #q: what does super(KanResWide_X, self).__init__() do?
        #a: it calls the __init__ function of the parent class (nn.Module)

        #q: is super(KanResWide_X, self).__init__() same to super().__init__()?
        #a: yes, but the former is more explicit

        self.input_size = input_size
        self.output_size = output_size

        # initial module (before resnet blocks)
        self.kanres_init = nn.Sequential(
            nn.Conv1d(input_size, 64, kernel_size=8, stride=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Conv1d(64, 32, kernel_size=3),
            nn.BatchNorm1d(32),
            nn.ReLU()
        )

        # Resnet block
        self.kanres_module = nn.Sequential(
            nn.Conv1d(32, 64, kernel_size=50, stride=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Conv1d(64, 32, kernel_size=50, stride=1),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.functional.add()        # the skip connection in res block
            #q: what does nn.Add() do?
            #a: it adds the input to the output
        )

        self.global_average_pooling = nn.AdaptiveAvgPool1d(1)
        self.dense = nn.Linear(32, output_size)

    def forward(self, x):
        x = self.kanres_init(x)
        x = self.kanres_module(x)
        x = self.kanres_module(x)
        x = self.kanres_module(x)
        x = self.kanres_module(x)
        x = self.kanres_module(x)
        x = self.kanres_module(x)
        x = self.kanres_module(x)
        x = self.kanres_module(x)
        x = self.global_average_pooling(x)
        x = self.dense(x)
        return x

### 2nd implementation 

In [32]:
import torch.nn.functional as F

class KanResInit(nn.Module):
    def __init__(self, in_channels, filterno_1, filterno_2, filtersize_1, filtersize_2, stride):
        #print(in_channels) --> 8
        super(KanResInit, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, filterno_1, filtersize_1, stride=stride)
        self.bn1 = nn.BatchNorm1d(filterno_1)
        self.conv2 = nn.Conv1d(filterno_1, filterno_2, filtersize_2)
        self.bn2 = nn.BatchNorm1d(filterno_2)
        # initialize a relu layer
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        return x

class KanResModule(nn.Module):
    def __init__(self, in_channels, filterno_1, filterno_2, filtersize_1, filtersize_2, stride):
        super(KanResModule, self).__init__()
        # have to use same padding to keep the size of the input and output the same
        # calculate the padding needed for same
        padding = (filtersize_1 - 1) // 2 + (stride - 1)
        self.conv1 = nn.Conv1d(in_channels, filterno_1, filtersize_1, stride=stride, padding='same')
        self.bn1 = nn.BatchNorm1d(filterno_1)
        self.conv2 = nn.Conv1d(filterno_1, filterno_2, filtersize_2, padding='same')
        self.bn2 = nn.BatchNorm1d(filterno_2)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        identity = x
        #print(x.shape)      
        x = self.conv1(x)
        #print(x.shape)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        #print(x.shape)
        x = self.bn2(x)
        x = self.relu(x)
        x = x + identity
        return x

class KanResWide_X2(nn.Module):
    def __init__(self, input_shape, output_size):
        super(KanResWide_X2, self).__init__()

        #print(input_shape[0])
        #print(input_shape[1])

        self.input_shape = input_shape
        self.output_size = output_size
        
        self.init_block = KanResInit(input_shape[0], 64, 32, 8, 3, 1)
        self.pool = nn.AvgPool1d(kernel_size=2)
        
        self.module_blocks = nn.Sequential(
            KanResModule(32, 64, 32, 50, 50, 1),
            KanResModule(32, 64, 32, 50, 50, 1),
            KanResModule(32, 64, 32, 50, 50, 1),
            KanResModule(32, 64, 32, 50, 50, 1),
            KanResModule(32, 64, 32, 50, 50, 1),
            KanResModule(32, 64, 32, 50, 50, 1),
            KanResModule(32, 64, 32, 50, 50, 1),
            KanResModule(32, 64, 32, 50, 50, 1)
        )
        
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(32, output_size)
        
    def forward(self, x):
        x = self.init_block(x)
        #print("init block trained")
        #print(x.shape)
        x = self.pool(x)
        #print("pool 1 trained")
        #print(x.shape)
        x = self.module_blocks(x)
        #print("module blocks trained")
        x = self.global_avg_pool(x)
        #print(x.shape)
        x = x.view(x.size(0), -1)
        #q: explain the above line
        #a: it flattens the input
        x = self.fc(x)
        #print(x.shape)
        # squeeze the output
        x = torch.squeeze(x)
        #print(x.shape)
        return x


### 3rd implementation

### Step by step reimplementation 

In [8]:
# A small 1D CNN just to check if the model is working
class CNNblock(nn.Module):
    def __init__(self,input_channels):
        super(CNNblock, self).__init__()

        self.conv1 = nn.Conv1d(in_channels=input_channels, out_channels=32, kernel_size=3, stride=1, padding=1)
        # q: explain nn.Conv1d(input_channels, 32, kernel_size=3, stride=1, padding=1)
        # a: input_channels is the number of channels in the input data
        #    32 is the number of output channels

        self.bn1 = nn.BatchNorm1d(32)
        # q: what is 32?
        # a: 32 is the number of output channels

        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, stride=1, padding=1)

        self.bn2 = nn.BatchNorm1d(64)

        # relu layer
        self.relu = nn.ReLU()

        # average pooling layer 
        #self.pool = nn.AvgPool1d(kernel_size=2, stride=2)

        # q: what is nn.AdaptiveAvgPool1d(1)?
        # a: nn.AdaptiveAvgPool1d(1) is a function that averages the input
        #self.globalavgpool = nn.AdaptiveAvgPool1d(1)
        

    def forward(self,x):
        # bactchnormalization before activation
        #print(x.shape)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        # q: explain nn.relu(x) vs nn.functional.relu(x)
        # a: nn.relu(x) is a module, nn.functional.relu(x) is a function
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        # q: batch normalizing and maxpooling?
        # a: batch normalizing is a technique to normalize the input of each layer
        #    maxpooling is a technique to reduce the size of the input

        # printing the shape of x
        #print(x.shape)

        return x
    
# Resnet block of the Network
class ResBlock(nn.Module):
    def __init__(self, input_channels, output_channels, stride):
        super(ResBlock, self).__init__()
        self.conv1 = nn.Conv1d(input_channels, output_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm1d(output_channels)
        self.conv2 = nn.Conv1d(output_channels, output_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(output_channels)
        self.relu = nn.ReLU()
        self.downsample = nn.Sequential(
            nn.Conv1d(input_channels, output_channels, kernel_size=1, stride=stride),
            nn.BatchNorm1d(output_channels)
        )
        self.stride = stride
    
    def forward(self, x):
        residual = x
        #print(x.shape)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        #print(out.shape)
        out = self.conv2(out)
        out = self.bn2(out)
        #print(out.shape)
        if self.stride != 1 or x.shape[1] != out.shape[1]:
            residual = self.downsample(x)
        #print(residual.shape)
        out += residual
        out = self.relu(out)
        #print(out.shape)
        return out

# The model with the convolutional block
class CNN(nn.Module):
    def __init__(self, input_channels, output_size):
        super(CNN, self).__init__()
        self.cnn = CNNblock(input_channels)
        self.fc = nn.Linear(64*5000, output_size)
    
    def forward(self, x):
        #print(x.shape)
        x = self.cnn(x)
        x = x.view(x.size(0), -1)
        # what is x.view(x.size(0), -1)?
        # a: x.view(x.size(0), -1) is a function that flattens the input
        x = self.fc(x)
        return x
    

### GPT example

In [15]:
class Simple1DCNN(nn.Module):
    def __init__(self, input_channels, num_classes):
        super(Simple1DCNN, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=input_channels, out_channels=32, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(32 * 2500, 128)  # Flattened size after pooling
        self.fc2 = nn.Linear(128, num_classes)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)
        #print(x.shape)
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

## Other Resnets

In [62]:
# Residual Block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ResidualBlock, self).__init__()
        # First convolutional layer of the residual block
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        # Second convolutional layer of the residual block
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        residual = x
        # Pass input through the first convolutional layer
        out = self.conv1(x)
        out = self.relu(out)
        # Pass the output of the first convolutional layer through the second convolutional layer
        out = self.conv2(out)
        # Add the residual connection
        out += residual
        out = self.relu(out)
        return out

In [63]:
# Residual CNN model
class ResidualCNN(nn.Module):
    def __init__(self, num_classes):
        super(ResidualCNN, self).__init__()
        # Initial convolutional layer
        self.conv1 = nn.Conv1d(8, 16, kernel_size=2, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool1d(kernel_size=2, stride=2)
        # First residual block
        self.res_block1 = ResidualBlock(16, 16)
        # Second residual block
        self.res_block2 = ResidualBlock(16, 16) # remove this 
        # Fully connected layer
        self.fc = nn.Linear(16 * 2500, num_classes)

    def forward(self, x):
        # Pass input through the initial convolutional layer
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        # Pass the output through the first residual block
        x = self.res_block1(x)
        # Pass the output through the second residual block
        x = self.res_block2(x)
        x = self.relu(x)
        x = x.view(x.size(0), -1)
        # Pass the flattened output through the fully connected layer
        x = self.fc(x)
        return x

# Training

#### To clear the VRAM

In [33]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [9]:
# for the tensorboard
writer = SummaryWriter()

In [47]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        #print(X.shape)
        # Compute prediction error
        pred = model(X)
        #print("LOSS")   # this print statement is there to check the warning
        loss = loss_fn(pred, y)

        #writer.add_scalar("Loss/train", loss, epoch)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        #if batch % 100 == 0:
         #   loss, current = loss.item(), (batch + 1) * len(X)
          #  print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
        
        loss, current = loss.item(), (batch + 1) * len(X)
        
    #print the average loss of the epoch
    print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
    

In [31]:
# train function with tensorbard
def trainTB(dataloader, model, loss_fn, optimizer,epoch):
    size = len(dataloader.dataset)
    model.train()
    loss = 0

    total_loss = 0
    # get the number of batches
    num_batches = len(dataloader)

    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        #print(X.shape)
        # Compute prediction error
        pred = model(X)
        # check the shape of pred and y here
        if batch == 1:
            print(pred.shape)       # this is [8,1]
            print(y.shape)          # this is [8]
        #print("LOSS")   # this print statement is there to check the warning
        loss = loss_fn(pred, y)

        total_loss += loss.item()

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        #if batch % 100 == 0:
        #    loss, current = loss.item(), (batch + 1) * len(X)
        #    print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
        
        #loss, current = loss.item(), (batch + 1) * len(X)
        #print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


    loss_avg = total_loss/num_batches
    print(f"Epoch [{epoch+1}], Average Loss: {loss_avg:.4f}")
    writer.add_scalar("Loss/train", loss_avg, epoch)

In [35]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

## The step by step CNN

### GPT conv model

In [16]:
# train the CNN 

# input size
input_size = 8

# create the model object
modelCNNsimple = Simple1DCNN(8,1)
modelCNNsimple.to(device)

print(modelCNNsimple)

Simple1DCNN(
  (conv1): Conv1d(8, 32, kernel_size=(3,), stride=(1,), padding=(1,))
  (relu): ReLU()
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=80000, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
)


In [17]:
# Loss function for linear values (e.g., regression)
loss_fn = nn.MSELoss()  # Mean Squared Error loss

# Adam optimizer
optimizer = optim.Adam(modelCNNsimple.parameters(), lr=1e-3)  # You can adjust lr and other hyperparameters

In [18]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, modelCNNsimple, loss_fn, optimizer)
    #test(validate_dataloader, modelCNNsimple, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 4889.563477  [  128/10000]
Epoch 2
-------------------------------
loss: 58.853962  [  128/10000]
Epoch 3
-------------------------------
loss: 52.782303  [  128/10000]
Epoch 4
-------------------------------
loss: 53.903473  [  128/10000]
Epoch 5
-------------------------------
loss: 63.146935  [  128/10000]
Done!


### Step by step reimplimentation model

In [37]:
# train the CNN 

# input size
input_size = 8

# create the model object
modelCNN = CNN(8,1)
modelCNN.to(device)

print(modelCNN)

NameError: name 'CNN' is not defined

In [36]:
# Loss function for linear values (e.g., regression)
loss_fn = nn.MSELoss()  # Mean Squared Error loss

# Adam optimizer
optimizer = optim.Adam(modelCNN.parameters(), lr=1e-3)  # You can adjust lr and other hyperparameters


NameError: name 'modelCNN' is not defined

In [13]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, modelCNN, loss_fn, optimizer)
    #test(validate_dataloader, modelCNN, loss_fn)
print("Done!")

Epoch 1
-------------------------------


  return F.mse_loss(input, target, reduction=self.reduction)


loss: 4849.607422  [  128/10000]
loss: 665.255493  [  256/10000]
loss: 2766.375244  [  384/10000]
loss: 1767.146484  [  512/10000]
loss: 257.742554  [  640/10000]
loss: 385.777283  [  768/10000]
loss: 1080.425171  [  896/10000]
loss: 1292.255615  [ 1024/10000]
loss: 678.771240  [ 1152/10000]
loss: 180.054565  [ 1280/10000]
loss: 170.081192  [ 1408/10000]
loss: 638.198242  [ 1536/10000]
loss: 781.975647  [ 1664/10000]
loss: 479.890808  [ 1792/10000]
loss: 215.536194  [ 1920/10000]
loss: 119.576218  [ 2048/10000]
loss: 236.538467  [ 2176/10000]
loss: 437.979401  [ 2304/10000]
loss: 437.651398  [ 2432/10000]
loss: 310.671509  [ 2560/10000]
loss: 135.758698  [ 2688/10000]
loss: 111.186447  [ 2816/10000]
loss: 239.832077  [ 2944/10000]
loss: 263.199524  [ 3072/10000]
loss: 278.564209  [ 3200/10000]
loss: 153.170227  [ 3328/10000]
loss: 86.712738  [ 3456/10000]
loss: 128.976303  [ 3584/10000]
loss: 181.366455  [ 3712/10000]
loss: 229.840302  [ 3840/10000]
loss: 150.572037  [ 3968/10000]
loss

  return F.mse_loss(input, target, reduction=self.reduction)


loss: 48.250198  [  128/10000]
loss: 83.936996  [  256/10000]
loss: 64.244156  [  384/10000]
loss: 53.363228  [  512/10000]
loss: 65.440781  [  640/10000]
loss: 56.322716  [  768/10000]
loss: 56.157646  [  896/10000]
loss: 70.731674  [ 1024/10000]
loss: 55.367603  [ 1152/10000]
loss: 68.248344  [ 1280/10000]
loss: 69.364876  [ 1408/10000]
loss: 58.776627  [ 1536/10000]
loss: 59.668190  [ 1664/10000]
loss: 50.147694  [ 1792/10000]
loss: 63.816574  [ 1920/10000]
loss: 68.932365  [ 2048/10000]
loss: 60.950100  [ 2176/10000]
loss: 58.609123  [ 2304/10000]
loss: 73.335083  [ 2432/10000]
loss: 64.686409  [ 2560/10000]
loss: 67.402145  [ 2688/10000]
loss: 60.515038  [ 2816/10000]
loss: 65.849777  [ 2944/10000]
loss: 59.836372  [ 3072/10000]
loss: 58.021591  [ 3200/10000]
loss: 58.721676  [ 3328/10000]
loss: 56.964191  [ 3456/10000]
loss: 59.605869  [ 3584/10000]
loss: 58.203415  [ 3712/10000]
loss: 52.594536  [ 3840/10000]
loss: 58.179470  [ 3968/10000]
loss: 59.045631  [ 4096/10000]
loss: 56

## Other stuff - correct one

### 2nd implimentation

In [48]:
input_shape = (8,5000)  # Modify this according to your input shape
# 128 is the batch size, 8 is the number of channels, 5000 is the number of time steps

output_size = 1  # Number of output units

model = KanResWide_X2(input_shape, output_size)
model.to(device)
print(model)

KanResWide_X2(
  (init_block): KanResInit(
    (conv1): Conv1d(8, 64, kernel_size=(8,), stride=(1,))
    (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv1d(64, 32, kernel_size=(3,), stride=(1,))
    (bn2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (pool): AvgPool1d(kernel_size=(2,), stride=(2,), padding=(0,))
  (module_blocks): Sequential(
    (0): KanResModule(
      (conv1): Conv1d(32, 64, kernel_size=(50,), stride=(1,), padding=same)
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv1d(64, 32, kernel_size=(50,), stride=(1,), padding=same)
      (bn2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (1): KanResModule(
      (conv1): Conv1d(32, 64, kernel_size=(50,), stride=(1,), padding=same)
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1

In [49]:
import torch.optim as optim

# Loss function for linear values (e.g., regression)
loss_fn = nn.MSELoss()  # Mean Squared Error loss

# Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-3)  # You can adjust lr and other hyperparameters

# use Nadam optimizer
optimizerN = optim.NAdam(model.parameters(), lr=0.0005)

In [50]:
def validate(dataloader, model, loss_fn, device):
    #print(pr_max_val)
    #print(pr_min_val)
    #print(qt_max_val)
    #print(qt_min_val)
    #print(qrs_max_val)
    #print(qrs_min_val)

    model.eval()  # Set the model to evaluation mode
    val_losses_epoch = []
    #val_real_epoch = []

    with torch.no_grad():
        for batch, (X, y) in enumerate(dataloader):
            X, y = X.to(device), y.to(device)

            #print(X.shape)
            #print(y.shape)
            #exit()
            # Compute predictions
            pred = model(X)
            loss = loss_fn(pred, y)

            #print(X)
            #print(y)        # y is inf?
            #print(pred)
            #print(loss)
            #exit()

            #convert pred to real values
            #if (y_parameter == 'hr'):
                #val_losses_epoch.append(loss.item())
                #val_real_epoch.append(loss.item())
            #elif (y_parameter == 'pr'):
                #predr = pred * (pr_max_val - pr_min_val) + pr_min_val
                #lossr = loss_fn(predr, y)
                #val_real_epoch.append(lossr.item())
            #elif (y_parameter == 'qt'):
                #predr = pred * (qt_max_val - qt_min_val) + qt_min_val
                #lossr = loss_fn(predr, y)
                #val_real_epoch.append(lossr.item())
            #elif (y_parameter == 'qrs'):
                #predr = pred * (qrs_max_val - qrs_min_val) + qrs_min_val
                #lossr = loss_fn(predr, y)
                #val_real_epoch.append(lossr.item())

            val_losses_epoch.append(loss.item())
    print(f"Validation Loss: {np.mean(val_losses_epoch):.4f}")
    return np.mean(val_losses_epoch) #, np.mean(val_real_epoch)

In [51]:
epochs = 100
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizerN)
    #trainTB(train_dataloader, model, loss_fn, optimizerN,t)
    #writer.flush()
    x = validate(validate_dataloader, model, loss_fn, device)
print("Done!")
#writer.close()

Epoch 1
-------------------------------
loss: 14120.471680  [10000/10000]
Validation Loss: 16971.2314
Epoch 2
-------------------------------
loss: 1879.231079  [10000/10000]
Validation Loss: 517.1336
Epoch 3
-------------------------------
loss: 1901.704956  [10000/10000]
Validation Loss: 438.1177
Epoch 4
-------------------------------
loss: 464.814484  [10000/10000]
Validation Loss: 404.2698
Epoch 5
-------------------------------
loss: 69.059845  [10000/10000]
Validation Loss: 367.2397
Epoch 6
-------------------------------
loss: 172.954086  [10000/10000]
Validation Loss: 393.0334
Epoch 7
-------------------------------
loss: 472.719543  [10000/10000]
Validation Loss: 428.0073
Epoch 8
-------------------------------
loss: 988.655273  [10000/10000]
Validation Loss: 575.9935
Epoch 9
-------------------------------
loss: 178.297409  [10000/10000]
Validation Loss: 390.9721
Epoch 10
-------------------------------
loss: 726.472412  [10000/10000]
Validation Loss: 383.0849
Epoch 11
-----

In [66]:
model = ResidualCNN(num_classes)

# criterion = nn.CrossEntropyLoss()
criterion = nn.MSELoss()

# optimizer = optim.Adam(model.parameters(), lr=learning_rate)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [67]:
# Set up the wandb configuration and log hyperparameters
wandb.config.num_epochs = num_epochs
wandb.config.learning_rate = learning_rate

In [68]:
def MAE(losses):
    error_sum = 0
    for loss in losses:
        absolute_error = abs(loss - 0)  # Assuming 0 is the target value
        error_sum += absolute_error

    mean_absolute_error = error_sum / len(losses)
    return mean_absolute_error

In [69]:
%%time

train_losses = []
val_losses = []
epochs = []

for epoch in range(wandb.config.num_epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    epochs.append(epoch)

    train_losses_epoch = [] 
    for batch_inputs, batch_labels in train_dataloader:

        # Forward pass
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_labels)
        train_losses_epoch.append(int(loss))

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    
    train_loss = MAE(train_losses_epoch)
    train_losses.append(train_loss)


    model.eval()
    with torch.no_grad():
        val_losses_epoch = []  # List to store validation losses for the current epoch
        for batch, (X_val, y_val) in enumerate(validate_dataloader):
            #X_val, y_val = X_val.to(device), y_val.to(device)

            val_pred = model(X_val)
            val_loss = criterion(val_pred, y_val)

            val_losses_epoch.append(int(val_loss))

        val_loss = MAE(val_losses_epoch)
        val_losses.append(val_loss)

wandb.log({"ResNet: loss [mean absolute error] vs epoch" : wandb.plot.line_series(
                       xs=epochs, 
                       ys=[train_losses, val_losses],
                       keys=["training", "validation"],
                       title="",
                       xname="epochs")})

print("Done!")

Epoch 1
-------------------------------


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 2
-------------------------------


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 3
-------------------------------
Epoch 4
-------------------------------
Epoch 5
-------------------------------
Epoch 6
-------------------------------
Epoch 7
-------------------------------
Epoch 8
-------------------------------
Epoch 9
-------------------------------
Epoch 10
-------------------------------
Epoch 11
-------------------------------
Epoch 12
-------------------------------
Epoch 13
-------------------------------
Epoch 14
-------------------------------
Epoch 15
-------------------------------
Epoch 16
-------------------------------
Epoch 17
-------------------------------
Epoch 18
-------------------------------
Epoch 19
-------------------------------
Epoch 20
-------------------------------
Epoch 21
-------------------------------
Epoch 22
-------------------------------
Epoch 23
-------------------------------
Epoch 24
-------------------------------
Epoch 25
-------------------------------
Epoch 26
-------------------------------
Epoch 27
--------------

In [70]:
# finish
wandb.finish()