#### 1. Setup and Importing Libraries

In [175]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset # wraps an iterable around the dataset
from torchvision import datasets    # stores the samples and their corresponding labels
from torchvision.transforms import transforms  # transformations we can perform on our dataset
from torchvision.transforms import ToTensor
import pandas as pd
import numpy as np
import os
import wandb
import matplotlib.pyplot as plt

In [176]:
# W&B credentials
# The API key is deliberately NOT stored in this notebook: anything saved in a cell is
# readable by everyone with access to the file or its history. Any key that has ever
# been committed to a notebook must be treated as compromised and revoked in the W&B
# account settings. Supply the key from outside instead, e.g. `export WANDB_API_KEY=...`
# before starting Jupyter, or a one-off `wandb login` on this machine.
if not os.environ.get("WANDB_API_KEY"):
    # Informational only; wandb.init() is expected to fall back to stored credentials
    # or to an interactive prompt when the variable is absent.
    print("WANDB_API_KEY is not set; wandb.init() will use stored credentials or ask for a key.")

# W&B cannot always detect the notebook file on its own, so name it explicitly (manually)
os.environ["WANDB_NOTEBOOK_NAME"] = "DataLoader-HR.ipynb"

# Point W&B's temp, run and config directories at the working directory,
# where we have write permissions
working_dir = os.getcwd()
os.environ["WANDB_TEMP"] = working_dir
os.environ["WANDB_DIR"] = working_dir
os.environ["WANDB_CONFIG_DIR"] = working_dir

In [177]:
# Start a W&B run; everything logged below is grouped under this project
WANDB_PROJECT = 'ECG-analysis-with-Deep-Learning-on-GPU-accelerators'
wandb.init(project=WANDB_PROJECT)

In [178]:
# Pick the compute device for training: CUDA GPU first, then Apple MPS, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

#### 2. Data Loader

In [179]:
class ECGDataSet(Dataset):
    """Map-style dataset that pairs an ECG recording with its heart rate.

    Files read from the dataset directory:
        <split>.csv          one row per recording; the columns `patid` and
                             `avgrrinterval` (average RR interval in milliseconds) are used
        <split>/<patid>.asc  space-separated signal values without a header row

    Each item is a tuple `(ecg_signals, heart_rate)`:
        ecg_signals  float32 tensor, the file contents divided by `SIGNAL_SCALE`
                     and transposed (rows of the file become columns)
        heart_rate   0-dim float32 tensor, 60 * 1000 / avgrrinterval (beats per minute)
    """

    # Divisor used to normalise the raw signal values.
    SIGNAL_SCALE = 6000

    def __init__(self, split='train', data_dir=None):
        """Load the metadata table for one split and pre-compute all targets.

        Args:
            split: name of the subset; selects `<split>.csv` and the `<split>/` folder.
            data_dir: directory containing those files. When omitted, defaults to
                `<parent of the current working directory>/data/deepfake-ecg-small`.
        """
        self.split = split

        # Resolved once here, so a later os.chdir() does not move the dataset.
        self.parent_directory = os.path.dirname(os.getcwd())
        if data_dir is None:
            data_dir = os.path.join(self.parent_directory, 'data', 'deepfake-ecg-small')
        self.data_dir = data_dir

        # The first row of the CSV is consumed as the header (pandas default).
        self.df = pd.read_csv(os.path.join(self.data_dir, str(self.split) + '.csv'))

        # Cached so __getitem__ does not rebuild the array for every sample.
        self.patient_ids = self.df['patid'].values

        # Average RR interval in milliseconds -> heart rate in beats per minute.
        # NOTE(review): an RR interval of 0 would yield inf here - confirm the data excludes it.
        rr_ms = torch.tensor(self.df['avgrrinterval'].values, dtype=torch.float32)
        self.y = 60 * 1000 / rr_ms

        # Size of the dataset
        self.samples = self.df.shape[0]

    def __getitem__(self, index):
        """Read one recording from disk and return `(ecg_signals, heart_rate)`."""
        asc_path = os.path.join(
            self.data_dir, str(self.split), str(self.patient_ids[index]) + '.asc'
        )

        recording = pd.read_csv(asc_path, header=None, sep=" ")
        ecg_signals = torch.tensor(recording.values, dtype=torch.float32)

        # Normalise first, then transpose so each column of the file becomes a row.
        ecg_signals = (ecg_signals / self.SIGNAL_SCALE).t()

        heart_rate = self.y[index]
        return ecg_signals, heart_rate

    def __len__(self):
        """Return the number of recordings listed in the split's CSV."""
        return self.samples
    

In [180]:
# Build one dataset object per split (training and validation)
train_dataset = ECGDataSet('train')
validate_dataset = ECGDataSet('validate')

In [181]:
# Pull the first (signal, target) pair out of the training set as a sanity check
first_data = train_dataset[0]
x, y = first_data[0], first_data[1]

In [182]:
# Signal tensor of the first training sample (values already divided by 6000)
x

tensor([[-0.0212, -0.0270, -0.0237,  ..., -0.0148, -0.0065, -0.0155],
        [-0.0002,  0.0000, -0.0077,  ..., -0.0030,  0.0037,  0.0008],
        [-0.0055, -0.0013, -0.0045,  ...,  0.0073,  0.0118,  0.0137],
        ...,
        [-0.0153, -0.0143, -0.0145,  ...,  0.0112,  0.0148,  0.0175],
        [-0.0102, -0.0112, -0.0117,  ...,  0.0087,  0.0147,  0.0043],
        [ 0.0003, -0.0048, -0.0042,  ...,  0.0115,  0.0213,  0.0192]])

In [183]:
# Target of the first training sample: 60 * 1000 / average RR interval (ms)
y

tensor(59.6421)

In [184]:
# Shape of one signal tensor after the transpose in ECGDataSet.__getitem__
x.shape

torch.Size([8, 5000])

In [185]:
# The target is a 0-dim (scalar) tensor
y.shape

torch.Size([])

In [186]:
# Data loaders: batch the datasets and read samples in background worker processes so
# that batches can be iterated efficiently during training and evaluation.
# Only the training data is shuffled. Validation is read in a fixed order so that the
# per-epoch validation loss (a mean over batches) is reproducible from run to run.
BATCH_SIZE = 32
NUM_WORKERS = 2

train_dataloader = DataLoader(
    dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS
)
validate_dataloader = DataLoader(
    dataset=validate_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS
)

In [187]:
# Look at a single batch to confirm the batched shapes and dtypes, then stop
for x, y in train_dataloader:
    print(f"{x.shape} {y.shape}\n{x.dtype} {y.dtype}")
    break

torch.Size([32, 8, 5000]) torch.Size([32])
torch.float32 torch.float32


### Neural Network

In [188]:
# Training hyperparameters: number of passes over the training set and the Adam step size
num_epochs, learning_rate = 100, 1e-6

In [189]:
# Record the hyperparameters with the W&B run so they appear next to its metrics
wandb.config.update({
    "num_epochs": num_epochs,
    "learning_rate": learning_rate,
})

In [190]:
# Define the model
# nn.Module --> base class for all neural network modules
class NeuralNetwork(nn.Module):
    """Fully connected regressor that maps a flattened recording to a single value.

    The input is flattened from dimension 1 onwards and passed through three hidden
    linear layers with ReLU activations (1000, 1000 and 500 units) followed by a
    linear layer with one output.
    """

    def __init__(self, in_features=8 * 5000):
        """Build the layers.

        Args:
            in_features: number of values per sample after flattening. The default,
                8 * 5000, matches the (8, 5000) tensors produced by the dataset in
                this notebook.
        """
        # nn.Module.__init__ sets up the internal bookkeeping (parameter and
        # sub-module registration) and must run before any layer is assigned.
        super().__init__()

        self.flatten = nn.Flatten()

        # nn.Sequential applies the stacked layers one after another
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, 1000),
            nn.ReLU(),
            nn.Linear(1000, 1000),
            nn.ReLU(),
            nn.Linear(1000, 500),
            nn.ReLU(),
            nn.Linear(500, 1),
        )

    def forward(self, x):
        """Compute the forward pass and return a tensor of shape (batch, 1)."""
        # Call the sub-modules themselves rather than their .forward() methods:
        # Module.__call__ runs forward() AND any registered hooks, which a direct
        # .forward() call silently skips.
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
    
# Create the network, move its parameters to the selected device and list its layers
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=40000, out_features=1000, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1000, out_features=1000, bias=True)
    (3): ReLU()
    (4): Linear(in_features=1000, out_features=500, bias=True)
    (5): ReLU()
    (6): Linear(in_features=500, out_features=1, bias=True)
  )
)


In [191]:
# Objective and optimiser: mean squared error between prediction and target,
# minimised with Adam over every parameter of the model
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [192]:
%%time

# Per-epoch history: mean batch loss on the training and validation sets
train_losses = []
val_losses = []
epochs = []

for t in range(num_epochs):
    print(f"Epoch {t+1}\n-------------------------------")

    # ---- Training pass ----
    # model.train() switches layers such as dropout / batch norm to training behaviour
    model.train()
    train_losses_epoch = []

    for X, y in train_dataloader:
        # Move the batch to the same device as the model
        X, y = X.to(device), y.to(device)

        # Forward pass. The network outputs (batch, 1) while the targets are (batch,);
        # drop the trailing dimension so MSELoss compares them element-wise instead of
        # broadcasting both to (batch, batch) and optimising the wrong objective.
        pred = model(X).squeeze(-1)
        loss = loss_fn(pred, y)

        # .item() keeps the fractional part of the loss (int() would truncate it)
        train_losses_epoch.append(loss.item())

        # Backpropagation
        loss.backward()        # gradients of the loss w.r.t. the model parameters
        optimizer.step()       # update the parameters
        optimizer.zero_grad()  # clear the gradients, otherwise they accumulate

    train_loss = float(np.mean(train_losses_epoch))
    train_losses.append(train_loss)

    # ---- Validation pass ----
    model.eval()
    val_losses_epoch = []  # validation losses for the current epoch
    with torch.no_grad():  # no gradients are needed for evaluation
        for X_val, y_val in validate_dataloader:
            X_val, y_val = X_val.to(device), y_val.to(device)

            val_pred = model(X_val).squeeze(-1)  # same shape fix as in training
            val_loss = loss_fn(val_pred, y_val)

            val_losses_epoch.append(val_loss.item())

    val_loss = float(np.mean(val_losses_epoch))
    val_losses.append(val_loss)

    epochs.append(t)
    print(f"train loss: {train_loss:.4f} | validation loss: {val_loss:.4f}")


# One chart with both curves, logged once after the final epoch
wandb.log({"loss vs epoch" : wandb.plot.line_series(
                       xs=epochs,
                       ys=[train_losses, val_losses],
                       keys=["training", "validation"],
                       title="",
                       xname="epochs",
                       yname="loss")})

print("Done!")

SyntaxError: invalid syntax. Perhaps you forgot a comma? (<unknown>, line 57)

In [None]:
# Close the W&B run that was started by wandb.init() at the top of the notebook
wandb.finish()