# Model

epoch = 1 forward and backward pass of all training samples

batch_size = number of training samples in one forward & backward pass

number of iterations = number of passes, each pass using [batch_size] number of samples

e.g. 100 samples, batch_size=20 --> 100/20 = 5 iterations for 1 epoch 

1) Design model (input, output size, forward pass)

2) Construct loss and optimizer

3) Training Loop
    - forward pass: compute prediction
    - backward pass: gradients
    - update weights: 

In [1]:
# import libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import os

In [2]:
# getting the path for the dataset

# Get the parent directory
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)

train_small_path = os.path.join(parent_directory, 'data','deepfake-ecg-small','train.csv')
train_small_path_2 = os.path.join(parent_directory, 'data','deepfake-ecg-small','train')
print(train_small_path)
print(train_small_path_2)

c:\Users\Hansa Alahakoon\Desktop\4th year\FYP\e17-4yp-Comprehensive-ECG-analysis-with-Deep-Learning-on-GPU-accelerators\data\deepfake-ecg-small\train.csv
c:\Users\Hansa Alahakoon\Desktop\4th year\FYP\e17-4yp-Comprehensive-ECG-analysis-with-Deep-Learning-on-GPU-accelerators\data\deepfake-ecg-small\train


In [3]:
ecg_tensors = []
for filename in os.listdir(train_small_path_2):
    file_path = os.path.join(train_small_path_2, filename)
    ecg_data = np.loadtxt(file_path)
    ecg_tensor = torch.from_numpy(ecg_data)
    ecg_tensor = ecg_tensor.permute(1, 0).unsqueeze(2)
    ecg_tensors.append(ecg_tensor)

print(ecg_tensors[0].shape)
print(len(ecg_tensors))
# ecg_tensors = torch.stack(ecg_tensors, dim=2)
# print(ecg_tensors.shape)

torch.Size([8, 5000, 1])
10000


In [4]:
class EcgDataset(Dataset):
    def __init__(self):

        # Read the CSV file using pandas
        data_frame = pd.read_csv(train_small_path)

        # Extract the column data as a pandas Series
        y_column_data = data_frame['qt']

        # Convert the pandas Series to a PyTorch tensor
        self.x_data = ecg_tensors
        self.y_data = torch.tensor(y_column_data.values)

        self.n_samples = len(ecg_tensors)
        
    # support indexing such that dataset[i] can be used to get i-th sample
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    # we can call len(dataset) to return the size
    def __len__(self):
        return self.n_samples
    
# create dataset
dataset = EcgDataset()

# get first sample and unpack
first_data = dataset[0]
features, labels = first_data
print(features, labels)

tensor([[[-127.],
         [-162.],
         [-142.],
         ...,
         [ -89.],
         [ -39.],
         [ -93.]],

        [[  -1.],
         [   0.],
         [ -46.],
         ...,
         [ -18.],
         [  22.],
         [   5.]],

        [[ -33.],
         [  -8.],
         [ -27.],
         ...,
         [  44.],
         [  71.],
         [  82.]],

        ...,

        [[ -92.],
         [ -86.],
         [ -87.],
         ...,
         [  67.],
         [  89.],
         [ 105.]],

        [[ -61.],
         [ -67.],
         [ -70.],
         ...,
         [  52.],
         [  88.],
         [  26.]],

        [[   2.],
         [ -29.],
         [ -25.],
         ...,
         [  69.],
         [ 128.],
         [ 115.]]], dtype=torch.float64) tensor(434)


In [5]:
# Load whole dataset with DataLoader
# shuffle: shuffle data, good for training
# num_workers: faster loading with multiple subprocesses
# !!! IF YOU GET AN ERROR DURING LOADING, SET num_workers TO 0 !!!
batch_size = 32
dataloader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=2)

# # convert to an iterator and look at one random sample
# dataiter = iter(train_loader)
# data = next(dataiter)
# features, labels = data
# print(features, labels)

In [6]:
# CNN model
class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv1d(8, 16, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.fc = nn.Linear(16 * 2500, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


In [7]:
# hyperparameters
num_classes = 1  # Number of output classes
learning_rate = 0.001
num_epochs = 1

In [8]:
model = CNN(num_classes)

# criterion = nn.CrossEntropyLoss()
criterion = nn.MSELoss()

# optimizer = optim.Adam(model.parameters(), lr=learning_rate)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [9]:
for epoch in range(num_epochs):
    for batch_inputs, batch_labels in dataloader:
        # Forward pass
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Print the loss after every epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")