In [None]:
import pandas as pd
from google.colab import drive
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import seaborn as sns

In [None]:
max_accuracy = 30
drive.mount('/content/drive')

# Read the CSV from the mounted drive: the "Label" column holds the targets,
# every other column is treated as feature data.
train = pd.read_csv(r"/content/drive/MyDrive/ECG Project Files/normalized_processed_dataset(1_signal).csv", delimiter=",")
targets_numpy = train.Label.values
features_numpy = train.loc[:,train.columns != "Label"].values
features_numpy_length = features_numpy.shape[0]
print(features_numpy.shape)
processed_data = []
processed_labels = []

# Data conversion from string into numpy array:
# the first feature column of each row is a comma-separated string of samples.
for row_index in range(features_numpy_length):
    try:
        samples = [float(token) for token in features_numpy[row_index][0].split(',')]
    except ValueError:
        # A token that cannot be parsed as a float: report the row and drop it.
        print("Line # that contains an unexpected character:", row_index)
        continue

    # Only signals of at most 400 samples are kept (longer ones are discarded).
    if len(samples) <= 400:
        processed_data.append(samples)
        processed_labels.append(targets_numpy[row_index])

In [None]:
# Padding data
# Every signal is centred in a 400-sample window; the gap on each side is
# filled by repeating the nearest edge value (np.pad mode='edge').
padded_rows = []
for signal in processed_data:
    missing = 400 - len(signal)
    pad_left = int(missing / 2)
    pad_right = missing - pad_left  # takes the extra sample when `missing` is odd
    padded_rows.append(np.pad(signal, (pad_left, pad_right), mode='edge'))

padded_data = np.stack(padded_rows)
features_numpy = np.array(padded_data)
targets_numpy = np.array(processed_labels)

In [None]:
# Size check: features first, then targets
for checked_array in (features_numpy, targets_numpy):
    print(checked_array.shape)

In [None]:
# Retrieving numbers for samples in each class

# save the original training examples
old_train_values, old_train_labels = features_numpy, targets_numpy

# One bucket of signals and one bucket of labels per class code 0..5.
# The buckets are plain Python lists of rows / ints.
values_by_class = {class_code: [] for class_code in range(6)}
labels_by_class = {class_code: [] for class_code in range(6)}

# categorize into classes (rows whose label is not 0..5 are left out)
for signal, label in zip(features_numpy, targets_numpy):
    for class_code in range(6):
        if label == class_code:
            values_by_class[class_code].append(signal)
            labels_by_class[class_code].append(class_code)
            break

# Expose the buckets under the per-class names (N, B, S, V, F, Q <-> 0..5).
temp_values_N, temp_labels_N = values_by_class[0], labels_by_class[0]
temp_values_B, temp_labels_B = values_by_class[1], labels_by_class[1]
temp_values_S, temp_labels_S = values_by_class[2], labels_by_class[2]
temp_values_V, temp_labels_V = values_by_class[3], labels_by_class[3]
temp_values_F, temp_labels_F = values_by_class[4], labels_by_class[4]
temp_values_Q, temp_labels_Q = values_by_class[5], labels_by_class[5]

# Report how many samples each class holds.
size_report = [
    ("N size", temp_values_N),
    ("B size", temp_values_B),
    ("S size", temp_values_S),
    ("V size", temp_values_V),
    ("F size", temp_values_F),
    ("Q size", temp_values_Q),
]
for caption, bucket in size_report:
    print(caption, len(bucket))

In [None]:
# Data augmentation (oversampling by duplication)
#
# Each listed class is appended `factor` additional times to the data set.
# `class_values * factor` is Python list repetition, i.e. exact copies of the
# same rows; no new signals are synthesised.
#
# NOTE(review): the copies are added before train_test_split runs in a later
# cell, so duplicates of one signal can end up in both the training and the
# validation split, which makes the reported validation accuracy optimistic.
# Consider splitting first and oversampling only the training part.
# NOTE(review): this cell extends old_train_values in place of its previous
# value; re-run the class-bucketing cell before re-running this one.
oversampling_plan = [
    (temp_values_B, temp_labels_B, 4),
    (temp_values_S, temp_labels_S, 29),
    (temp_values_V, temp_labels_V, 10),
    (temp_values_F, temp_labels_F, 87),
    (temp_values_Q, temp_labels_Q, 8),
]

for class_values, class_labels, factor in oversampling_plan:
    # A class without samples yields an empty list; appending that to the 2-D
    # feature array along axis 0 raises a dimension-mismatch ValueError, so
    # such a class is skipped instead of aborting the cell.
    if class_values:
        old_train_values = np.append(old_train_values, class_values * factor, axis=0)
        old_train_labels = np.append(old_train_labels, class_labels * factor, axis=0)
    print(old_train_values.shape)

# Adding back into the original numpy arrays
targets_numpy = old_train_labels
features_numpy = old_train_values

In [None]:
# Creating and loading dataloaders

# Hyperparameters come first: the DataLoaders below need batch_size, and
# defining it after them raises NameError on a fresh kernel.
batch_size = 100
n_iters = 35000

# Train to validation data ratio is selected as 80% to 20%, respectively
features_train, features_test, targets_train, targets_test = train_test_split(features_numpy, targets_numpy, test_size = 0.2, random_state = 42)

# Wrap the NumPy splits as tensors (torch.from_numpy shares memory with the arrays).
featuresTrain = torch.from_numpy(features_train)
targetsTrain = torch.from_numpy(targets_train)
featuresTest = torch.from_numpy(features_test)
targetsTest = torch.from_numpy(targets_test)

train = TensorDataset(featuresTrain,targetsTrain)
test = TensorDataset(featuresTest,targetsTest)

# Training batches are reshuffled every epoch; the evaluation loader keeps a
# fixed order because shuffling does not change the accuracy and a fixed order
# makes the collected predictions reproducible.
train_loader = DataLoader(train, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(test, batch_size = batch_size, shuffle = False)

# Calculating number of epochs from batch size and number of iterations:
# one epoch is len(features_train) / batch_size iterations.
num_epochs = int(n_iters / (len(features_train) / batch_size))
print("Epoch #:",num_epochs)

In [None]:
# IPython shell escape: runs the `nvidia-smi` command and shows its output in the cell.
!nvidia-smi
# Request GPU execution. The model and training cells additionally check
# torch.cuda.is_available() before moving anything to CUDA.
use_cuda = True

In [None]:
import math

# Lets make sure the augmented data is in the right type (non nan)
print(features_numpy.shape)

# Vectorised scan: np.isnan checks the whole array at once instead of calling
# math.isnan on every element in a Python double loop. One "NaN" line is still
# printed per offending value, so the output is the same as before.
nan_count = int(np.isnan(features_numpy).sum())
for _ in range(nan_count):
    print("NaN")

In [None]:
class RNNModel(nn.Module):
    """LSTM sequence classifier: LSTM -> ReLU on the last time step -> linear layer.

    Args:
        input_dim: number of features per time step.
        hidden_dim: size of the LSTM hidden state.
        layer_dim: number of stacked LSTM layers.
        output_dim: number of output scores (one per class).
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.layer_dim = layer_dim
        self.hidden_dim = hidden_dim
        # batch_first=True: inputs and outputs are laid out (batch, seq, feature).
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc_2 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return class scores of shape (batch, output_dim) for x of shape (batch, seq, input_dim)."""
        # Zero initial hidden/cell states, created directly on the input's
        # device and dtype so the module works on any GPU index and precision
        # (a bare .cuda() always targets the default GPU and float32).
        h0 = torch.zeros(
            self.layer_dim, x.size(0), self.hidden_dim,
            device=x.device, dtype=x.dtype,
        )
        c0 = torch.zeros_like(h0)

        out, _ = self.lstm(x, (h0, c0))
        # Only the last time step feeds the classifier, so the ReLU is applied
        # to that slice alone; being element-wise, the result is identical to
        # activating the whole sequence first.
        last_step = self.relu(out[:, -1, :])
        return self.fc_2(last_step)

# Network dimensions: one feature per time step, 100 hidden units,
# a single LSTM layer and six output scores.
input_dim, hidden_dim, layer_dim, output_dim = 1, 100, 1, 6
model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)

# Move the model to the GPU only when it was requested and CUDA is present.
if not (use_cuda and torch.cuda.is_available()):
    print('CUDA is not available.  Training on CPU ...')
else:
    model = model.cuda()
    print('CUDA is available!  Training on GPU ...')

# Cross-entropy loss optimised with plain SGD.
learning_rate = 0.05
error = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [None]:
seq_dim = 400
loss_list = []
iteration_list = []
accuracy_list = []
count = 0
predictions_list = []
max_accuracy = 0
labels_list = []

# Decide once whether batches have to be moved to the GPU.
on_gpu = use_cuda and torch.cuda.is_available()

for epoch in range(num_epochs):
    for i, (signals, labels) in enumerate(train_loader):

        if on_gpu:
            signals = signals.cuda()
            labels = labels.cuda()

        # Reshape every signal to (batch, seq_dim, input_dim) for the LSTM.
        # A dedicated name is used so the `train` dataset is not overwritten.
        batch_inputs = signals.float().view(-1, seq_dim, input_dim)

        optimizer.zero_grad()
        outputs = model(batch_inputs)
        loss = error(outputs, labels)
        loss.backward()
        optimizer.step()
        count += 1

        # Every 100 training iterations: measure accuracy on the held-out split.
        if count % 100 == 0:
            correct = 0
            total = 0

            model.eval()
            # No gradients are needed for evaluation; skipping them saves memory and time.
            with torch.no_grad():
                # Separate names keep the evaluation batches from shadowing the training batch.
                for eval_signals, eval_labels in test_loader:
                    if on_gpu:
                        eval_signals = eval_signals.cuda()
                        eval_labels = eval_labels.cuda()

                    eval_inputs = eval_signals.float().view(-1, seq_dim, input_dim)
                    eval_outputs = model(eval_inputs)
                    # Index of the highest score per sample = predicted class.
                    predicted = torch.max(eval_outputs, 1)[1]

                    # NOTE(review): these lists accumulate the results of every
                    # evaluation pass, not only the most recent one.
                    predictions_list.append(predicted.cpu().numpy())
                    labels_list.append(eval_labels.cpu().numpy())

                    # size is a method (size(0)), and hits are counted per
                    # sample: truth-testing a multi-element tensor raises.
                    total += eval_labels.size(0)
                    correct += (predicted == eval_labels).sum().item()
            model.train()

            accuracy = 100 * correct / total
            # Store a plain float so no tensor (possibly on the GPU) is kept alive.
            loss_value = loss.item()
            loss_list.append(loss_value)
            iteration_list.append(count)
            accuracy_list.append(accuracy)
            if count % 200 == 0:
                print('Iteration: {}  Loss: {}  Accuracy: {} %'.format(count, loss_value, accuracy))

            # save the best performing model
            if accuracy > max_accuracy:
                max_accuracy = accuracy
                torch.save(model, "rnn_lstm_model#"+str(count)+".pt")

In [None]:
import matplotlib.pyplot as plt

# Turn the logged histories into NumPy arrays for plotting.
accuracy_list = torch.tensor(accuracy_list).detach().cpu().numpy()
iteration_list = torch.tensor(iteration_list).detach().cpu().numpy()
loss_list = torch.tensor(loss_list).detach().cpu().numpy()

# Only the first `stop_iteration` logged points are drawn.
stop_iteration = 90
shown_iterations = iteration_list[:stop_iteration]

# loss visualization
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(shown_iterations, loss_list[:stop_iteration])
loss_ax.set_xlabel("Number of iteration")
loss_ax.set_ylabel("Loss")
loss_ax.set_title("LSTM: Loss vs Number of iteration")
plt.show()

# accuracy visualization (this figure is also written to graph.png)
accuracy_fig, accuracy_ax = plt.subplots()
accuracy_ax.plot(shown_iterations, accuracy_list[:stop_iteration], color="red")
accuracy_ax.set_xlabel("Number of iteration")
accuracy_ax.set_ylabel("Accuracy")
accuracy_ax.set_title("LSTM: Accuracy vs Number of iteration")
accuracy_fig.savefig('graph.png')
plt.show()