In [48]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
from pyts.image import GramianAngularField
from pyts.datasets import load_gunpoint
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.utils import resample
from imblearn.combine import SMOTETomek
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import TomekLinks, NearMiss
from sklearn.model_selection import train_test_split
import h5py
from PIL import Image
from torchvision import datasets, transforms

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

import torch.nn as nn
from torchvision.models import alexnet, vgg16, resnet152, resnet18, vgg19
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import classification_report, recall_score, f1_score, precision_score
from sklearn.metrics import confusion_matrix

In [11]:
# Load the normal and abnormal heartbeat recordings from their CSV files.
# header=None: the first row is treated as data and the columns are numbered 0..N-1.
df_hb_normal = pd.read_csv('ptbdb_normal.csv', header=None)
df_hb_abnormal = pd.read_csv('ptbdb_abnormal.csv', header=None)

In [16]:
# Stack the normal rows first, then the abnormal rows. Each frame keeps its own row
# labels here, so the index contains duplicates until it is reset in the next cell.
df_hb = pd.concat([df_hb_normal, df_hb_abnormal])

In [17]:
# Renumber the rows 0..n-1. drop=True discards the old (duplicated) row labels directly
# instead of first materialising them as an 'index' column that then has to be dropped.
df_hb = df_hb.reset_index(drop=True)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,178,179,180,181,182,183,184,185,186,187
0,1.000000,0.900324,0.358590,0.051459,0.046596,0.126823,0.133306,0.119125,0.110616,0.113047,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.000000,0.794681,0.375387,0.116883,0.000000,0.171923,0.283859,0.293754,0.325912,0.345083,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.909029,0.791482,0.423169,0.186712,0.000000,0.007836,0.063032,0.077002,0.074957,0.077342,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.000000,0.478893,0.056760,0.064176,0.081289,0.072732,0.055619,0.048774,0.054478,0.041643,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.000000,0.867238,0.201360,0.099349,0.141336,0.120934,0.108516,0.096393,0.093436,0.100828,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14547,0.981409,1.000000,0.559171,0.287093,0.196639,0.204862,0.215946,0.243833,0.242760,0.250268,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
14548,0.906250,0.922379,0.878024,0.810484,0.712702,0.667339,0.608871,0.527218,0.480847,0.442540,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
14549,1.000000,0.867971,0.674122,0.470332,0.296987,0.169307,0.077664,0.081392,0.074868,0.089779,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
14550,1.000000,0.984672,0.658888,0.556394,0.446809,0.395790,0.315260,0.276367,0.261039,0.258522,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [18]:
# Columns 0..186 (187 values per row) are the signal samples used as features.
x = df_hb.iloc[:,:187]
# Column 187 is the class label: in the frame preview it is 0.0 for the rows that came
# from the normal file and 1.0 for the rows that came from the abnormal file.
y = df_hb[187]

In [19]:
# Stratified 80/20 split, so both subsets keep the same class ratio as the full data.
# random_state pins the shuffle: without it every run of this cell produces a different
# split, which makes the exported images and the reported test metrics irreproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, stratify=y, test_size=0.2, random_state=42)

In [20]:
# Encode every 1-D signal as a 150x150 Gramian Angular Field image.
# NOTE(review): method='difference' selects the *difference* field (GADF), although the
# variables are named "gasf"; method='summation' would be needed for a true GASF.
gasf = GramianAngularField(image_size=150, method='difference')
# Fit on the training split only, then apply the same transformer to the test split.
x_gasf_train = gasf.fit_transform(x_train)
x_gasf_test = gasf.transform(x_test)

In [25]:
# Resize every Gramian image to 224x224 and persist both splits in one HDF5 file.
# Datasets written: x_train / x_test (float32, N x 224 x 224) and
# y_train / y_test (int32, N).
with h5py.File('hb_data_ptb.hdf5', mode='w') as hdf5_file:
    transform = transforms.Compose([transforms.Resize((224, 224))])

    # A single loop serves both splits. The loop variables are deliberately NOT named
    # `x` / `y`, so this cell no longer overwrites the notebook-level feature frame `x`
    # and label series `y` (which would break a re-run of the split cell's inputs).
    for split_name, split_images, split_labels in (('train', x_gasf_train, y_train),
                                                   ('test', x_gasf_test, y_test)):
        images_ds = hdf5_file.create_dataset(
            "x_{}".format(split_name), (len(split_images), 224, 224), np.float32)
        labels_ds = hdf5_file.create_dataset(
            "y_{}".format(split_name), (len(split_labels),), np.int32)

        for i, (field, label) in enumerate(zip(split_images, split_labels)):
            # array -> PIL float image -> resized image -> 224x224 array
            image = Image.fromarray(field.astype(float))
            images_ds[i, ...] = np.array(transform(image))
            labels_ds[i] = label

In [55]:
def _build_fc_head(in_features, num_classes):
    """Return the classifier head: in_features -> 256 -> 128 -> num_classes."""
    return nn.Sequential(
        nn.Linear(in_features=in_features, out_features=256, bias=False),
        nn.ReLU(),
        nn.Linear(in_features=256, out_features=128, bias=True),
        nn.ReLU(),
        nn.Linear(in_features=128, out_features=num_classes, bias=True))


# map_location='cpu' lets the checkpoint load on machines without a GPU; the model is
# moved to the GPU (when there is one) at the end of this cell.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
ckpt_recall = torch.load('res_net_test_recall_best.chk', map_location='cpu')

# NOTE(review): the pretrained weights are replaced by load_state_dict below, so
# downloading them here looks unnecessary - confirm before changing.
res_net_saved_recall = resnet18(pretrained=True)

# Freeze the backbone. The heads assigned below are created afterwards, so their
# parameters keep the default requires_grad=True and remain trainable.
for param in res_net_saved_recall.parameters():
    param.requires_grad = False

num_ftrs = res_net_saved_recall.fc.in_features

# The head must have 5 outputs while loading so that its shapes match the checkpoint.
res_net_saved_recall.fc = _build_fc_head(num_ftrs, 5)
res_net_saved_recall.load_state_dict(ckpt_recall['net'])

# Replace the restored head with a freshly initialised 2-class head for this task.
res_net_saved_recall.fc = _build_fc_head(num_ftrs, 2)

# Use the GPU only when one is available (same policy as the notebook's cuda() helper).
res_net_saved_recall.to('cuda' if torch.cuda.is_available() else 'cpu')

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [34]:
# Helper that places a tensor/model (or a collection of them) on the GPU when possible
def cuda(xs):
    """Move ``xs`` to the GPU if CUDA is available; otherwise return it untouched.

    A list or tuple is moved element by element and always comes back as a list.
    Any other object is moved through its own ``.cuda()`` method.
    """
    if not torch.cuda.is_available():
        return xs
    if isinstance(xs, (list, tuple)):
        return [item.cuda() for item in xs]
    return xs.cuda()

# Custom dataset that serves image/label pairs stored in an HDF5 file
class Dataset_Hdf5(Dataset):
    """Map-style dataset reading images and labels from an HDF5 file.

    The file must contain two datasets named ``x_<data_type>`` (images) and
    ``y_<data_type>`` (labels). The file stays open for the lifetime of the object;
    call :meth:`close` (or let the object be garbage collected) to release it.
    """

    def __init__(self, path, data_type):
        """Open ``path`` read-only and bind the datasets of the requested split.

        Args:
            path: location of the HDF5 file.
            data_type: split suffix used in the dataset names, e.g. 'train' or 'test'.

        Raises:
            KeyError: if ``x_<data_type>`` or ``y_<data_type>`` is missing from the file
                (the file is closed again before the error propagates).
        """
        self.path = path
        self.file = h5py.File(path, 'r')
        try:
            self.images = self.file['x_{}'.format(data_type)]
            self.labels = self.file['y_{}'.format(data_type)]
        except KeyError:
            # Do not leak the open handle when the requested split does not exist.
            self.file.close()
            self.file = None
            raise

        self.len = self.images.shape[0]

        # Augmentation (random rotation / horizontal and vertical flips) is currently
        # disabled, so the 'train' split and every other split share the same pipeline:
        # a plain array -> tensor conversion.
        self.transform = transforms.Compose([transforms.ToTensor()])

    # You must override __getitem__ and __len__
    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``.

        The image is cast to float32 and passed through ``self.transform``; per the
        original note this yields a 1x224x224 tensor since the images have no RGB
        channels. The label is returned as a ``torch.long`` scalar tensor.
        """
        image = self.transform(self.images[index].astype('float32'))
        label = torch.tensor(self.labels[index], dtype=torch.long)
        return image, label

    def __len__(self):
        """Total number of samples in the dataset."""
        return self.len

    def close(self):
        """Close the underlying HDF5 file. Safe to call more than once."""
        handle = getattr(self, 'file', None)
        if handle is not None:
            handle.close()
            self.file = None

    def __del__(self):
        # Best effort only: closing can fail while the interpreter is shutting down.
        try:
            self.close()
        except Exception:
            pass

In [36]:
# Read the same file the preprocessing cell writes ('hb_data_ptb.hdf5', relative to the
# notebook's working directory) rather than a machine-specific absolute path, so the
# notebook also runs from a fresh checkout on another machine.
HB_HDF5_PATH = 'hb_data_ptb.hdf5'

# Shuffle only the training batches; keep the test order fixed.
hb_train_loader = torch.utils.data.DataLoader(Dataset_Hdf5(HB_HDF5_PATH, 'train'),
                                              batch_size=64, shuffle=True)
hb_test_loader = torch.utils.data.DataLoader(Dataset_Hdf5(HB_HDF5_PATH, 'test'),
                                             batch_size=64, shuffle=False)

In [51]:
# Per-class loss weights, indexed by label value: mistakes on label 0 cost twice as much
# as mistakes on label 1. cuda() places the weights on the GPU when one is available.
class_weights = cuda(torch.tensor([2.0, 1.0]))
criterion = nn.CrossEntropyLoss(weight=class_weights)

In [56]:
# Optimise only the classifier head. fc[0], fc[2] and fc[4] are its Linear layers (the
# ReLU modules in between have no parameters), and all of them use the same learning
# rate, so a single parameter group with lr=0.001 states the actual configuration
# without a separate default lr that no group would ever use.
optimizer_res_net = torch.optim.Adam(res_net_saved_recall.fc.parameters(),
                                     lr=0.001, betas=(0.9, 0.999))

In [57]:
def _train_one_epoch(net, train_loader, criterion, optimizer):
    """Run one optimisation pass over ``train_loader``.

    Returns:
        (mean_loss, accuracy): loss averaged over batches and the fraction of
        training samples predicted correctly during the pass.

    Raises:
        ValueError: if ``train_loader`` yields no samples.
    """
    # NOTE(review): train mode also lets BatchNorm layers update their running
    # statistics, even in layers whose weights have requires_grad=False.
    net.train()

    running_loss = 0.0
    num_batches = 0
    correct = 0
    total = 0

    for data in train_loader:
        # data is a list of [inputs, labels]
        inputs, labels = cuda(data)

        optimizer.zero_grad()

        # Repeat the channel axis (dim 1) to 3 so the input matches the network's
        # 3-channel first layer; expand creates a view, not a copy.
        outputs = net(inputs.expand(-1, 3, -1, -1))
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        running_loss += loss.item()
        num_batches += 1

    if total == 0:
        raise ValueError('train_loader produced no samples')

    return running_loss / num_batches, correct / total


def train(net, train_loader, criterion, optimizer, test_loader, num_epochs=30):
    """Train ``net`` and print train/test metrics after every epoch.

    Args:
        net: model to optimise.
        train_loader: iterable of (inputs, labels) training batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimiser holding the parameters to update; its learning rate is
            multiplied by 0.95 after every epoch.
        test_loader: iterable of (inputs, labels) batches evaluated after each epoch.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        None. Metrics are only printed. ``net`` is left in eval mode after the last
        evaluation.

    Raises:
        ValueError: if ``train_loader`` yields no samples.
    """
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        mean_loss, train_acc = _train_one_epoch(net, train_loader, criterion, optimizer)
        scheduler.step()

        print('End of epoch {}, Loss {}'.format(epoch + 1, mean_loss))
        print('Train accuracy: {}'.format(train_acc))

        # test() switches the net to eval mode; the next epoch switches it back.
        test_acc, all_true, all_pred = test(net, test_loader)
        print('Test accuracy: {}'.format(test_acc))

        # Macro averaging: every class contributes equally regardless of its size.
        precision = precision_score(all_true, all_pred, average='macro')
        recall = recall_score(all_true, all_pred, average='macro')
        f1 = f1_score(all_true, all_pred, average='macro')
        print('Test precision: {}'.format(precision))
        print('Test recall: {}'.format(recall))
        print('Test f1: {}'.format(f1))

    print('Finished Training')
    
def test(net, test_loader):
    """Evaluate ``net`` on every batch of ``test_loader`` without tracking gradients.

    The network is switched to eval mode and left in that mode.

    Returns:
        (accuracy, true_labels, predicted_labels) where accuracy is the fraction of
        correctly classified samples and the two lists hold one int per sample, in
        loader order.
    """
    net.eval()

    hits, seen = 0, 0
    truths, guesses = [], []

    with torch.no_grad():
        for batch in test_loader:
            images, labels = cuda(batch)
            truths.extend(labels.cpu().tolist())

            # Repeat the channel axis to 3 before the forward pass.
            scores = net(images.expand(-1, 3, -1, -1))
            predicted = torch.max(scores, 1)[1]
            guesses.extend(predicted.cpu().tolist())

            seen += labels.size(0)
            hits += (predicted == labels).sum().item()

    return hits / seen, truths, guesses

In [58]:
# Train for 30 epochs; the optimizer passed in only holds the parameters of the model's
# fc head. Loss and train/test metrics are printed after every epoch.
train(res_net_saved_recall, hb_train_loader, criterion, optimizer_res_net, hb_test_loader, 30)

End of epoch 1, Loss 0.49732372295725474
Train accuracy: 0.7606734816596512
Test accuracy: 0.8151837856406733
Test precision: 0.7808712407472718
Test recall: 0.8343889920600664
Test f1: 0.7929979359510381
End of epoch 2, Loss 0.3855630888879954
Train accuracy: 0.8298256163559832
Test accuracy: 0.7317073170731707
Test precision: 0.7468183688132843
Test recall: 0.8062408042725805
Test f1: 0.721624879622334
End of epoch 3, Loss 0.3645995586783021
Train accuracy: 0.8415084614723821
Test accuracy: 0.8581243558914462
Test precision: 0.8399860816960604
Test recall: 0.7918887068528531
Test f1: 0.8106154274382312
End of epoch 4, Loss 0.3572270808802856
Train accuracy: 0.8465767545743493
Test accuracy: 0.8237718996908279
Test precision: 0.7959346947631059
Test recall: 0.8593446232265698
Test f1: 0.8061358137056851
End of epoch 5, Loss 0.3264171547450862
Train accuracy: 0.8599776651490422
Test accuracy: 0.8749570594297492
Test precision: 0.8386863579775481
Test recall: 0.870835827671333
Test f1: 