In [170]:
%matplotlib inline
from matplotlib import pyplot as plt
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor
import pickle
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import pywt
import torchsummary
import math
import os

In [2]:
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


In [3]:
def unpickler(filename):
    with open(filename, 'rb') as f:
        a = pickle.load(f)
    return a

path = "pickled/"
train_readings = unpickler(path + 'train_readings.pkl')
train_diagnostic = unpickler(path + 'train_diagnostic.pkl').astype('float32')
validate_readings = unpickler(path + 'validate_readings.pkl')
validate_diagnostic = unpickler(path + 'validate_diagnostic.pkl').astype('float32')
test_readings = unpickler(path + 'test_readings.pkl')
test_diagnostic = unpickler(path + 'test_diagnostic.pkl').astype('float32')

In [4]:
class ECG(Dataset):
    def __init__(self, readings, labels, cwt=False, scales=[], wavelet=''):
        self.readings = readings # should i convert to torch.tensor here?
        self.labels = labels
        self.scales = scales
        self.wavelet = wavelet
        if cwt:
            self.apply_cwt()
    
    def __len__(self):
        return len(self.readings)

    def __getitem__(self, idx):
        return self.readings[idx], self.labels[idx]
    
    def apply_cwt(self):
        cwt = lambda x : pywt.cwt(x, self.scales, self.wavelet)[0] #pytorch uses channels first, unlike tensorflow's default
        self.readings = np.array([cwt(i) for i in self.readings])

In [6]:
total_scales = 20
scales = np.linspace(0.1, 4, total_scales) # 32 evenly spaced numbers between 0.1 and 4
wavelet = 'mexh'
batch_size = 64
kernel_size = 41

In [7]:
# how to increase efficiency?

train_dataloader = DataLoader(ECG(train_readings, 
                                  train_diagnostic, 
                                  cwt=True, scales=scales, wavelet=wavelet), batch_size=batch_size, shuffle=True)
validate_dataloader = DataLoader(ECG(validate_readings, 
                                  validate_diagnostic, 
                                  cwt=True, scales=scales, wavelet=wavelet), batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(ECG(test_readings, 
                                  test_diagnostic, 
                                  cwt=True, scales=scales, wavelet=wavelet), batch_size=batch_size, shuffle=True)

In [8]:
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

Feature batch shape: torch.Size([64, 20, 1000])
Labels batch shape: torch.Size([64])


In [159]:
# best way to adjust hyperparameters
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        modules = []
        modules.append(
            nn.Conv1d(in_channels=total_scales, out_channels=32, kernel_size=kernel_size, padding='same')) 
        modules.append(
            nn.BatchNorm1d(num_features=32))
        modules.append(
            nn.Dropout(0.2)) # 20 percent of the inputs are randomly dropped
        modules.append(
            nn.ReLU())
        modules.append(
            nn.MaxPool1d(kernel_size=kernel_size * 2))  
        
        modules.append(
            nn.Conv1d(in_channels=32, out_channels=32, kernel_size=6, padding='same')) 
        modules.append(
            nn.BatchNorm1d(num_features=32))
        modules.append(
            nn.Dropout(0.2)) # 20 percent of the inputs are randomly dropped
        modules.append(
            nn.ReLU())
        modules.append(
            nn.MaxPool1d(kernel_size=4)) 
        
        modules.append(nn.Flatten())
        modules.append(nn.Linear(96, 256)) # dense layer with 50 neurons 
        modules.append(nn.ReLU())
        modules.append(
            nn.Linear(256, 2))
        modules.append(
            nn.LogSoftmax(dim=1))
    
        
    
        self.network = nn.Sequential(*modules) # unpack the modules
    
    def summary(self):
        return torchsummary.summary(self, input_size=(total_scales, 1000))
    
    def forward(self, x):
        prob = self.network(x)
        return prob
    
    def count_parameters(self):
        return sum(p.numel() for p in self.parameters() if p.requires_grad)

In [160]:
model = CNN().to(device)
model.summary()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1             [-1, 32, 1000]          26,272
       BatchNorm1d-2             [-1, 32, 1000]              64
           Dropout-3             [-1, 32, 1000]               0
              ReLU-4             [-1, 32, 1000]               0
         MaxPool1d-5               [-1, 32, 12]               0
            Conv1d-6               [-1, 32, 12]           6,176
       BatchNorm1d-7               [-1, 32, 12]              64
           Dropout-8               [-1, 32, 12]               0
              ReLU-9               [-1, 32, 12]               0
        MaxPool1d-10                [-1, 32, 3]               0
          Flatten-11                   [-1, 96]               0
           Linear-12                  [-1, 256]          24,832
             ReLU-13                  [-1, 256]               0
           Linear-14                   

In [161]:
EPS = 1e-5
def precision(y_pred, y_true):
    tp = 0
    fp = 0
    for i in range(len(y_pred)):
        if y_pred[i] == 1:
            if y_true[i] == 1:
                tp += 1
            else:
                fp += 1
    
    return tp / (tp + fp + EPS)

def recall(y_pred, y_true):
    tp = 0
    fn = 0
    for i in range(len(y_pred)):
        if y_true[i] == 1:
            if y_pred[i] == 1:
                tp += 1
            else:
                fn += 1
    return tp / (tp + fn + EPS)
        

def f1(_precision, _recall):
    return (2 * _precision * _recall) / (_precision + _recall + EPS)

In [162]:
class_weights = compute_class_weight(
                                        class_weight = "balanced",
                                        classes = np.unique(train_diagnostic),
                                        y = train_diagnostic                                                    
                                    )
class_weights = torch.ceil(torch.from_numpy(class_weights)).type(torch.FloatTensor).to(device)

In [167]:
loss_fn = nn.NLLLoss(weight=class_weights)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

In [191]:
current_directory = os.path.join(os.getcwd(), 'torch_check')
def get_max_f1():
    max_f1_score = open(os.path.join(current_directory, 'max_f1.txt'), 'r').readline().strip("\n")
    return float(max_f1_score)

def get_max_accuracy():
    max_accuracy = open(os.path.join(current_directory, 'max_f1.txt'), 'r').readline().strip("\n")
    return float(max_accuracy)

In [164]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X = X.type(torch.FloatTensor)
        y = y.type(torch.LongTensor)
        X, y = X.to(device), y.to(device)
    
        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [165]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    total_true = torch.empty(0).to(device)
    total_pred = torch.empty(0).to(device)
    with torch.no_grad():
        for X, y in dataloader:
            X = X.type(torch.FloatTensor)
            y = y.type(torch.LongTensor)
            
            X, y = X.to(device), y.to(device)
            pred = model(X).to(device)
            
            round_pred = torch.argmax(pred, dim=1)
            
            total_true = torch.cat((total_true, y))
            
            total_pred = torch.cat((total_pred, round_pred))
            
            test_loss += loss_fn(pred, y).item()
            correct += (round_pred == y).type(torch.float).sum().item()
        
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    
    precision_ = precision(total_pred, total_true)
    recall_ = recall(total_pred, total_true)
    f1_ = f1(precision_, recall_)
    print(f"Precision: {precision_}")
    print(f"Recall: {recall_}")
    print(f"F1 Score: {f1_}")


In [168]:
epochs = 100
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(train_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.475975  [    0/10977]
loss: 0.178501  [ 6400/10977]
Test Error: 
 Accuracy: 90.9%, Avg loss: 0.173869 

Precision: 0.47376960262969386
Recall: 0.9722530413734402
F1 Score: 0.637086498672443
Epoch 2
-------------------------------
loss: 0.178847  [    0/10977]
loss: 0.095102  [ 6400/10977]
Test Error: 
 Accuracy: 79.6%, Avg loss: 0.321097 

Precision: 0.285943516412248
Recall: 0.9889012098901087
F1 Score: 0.443611155853753
Epoch 3
-------------------------------
loss: 0.145855  [    0/10977]
loss: 0.097671  [ 6400/10977]
Test Error: 
 Accuracy: 85.9%, Avg loss: 0.225271 

Precision: 0.3671198995998356
Recall: 0.9889012098901087
F1 Score: 0.5354527788460074
Epoch 4
-------------------------------
loss: 0.190881  [    0/10977]
loss: 0.077316  [ 6400/10977]
Test Error: 
 Accuracy: 94.7%, Avg loss: 0.171401 

Precision: 0.6184407749742071
Recall: 0.9156492684167673
F1 Score: 0.7382502146503269
Epoch 5
-------------------------------
loss: 0.26

loss: 0.081863  [ 6400/10977]
Test Error: 
 Accuracy: 91.9%, Avg loss: 0.147203 

Precision: 0.504788729550486
Recall: 0.9944505993956648
F1 Score: 0.6696517316736275
Epoch 36
-------------------------------
loss: 0.303272  [    0/10977]
loss: 0.240099  [ 6400/10977]
Test Error: 
 Accuracy: 86.8%, Avg loss: 0.216542 

Precision: 0.38326931121097174
Recall: 0.9966703551978873
F1 Score: 0.55363346916149
Epoch 37
-------------------------------
loss: 0.076769  [    0/10977]
loss: 0.044099  [ 6400/10977]
Test Error: 
 Accuracy: 89.1%, Avg loss: 0.180421 

Precision: 0.42877628858137523
Recall: 0.9955604772967761
F1 Score: 0.5993943844853463
Epoch 38
-------------------------------
loss: 0.104977  [    0/10977]
loss: 0.240027  [ 6400/10977]
Test Error: 
 Accuracy: 71.4%, Avg loss: 0.483073 

Precision: 0.22277350478101338
Recall: 0.9966703551978873
F1 Score: 0.36414948597171604
Epoch 39
-------------------------------
loss: 0.080950  [    0/10977]
loss: 0.135561  [ 6400/10977]
Test Error: 


Precision: 0.40688717531994933
Recall: 0.9966703551978873
F1 Score: 0.5778594570394209
Epoch 70
-------------------------------
loss: 0.059537  [    0/10977]
loss: 0.029354  [ 6400/10977]
Test Error: 
 Accuracy: 95.6%, Avg loss: 0.091459 

Precision: 0.6520467788593072
Recall: 0.9900110877912199
F1 Score: 0.7862446539996136
Epoch 71
-------------------------------
loss: 0.136039  [    0/10977]
loss: 0.079289  [ 6400/10977]
Test Error: 
 Accuracy: 88.7%, Avg loss: 0.189596 

Precision: 0.42088014784232136
Recall: 0.9977802330989985
F1 Score: 0.5920274330986015
Epoch 72
-------------------------------
loss: 0.122634  [    0/10977]
loss: 0.035139  [ 6400/10977]
Test Error: 
 Accuracy: 84.5%, Avg loss: 0.272850 

Precision: 0.34548944205186394
Recall: 0.9988901110001098
F1 Score: 0.5134017686125395
Epoch 73
-------------------------------
loss: 0.103987  [    0/10977]
loss: 0.126987  [ 6400/10977]
Test Error: 
 Accuracy: 76.1%, Avg loss: 0.460652 

Precision: 0.2556155807346728
Recall: 0.9

KeyboardInterrupt: 

In [None]:
(len(test_diagnostic) - sum(test_diagnostic)) / len(test_diagnostic)