# Diagnosis based on ECG data

- - - 

* Diagnosis using collected ECG data
* Currently total 42 subjects
* 3 classes (DEP, SUI, NOR)

- - -

In [1]:
# importing required components 
import os
import time
import json
import urllib
import random
import torch
import torchvision
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [139]:
from PIL import Image
from pdf2image import convert_from_path
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from efficientnet_pytorch import EfficientNet

## Basic data check

In [None]:
ecg_df = pd.read_csv('E:/RESEARCH/Datasets/wearable/AI_coded_1.csv', sep=',')

* 1: depression, 2: suicidality, 3: normal

In [None]:
ecg_df['class'] = ecg_df['class'].astype("category")
ecg_df['sub'] = ecg_df['sub'].astype("category")

In [None]:
depression = ecg_df[ecg_df['class']==1]
print((depression.index)+1)

In [None]:
suicidal = ecg_df[ecg_df['class']==2]
print((suicidal.index)+1)

In [None]:
normal = ecg_df[ecg_df['class']==3]
print((normal.index)+1)

- - -

## Image Data handling

In [None]:
## Checking file path and names
file_path = "E:/RESEARCH/Datasets/wearable/ECG/test_0420/train/dep/"
file_names = os.listdir(file_path)

In [None]:
## Changing file names to 1, 2, ...
i = 1
for name in file_names:
    src = os.path.join(file_path, name)
    dst = str(i) + '.png'
    dst = os.path.join(file_path, dst)
    os.rename(src, dst)
    i += 1

In [None]:
## Converting pdf file into png file
for name in file_names:
    pages = convert_from_path(file_path + name, poppler_path="E:/RESEARCH/Datasets/wearable/ECG/poppler/Library/bin")
    
    for page in pages:
        page.save(file_path + name + '.png', "PNG")

In [None]:
## Data Crop (deleting patients' information)
# original png size = 2200 x 1700
# leaving the important part only (only the ecg data part)
left = 100
top = 450
right = 2100
bottom = 1300
## 100, 450, 2100, 1300 스케일로 자르면 딱 ecg 30초 전체 부분 나옴.

for name in file_names:
    im = Image.open(file_path + name)
    imc = im.crop((left, top, right, bottom))
    imc.save(file_path+name+'.png')

In [109]:
## directory setting and get file names for 2 second cycle of ECG
crop_path = "E:/RESEARCH/Datasets/wearable/ECG/test_0420/train_crop/sui/"
crop_names = os.listdir(crop_path)

In [108]:
# Setting the points for cropped image
left = 1000
top = 0
right =1400
bottom = 250
## 위에서 한번 처리한 이미지에 대해 1000, 0, 1400, 250로 자르면 딱 5~7초 ecg cycle 나옴.


for name in crop_names:
    im = Image.open(crop_path + name)
    imc = im.crop((left, top, right, bottom))
    imc.save(crop_path+name+'.png')

In [110]:
## Changing file names to 1, 2, ...
i = 1
for name in crop_names:
    src = os.path.join(crop_path, name)
    dst = str(i) + '.png'
    dst = os.path.join(crop_path, dst)
    os.rename(src, dst)
    i += 1

- - -

## With simple image classification

* Our dataset numbers (dep: 244, nor: 402, sui: 105)

In [122]:
class Args:
    # arugments
    epochs=50
    bs=16
    lr=0.001
    momentum=0.9
    num_channels=3 
    num_classes=3
    verbose='store_true'
    seed=710674

args = Args()    

np.random.seed(args.seed)
random.seed(args.seed)
torch.manual_seed(args.seed)

<torch._C.Generator at 0x2443a981c70>

In [123]:
#Setting torch environment

if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
    
print('Using PyTorch version:', torch.__version__, ' Device: ', DEVICE)

Using PyTorch version: 1.7.1  Device:  cuda


In [140]:
# model_res = models.resnet18(num_classes=2, pretrained=True)
model_eff3 = EfficientNet.from_pretrained('efficientnet-b3', num_classes=args.num_classes)
# model_resnet18 = models.resnet18(pretrained=True)
# model_mobnetv2 = models.mobilenet_v2(pretrained=True)

Loaded pretrained weights for efficientnet-b3


In [125]:
model_mobnetv2

MobileNetV2(
  (features): Sequential(
    (0): ConvBNReLU(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=Tr

In [126]:
## resnet 구조는 마지막 fc layer의 out_features 를 바꿔주면 되고.
model_resnet18.fc = nn.Linear(in_features = 512, out_features = args.num_classes)
# model_mobnetv2.classifier = nn.Linear(in_features = 512, out_features = args.num_classes)

In [141]:
# model = model_res.to(DEVICE)
model = model_eff3.to(DEVICE)
# model = model_mobnetv2.to(DEVICE)
# model = model_resnet18.to(DEVICE)

In [152]:
# Data Transformation
data_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(256),
#     transforms.RandomHorizontalFlip(),
#     transforms.RandomVerticalFlip(),
    transforms.ColorJitter(contrast=(0.3, 1), saturation=(0.3, 1)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456,0.406], [0.229, 0.224, 0.225])
])

In [153]:
# Uploading image data
# ecg_data = datasets.ImageFolder(root = 'E:/RESEARCH/Datasets/wearable/ECG/test_0420/train', transform = data_transforms)
ecg_data = datasets.ImageFolder(root = 'E:/RESEARCH/Datasets/wearable/ECG/test_0420/train_crop', transform = data_transforms)

In [154]:
train_size = int(0.8 * len(ecg_data))
test_size = len(ecg_data)-train_size
print(train_size)
print(test_size)

600
151


In [155]:
train_dataset, test_dataset = torch.utils.data.random_split(ecg_data, [train_size, test_size])

In [156]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.bs, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.bs, shuffle=False, num_workers=4)

In [157]:
dataiter = iter(train_loader)
images, labels = dataiter.next()
print(labels)

tensor([2, 2, 1, 1, 1, 2, 0, 0, 0, 1, 0, 1, 1, 2, 2, 1])


In [162]:
# Setting Optimizer and Objective Function

optimizer = torch.optim.Adam(model.parameters(), lr = args.lr)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001, total_steps=50, anneal_strategy='cos')
criterion = nn.CrossEntropyLoss() ## setup the loss function

# print(model)

In [163]:
# Function for checking model performance during CNN model

def train(model, train_loader, optimizer, log_interval):
    model.train()
    print(optimizer.param_groups[0]['lr'])
    
    for batch_idx, (image, label) in enumerate(train_loader):
        image = image.to(DEVICE)
        label = label.to(DEVICE)
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}".format(
                epoch, batch_idx * len(image), 
                len(train_loader.dataset), 100. * batch_idx / len(train_loader), 
                loss.item()))

    scheduler.step() #for learning rate scheduler

In [164]:
# Function for checking model performance during the learning process

def evaluate(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    validation =[]

    with torch.no_grad():
        for image, label in test_loader:
            image = image.to(DEVICE)
            label = label.to(DEVICE)
            output = model(image)
            test_loss += criterion(output, label).item()
            prediction = output.max(1, keepdim = True)[1]
            correct += prediction.eq(label.view_as(prediction)).sum().item()
    
    test_loss /= (len(test_loader)) 
    validation_accuracy = 100. * correct / len(test_loader.dataset)
    validation.append(validation_accuracy)
    
    return test_loss, validation_accuracy

In [165]:
# Checking train, val loss and accuracy

total = []

for epoch in range(1, args.epochs):
    train(model, train_loader, optimizer, log_interval = 200)
    test_loss, validation_accuracy = evaluate(model, test_loader)
    print("\n[EPOCH: {}], \tTest Loss: {:.4f}, \tValidation Accuracy: {:.2f} % \n".format(
        epoch, test_loss, validation_accuracy))
    
    total.append((test_loss, validation_accuracy))

3.9999999999999996e-05

[EPOCH: 1], 	Test Loss: 0.5056, 	Validation Accuracy: 78.15 % 

5.2034602152724665e-05

[EPOCH: 2], 	Test Loss: 0.4547, 	Validation Accuracy: 80.13 % 

8.753494340683887e-05

[EPOCH: 3], 	Test Loss: 0.4665, 	Validation Accuracy: 80.79 % 

0.00014472088841534561

[EPOCH: 4], 	Test Loss: 0.4565, 	Validation Accuracy: 79.47 % 

0.0002207248951078079

[EPOCH: 5], 	Test Loss: 0.4129, 	Validation Accuracy: 84.11 % 

0.0003117358052235721

[EPOCH: 6], 	Test Loss: 0.4509, 	Validation Accuracy: 80.79 % 

0.0004131899517009691

[EPOCH: 7], 	Test Loss: 0.4755, 	Validation Accuracy: 79.47 % 

0.0005200000000000001

[EPOCH: 8], 	Test Loss: 0.5066, 	Validation Accuracy: 79.47 % 

0.0006268100482990309

[EPOCH: 9], 	Test Loss: 0.4311, 	Validation Accuracy: 80.13 % 

0.0007282641947764279

[EPOCH: 10], 	Test Loss: 0.4608, 	Validation Accuracy: 80.79 % 

0.0008192751048921921

[EPOCH: 11], 	Test Loss: 0.5170, 	Validation Accuracy: 76.82 % 

0.0008952791115846543

[EPOCH: 12], 	T

## Ensemble methods

In [167]:
from torchensemble import VotingClassifier