# **Applied AI in Biomedicine - Final Assignment**
## PAC/PVC classification from ECG signals
***
* Alberto Rota: *Person Code: 10615751 - Student Number: 964662 - [alberto2.rota@mail.polimi.it](mailto:alberto2.rota@mail.polimi.it)*  
* Gabriele Santicchi: *Person Code: 10579046 - Student Number: 969088 - [gabriele.santicchi@mail.polimi.it](mailto:gabriele.santicchi@mail.polimi.it)*
***

In [74]:
#@title Set up the environment
import os
import numpy as np, os, sys, joblib
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)
mpl.rc('font', family='serif') 
mpl.rc('font', serif='Computer Modern Roman') 
import seaborn as sns
import tarfile
import math
import random
import sklearn
import timeit
import json
import warnings
import random

import scipy
from scipy.io import loadmat
from random import randint

import tensorflow as tf
tfk = tf.keras
tfkl = tfk.layers


from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn import preprocessing 

SEED = 69
tf.random.set_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
os.environ['PYTHONHASHSEED'] = str(SEED)

print("Using Tensorflow version",tf.__version__)
print("RNG seed:", SEED)

Using Tensorflow version 2.6.0
RNG seed: 69


## Data loading

In [75]:
dataset_dir = "C:\\Users\\alber\\Desktop\\Files\\Università\\V ANNO ACCADEMICO\\AI in Biomedicine\\Workshop\\PAC-PVC-Beat-Classifier-for-ECGs\\data"

In [83]:
class Patient:
    def __init__(self,init_dict):
        self.path=init_dict['path']
        self.fs=init_dict['fs']
        self.id=init_dict['id']
        self.samples=init_dict['samples']
        self.lead1=init_dict['lead1']
        self.lead2=init_dict['lead2']
        self.rpeaks=init_dict['rpeaks']
        self.peaklabels=init_dict['peaklabels']
        
    def info(self):
        print(">> ID:",self.id)
        print("   At:",self.path)
        print("   fs:",self.fs)
        print("   samples:",self.samples)
        print("   lead1:",self.lead1)
        print("   lead2:",self.lead2)
        print("   rpeaks:",self.rpeaks)
        print("   peaklabels:",self.peaklabels)
        
    def plot(self,windowstart=None,xrange=4000):
        if windowstart is None: windowstart = randint(0,self.samples-xrange)
        
        plt.figure(figsize=(10,2.5))
        plt.plot(range(0,self.fs,self.samples/self.fs), self.lead1,color='#073b4c')
        plt.scatter(self.rpeaks[self.peaklabels=='N']/self.fs,self.lead1[self.rpeaks[self.peaklabels=='N']],color='#06d6a0',s=500,alpha=0.5,label="Normal")
        plt.scatter(self.rpeaks[self.peaklabels=='S']/self.fs,self.lead1[self.rpeaks[self.peaklabels=='S']],color='#ef476f',s=500,alpha=0.5,label="SopraVentricular")
        plt.scatter(self.rpeaks[self.peaklabels=='V']/self.fs,self.lead1[self.rpeaks[self.peaklabels=='V']],color='#ffd166',s=500,alpha=0.5,label="Ventricular")
        plt.grid(True)
        plt.xlim([windowstart,windowstart+xrange])
        plt.title("ECG lead 1 - Patient",self.id)
        plt.legend()
        plt.show()
        plt.figure(figsize=(10,2.5))
        plt.plot(self.lead2,color='#073b4c')
        plt.scatter(self.rpeaks[self.peaklabels=='N'],self.lead2[self.rpeaks[self.peaklabels=='N']],color='#06d6a0',s=500,alpha=0.5,label="Normal")
        plt.scatter(self.rpeaks[self.peaklabels=='S'],self.lead2[self.rpeaks[self.peaklabels=='S']],color='#ef476f',s=500,alpha=0.5,label="SopraVentricular")
        plt.scatter(self.rpeaks[self.peaklabels=='V'],self.lead2[self.rpeaks[self.peaklabels=='V']],color='#ffd166',s=500,alpha=0.5,label="Ventricular")
        plt.grid(True)
        plt.xlim([windowstart,windowstart+xrange])
        plt.title("ECG lead 2 - Patient",self.id)
        plt.legend()
        plt.show()


In [84]:
files = os.listdir(dataset_dir)
p = []
correct,ncorrect=0,0
for f in range(0,len(files),3):
    try:
        recs = loadmat(os.path.join("data",files[f]))
        ann = loadmat(os.path.join("data",files[f+1]))
        rp =loadmat(os.path.join("data",files[f+2]))
        patient_as_dict={
                'id':files[f].split(".")[0].split("_")[0],
                'path':os.path.join("data",files[f]),
                'fs':int(files[f].split(".")[0].split("_")[1]),
                'samples':recs['ecg'][:,0].shape[0],
                'lead1':recs['ecg'][:,0],
                'lead2':recs['ecg'][:,1],
                'rpeaks':rp['rpeaks'].T[0],
                'peaklabels':ann['labels'] 
        }
        p.append(Patient(patient_as_dict))
        correct+=1
        
    except:
        ncorrect+=1
print(f"Loaded {correct} files correctly")
if ncorrect>0: print(f"!!!! Error in loading {ncorrect} files")

Loaded 105 files correctly


In [85]:
p[0].info()
p[10].plot()

>> ID: S001
   At: data\S001_128.mat
   fs: 128
   samples: 230400
   lead1: [ 0.    0.04  0.03 ...  0.03 -0.01  0.  ]
   lead2: [ 0.08  0.07  0.1  ...  0.01 -0.03 -0.03]
   rpeaks: [    29    110    191 ... 230184 230261 230338]
   peaklabels: ['N' 'N' 'N' ... 'N' 'N' 'N']


ValueError: x and y must have same first dimension, but have shapes (1,) and (230400,)

def 

IndexError: boolean index did not match indexed array along dimension 0; dimension is 1 but corresponding boolean dimension is 2851

In [4]:
#@title Utility Functions
from scipy.io import loadmat

def load_data(header_file):
    with open(header_file, 'r') as f:
        header = f.readlines()
    mat_file = header_file.replace('.hea', '.mat')
    x = loadmat(mat_file)
    recording = np.asarray(x['val'], dtype=np.float32)
    return recording, header

def load_header(header_file):
    with open(header_file, 'r') as f:
        header = f.readlines()
    return header


def get_labels(header):
    classes = list()
    tmp = header[15]
    classes.append(tmp[5:-1]) 
    return classes   


In [6]:
# Get name of header files contained in the folder
input_dir = 'C:\\Users\\alber\\Desktop\\Files\\Università\\V ANNO ACCADEMICO\\AI in Biomedicine\\Workshop\\PAC-PVC-Beat-Classifier-for-ECGs\\training_set'
filenames = []
headers = list ()
for file in os.listdir(input_dir):
    file_path = os.path.join(input_dir, file)
    if not file.lower().startswith('.') and file.lower().endswith('hea') and os.path.isfile(file_path):
        filenames.append(file_path)
        with open(file_path, 'r') as f:
          headers.append(f.readlines())
num_files = len(headers)
print(num_files)

0
