In [1]:
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os.path, sys, re
import time
from PIL import Image

from sktime.utils.data_processing import (
    from_3d_numpy_to_nested,
    from_multi_index_to_3d_numpy,
    from_nested_to_3d_numpy,
    from_multi_index_to_nested,
    from_nested_to_multi_index,
)


In [2]:
cd ~/Desktop/Data_Science/Erdos_Institute/ecg-proj/ecg-copy/

/home/moise/Desktop/Data_Science/Erdos_Institute/ecg-proj/ecg-copy


In [3]:
pathroot = "CSV_data_v2/"

In [4]:
LeadDict={'Lead1':np.array([[]]),'Lead2':np.array([[]]),'Lead3':np.array([[]]),'Lead4':np.array([[]]),
          'Lead5':np.array([[]]),'Lead6':np.array([[]]),'Lead7':np.array([[]]),'Lead8':np.array([[]]),
         'Lead9':np.array([[]]),'Lead10':np.array([[]]),'Lead11':np.array([[]]),'Lead12':np.array([[]])}

In [5]:
leadMinLen = {'Lead1':0,'Lead2':0,'Lead3':0,'Lead4':0,'Lead5':0,'Lead6':0,
           'Lead7':0,'Lead8':0,'Lead9':0,'Lead10':0,'Lead11':0,'Lead12':0}

In [6]:
ClassLabels={'ECGImagesofPatientthathaveHistoryofMI':0,'ECGImagesofPatientthathaveabnormalheartbeat':1,
            'ECGImagesofCOVID-19Patients':2,'NormalPersonECGImages':3,'ECGImagesofMyocardialInfarctionPatients':4}

In [7]:
"""
Note:
-----

1) For the "time series", only the second column of every lead is extracted as signal.
2) Because of non-uniformity in signal lenght across both observations and Leads, the code
uses the minimum signal length across observation and leads, in order to make the date "proper
for multivariate time series classification.
"""

labelArr = np.array([])
for dirs in os.listdir(pathroot):
#     if dirs == 'ECGImagesofCOVID-19Patients':
#         continue
    t = time.time()
    count = 0
    print('Processing {0} folder ...'.format(dirs))
    if not os.path.isfile(dirs):
        for item in os.listdir(os.path.join(pathroot,dirs)):
            #print('Processing {0} file ...'.format(item))
            of, oe = os.path.splitext(item)
            if of[0]=='.':
                continue
            else:
                signal = pd.read_csv(os.path.join(pathroot,os.path.join(dirs,item)),header=None,sep=' ')[1].to_numpy()
            try:
                leadNum = int(of[-2:])
            except ValueError:
                leadNum = int(of[-1:])
            finally:
                leadKey = 'Lead'+str(leadNum)
            if leadNum == 13:
                continue
            if LeadDict[leadKey].shape[1] > 0:
                if len(signal)> leadMinLen[leadKey]:
                    signal = np.reshape(signal[:leadMinLen[leadKey]],(1,leadMinLen[leadKey]))
                    LeadDict[leadKey] = np.concatenate( (LeadDict[leadKey],signal) )
                else:
                    LeadDict[leadKey] = LeadDict[leadKey][:,:len(signal)]
                    LeadDict[leadKey] = np.concatenate( (LeadDict[leadKey],np.reshape(signal,(1,len(signal)))) )
                    leadMinLen[leadKey] = len(signal) 
            else:
                LeadDict[leadKey] = np.reshape(signal,(1,len(signal)))
                leadMinLen[leadKey] = len(signal)         
            count = count+1
        labelArr = np.append(labelArr,np.repeat(ClassLabels[dirs],len(LeadDict[leadKey])-len(labelArr))) ##Add labels
        t = time.time()-t
        print('{0} files processed in this folder in {1} sec...\n'.format(count,round(t)))    

Processing ECGImagesofPatientthathaveHistoryofMI folder ...
2064 files processed in this folder in 7 sec...

Processing ECGImagesofPatientthathaveabnormalheartbeat folder ...
2796 files processed in this folder in 11 sec...

Processing ECGImagesofCOVID-19Patients folder ...
3000 files processed in this folder in 5 sec...

Processing NormalPersonECGImages folder ...
3408 files processed in this folder in 8 sec...

Processing ECGImagesofMyocardialInfarctionPatients folder ...
2868 files processed in this folder in 7 sec...



In [8]:
"""
Post Processing:
---------------

1) Put all the leads on the same "time" scale/Length
2) Pull all the leads(2D) into a 3D array of shape (n_obs,n_col,n_timepoints)
3) Convert result from step 2 into a nested data frame.
"""

minLen = min(leadMinLen.values())
for key in LeadDict.keys():
    LeadDict[key] = LeadDict[key][:,:minLen]

dim1 = LeadDict['Lead1'].shape[0]
dim2 = len(LeadDict.keys())
dim3 = LeadDict['Lead1'].shape[1]
X3d = np.zeros((dim1,dim2,dim3))
for j in range(dim2):
    X3d[:,j,:] = list(LeadDict.values())[j]
    
X3d_nested=from_3d_numpy_to_nested(X3d)
X3d_nested.columns = list(LeadDict.keys())
X3d_nested['Label'] = labelArr
X3d_nested.to_csv('muti-lead-dataFrame.csv',index=False,float_format='%d')

In [9]:
X3d_nested

Unnamed: 0,Lead1,Lead2,Lead3,Lead4,Lead5,Lead6,Lead7,Lead8,Lead9,Lead10,Lead11,Lead12,Label
0,0 108.0 1 108.0 2 109.0 3 ...,0 103.0 1 103.0 2 103.0 3 ...,0 13.0 1 13.0 2 13.0 3 53....,0 12.0 1 12.0 2 13.0 3 13....,0 138.0 1 138.0 2 138.0 3 ...,0 103.0 1 103.0 2 103.0 3 ...,0 135.0 1 135.0 2 135.0 3 ...,0 150.0 1 150.0 2 150.0 3 ...,0 140.0 1 140.0 2 140.0 3 ...,0 151.0 1 151.0 2 151.0 3 ...,0 0.0 1 0.0 2 0.0 3 0....,0 78.0 1 78.0 2 79.0 3 79....,0.0
1,0 122.0 1 122.0 2 122.0 3 ...,0 100.0 1 100.0 2 100.0 3 ...,0 78.0 1 78.0 2 78.0 3 78....,0 48.0 1 48.0 2 49.0 3 49....,0 120.0 1 120.0 2 120.0 3 ...,0 124.0 1 124.0 2 125.0 3 ...,0 128.0 1 128.0 2 128.0 3 ...,0 154.0 1 154.0 2 154.0 3 ...,0 126.0 1 126.0 2 126.0 3 ...,0 144.0 1 144.0 2 144.0 3 ...,0 0.0 1 0.0 2 0.0 3 0.0 4 ...,0 90.0 1 90.0 2 91.0 3 ...,0.0
2,0 137.0 1 137.0 2 137.0 3 ...,0 101.0 1 101.0 2 101.0 3 ...,0 102.0 1 102.0 2 102.0 3 ...,0 96.0 1 96.0 2 96.0 3 ...,0 2.0 1 41.0 2 77.0 3 77....,0 2.0 1 41.0 2 81.0 3 ...,0 2.0 1 2.0 2 2.0 3 ...,0 81.0 1 81.0 2 82.0 3 82....,0 139.0 1 139.0 2 139.0 3 ...,0 154.0 1 154.0 2 155.0 3 ...,0 144.0 1 144.0 2 144.0 3 ...,0 87.0 1 87.0 2 88.0 3 88....,0.0
3,0 105.0 1 105.0 2 106.0 3 ...,0 99.0 1 99.0 2 100.0 3 ...,0 103.0 1 103.0 2 103.0 3 ...,0 52.0 1 52.0 2 52.0 3 53....,0 115.0 1 115.0 2 115.0 3 ...,0 123.0 1 123.0 2 124.0 3 ...,0 129.0 1 129.0 2 130.0 3 ...,0 153.0 1 153.0 2 153.0 3 ...,0 139.0 1 139.0 2 139.0 3 ...,0 108.0 1 108.0 2 108.0 3 ...,0 0.0 1 0.0 2 0.0 3 0.0 4 ...,0 111.0 1 112.0 2 112.0 3 ...,0.0
4,0 137.0 1 137.0 2 137.0 3 ...,0 105.0 1 105.0 2 105.0 3 ...,0 104.0 1 104.0 2 104.0 3 ...,0 41.0 1 41.0 2 41.0 3 42....,0 135.0 1 135.0 2 135.0 3 ...,0 151.0 1 151.0 2 152.0 3 ...,0 129.0 1 129.0 2 129.0 3 ...,0 118.0 1 118.0 2 118.0 3 ...,0 59.0 1 99.0 2 139.0 3 ...,0 127.0 1 127.0 2 127.0 3 ...,0 128.0 1 128.0 2 129.0 3 ...,0 125.0 1 125.0 2 125.0 3 ...,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1173,0 126.0 1 126.0 2 126.0 3 ...,0 98.0 1 98.0 2 98.0 3 ...,0 105.0 1 105.0 2 105.0 3 ...,0 35.0 1 35.0 2 36.0 3 36....,0 94.0 1 94.0 2 95.0 3 ...,0 138.0 1 138.0 2 138.0 3 ...,0 80.0 1 80.0 2 81.0 3 81....,0 123.0 1 123.0 2 123.0 3 ...,0 136.0 1 136.0 2 136.0 3 ...,0 131.0 1 131.0 2 131.0 3 ...,0 0.0 1 0.0 2 0.0 3 0.0 4 ...,0 133.0 1 133.0 2 134.0 3 ...,4.0
1174,0 62.0 1 62.0 2 62.0 3 63....,0 101.0 1 101.0 2 101.0 3 ...,0 86.0 1 86.0 2 86.0 3 86....,0 69.0 1 69.0 2 69.0 3 70....,0 48.0 1 48.0 2 49.0 3 49....,0 99.0 1 99.0 2 99.0 3 ...,0 2.0 1 2.0 2 2.0 3 41....,0 118.0 1 118.0 2 118.0 3 ...,0 85.0 1 85.0 2 85.0 3 ...,0 133.0 1 133.0 2 134.0 3 ...,0 59.0 1 99.0 2 135.0 3 ...,0 105.0 1 105.0 2 105.0 3 ...,4.0
1175,0 118.0 1 119.0 2 119.0 3 ...,0 100.0 1 100.0 2 101.0 3 ...,0 95.0 1 95.0 2 95.0 3 95....,0 29.0 1 29.0 2 30.0 3 30....,0 136.0 1 136.0 2 137.0 3 ...,0 129.0 1 129.0 2 129.0 3 ...,0 80.0 1 80.0 2 81.0 3 81....,0 133.0 1 133.0 2 133.0 3 ...,0 85.0 1 85.0 2 85.0 3 ...,0 154.0 1 154.0 2 154.0 3 ...,0 133.0 1 133.0 2 133.0 3 ...,0 110.0 1 110.0 2 111.0 3 ...,4.0
1176,0 114.0 1 115.0 2 115.0 3 ...,0 98.0 1 98.0 2 98.0 3 ...,0 88.0 1 88.0 2 89.0 3 89....,0 69.0 1 69.0 2 69.0 3 70....,0 2.0 1 41.0 2 81.0 3 ...,0 117.0 1 117.0 2 118.0 3 ...,0 117.0 1 117.0 2 117.0 3 ...,0 109.0 1 109.0 2 109.0 3 ...,0 128.0 1 128.0 2 129.0 3 ...,0 17.0 1 17.0 2 18.0 3 18....,0 115.0 1 115.0 2 115.0 3 ...,0 101.0 1 101.0 2 102.0 3 ...,4.0
