In [24]:
import os
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
# Directory named "ECGData" under the current working directory; getFrames joins
# each CSV file name onto this path.
data_path = os.path.join(os.getcwd(), "ECGData")
# Diagnostics table, read relative to the current working directory. Later cells
# filter it on the "Rhythm" column and read file names from "FileName".
df = pd.read_excel("Diagnostics.xlsx")

In [3]:
def getFrames(files, base_dir=None):
    """Load 12-lead ECG recordings from headerless CSV files.

    Each file is parsed with ``header=None`` so that its first line is kept as
    an ordinary sample. Letting pandas infer a header and then rebuilding the
    first row from the column labels is lossy: pandas de-duplicates repeated
    labels by appending ``.1``, ``.2``, ..., so a repeated integer value such
    as ``5`` would be recovered as ``5.1``.

    Parameters
    ----------
    files : iterable of str
        CSV file names, joined onto ``base_dir``.
    base_dir : str, optional
        Directory that contains the files. When omitted, the notebook-level
        ``data_path`` is used.

    Returns
    -------
    list of pandas.DataFrame
        One frame per file, in input order. Every frame has the twelve lead
        columns (``I`` ... ``V6``), float64 values and a default 0..n-1 index.

    Raises
    ------
    ValueError
        If a file does not contain exactly twelve columns, or holds a value
        that cannot be parsed as a float.
    """
    col_names = ['I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6']
    # Fall back to the notebook-level directory only when no explicit one is given.
    root = data_path if base_dir is None else base_dir
    print("[INFO] Loading data...")

    frames = []
    for file in files:
        csv_path = os.path.join(root, file)
        # header=None keeps the first line as data; float64 makes a non-numeric
        # value fail loudly instead of producing an object column.
        frame = pd.read_csv(csv_path, header=None, dtype="float64")
        if frame.shape[1] != len(col_names):
            raise ValueError(
                f"{csv_path}: expected {len(col_names)} columns, found {frame.shape[1]}"
            )
        frame.columns = col_names
        frames.append(frame)

    return frames

In [4]:
# Display the diagnostics table loaded from "Diagnostics.xlsx".
df

Unnamed: 0,FileName,Rhythm,Beat,PatientAge,Gender,VentricularRate,AtrialRate,QRSDuration,QTInterval,QTCorrected,RAxis,TAxis,QRSCount,QOnset,QOffset,TOffset
0,MUSE_20180113_171327_27000,AFIB,RBBB TWC,85,MALE,117,234,114,356,496,81,-27,19,208,265,386
1,MUSE_20180112_073319_29000,SB,TWC,59,FEMALE,52,52,92,432,401,76,42,8,215,261,431
2,MUSE_20180111_165520_97000,SA,NONE,20,FEMALE,67,67,82,382,403,88,20,11,224,265,415
3,MUSE_20180113_121940_44000,SB,NONE,66,MALE,53,53,96,456,427,34,3,9,219,267,447
4,MUSE_20180112_122850_57000,AF,STDD STTC,73,FEMALE,162,162,114,252,413,68,-40,26,228,285,354
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10641,MUSE_20181222_204306_99000,SVT,NONE,80,FEMALE,196,73,168,284,513,258,244,32,177,261,319
10642,MUSE_20181222_204309_22000,SVT,NONE,81,FEMALE,162,81,162,294,482,110,-75,27,173,254,320
10643,MUSE_20181222_204310_31000,SVT,NONE,39,MALE,152,92,152,340,540,250,38,25,208,284,378
10644,MUSE_20181222_204312_58000,SVT,NONE,76,MALE,175,178,128,310,529,98,-83,29,205,269,360


## Extracting per-rhythm datasets

In [5]:
# Rows whose rhythm label is AFIB or AF form the AFIB group.
afib_dataset = df[df["Rhythm"].isin(["AFIB", "AF"])]
afib_dataset.shape

(2225, 16)

In [6]:
# Rows whose rhythm label is any of the six labels below form the GSVT group.
gsvt_dataset = df[
    df["Rhythm"].isin(["SVT", "AT", "SAAWR", "ST", "AVNRT", "AVRT"])
]
gsvt_dataset.shape

(2307, 16)

In [7]:
# Rows whose rhythm label is SB form the SB group.
sb_dataset = df.loc[df["Rhythm"].eq("SB")]
sb_dataset.shape

(3889, 16)

In [8]:
# Rows whose rhythm label is SR or SI form the SR group.
sr_dataset = df[df["Rhythm"].isin(["SR", "SI"])]
sr_dataset.shape

(1826, 16)

## Extracting file names

In [9]:
# Build the AFIB file list ("<FileName>.csv") and split it 80/20 into train and test.
# Both the shuffle and the split are seeded so that train/test membership is the
# same on every run; an unseeded split changes with each kernel restart.
afib_files = np.array(afib_dataset["FileName"])
afib_files = afib_files + ".csv"
np.random.default_rng(42).shuffle(afib_files)
afib_train_files, afib_test_files = train_test_split(
    afib_files, test_size=0.20, random_state=42
)
print(f"Total:{len(afib_files)}, Train:{len(afib_train_files)}, Test:{len(afib_test_files)}")

Total:2225, Train:1780, Test:445


In [10]:
# Build the GSVT file list ("<FileName>.csv") and split it 80/20 into train and test.
# Both the shuffle and the split are seeded so that train/test membership is the
# same on every run; an unseeded split changes with each kernel restart.
gsvt_files = np.array(gsvt_dataset["FileName"])
gsvt_files = gsvt_files + ".csv"
np.random.default_rng(42).shuffle(gsvt_files)
gsvt_train_files, gsvt_test_files = train_test_split(
    gsvt_files, test_size=0.20, random_state=42
)
print(f"Total:{len(gsvt_files)}, Train:{len(gsvt_train_files)}, Test:{len(gsvt_test_files)}")

Total:2307, Train:1845, Test:462


In [11]:
# Build the SB file list ("<FileName>.csv") and split it 80/20 into train and test.
# Both the shuffle and the split are seeded so that train/test membership is the
# same on every run; an unseeded split changes with each kernel restart.
sb_files = np.array(sb_dataset["FileName"])
sb_files = sb_files + ".csv"
np.random.default_rng(42).shuffle(sb_files)
sb_train_files, sb_test_files = train_test_split(
    sb_files, test_size=0.20, random_state=42
)
print(f"Total:{len(sb_files)}, Train:{len(sb_train_files)}, Test:{len(sb_test_files)}")

Total:3889, Train:3111, Test:778


In [12]:
# Build the SR file list ("<FileName>.csv") and split it 80/20 into train and test.
# Both the shuffle and the split are seeded so that train/test membership is the
# same on every run; an unseeded split changes with each kernel restart.
sr_files = np.array(sr_dataset["FileName"])
sr_files = sr_files + ".csv"
np.random.default_rng(42).shuffle(sr_files)
sr_train_files, sr_test_files = train_test_split(
    sr_files, test_size=0.20, random_state=42
)
print(f"Total:{len(sr_files)}, Train:{len(sr_train_files)}, Test:{len(sr_test_files)}")

Total:1826, Train:1460, Test:366


In [13]:
# Sanity check: size of the AFIB training split.
afib_train_files.shape

(1780,)

In [14]:
# Peek at the first 50 AFIB training file names. The former bare `len(x)` line
# was dropped: it was not the cell's last expression, so its value was never shown.
x = afib_train_files[:50]
x

array(['MUSE_20180113_133103_27000.csv', 'MUSE_20180116_132337_80000.csv',
       'MUSE_20180112_165701_83000.csv', 'MUSE_20180118_130022_09000.csv',
       'MUSE_20180114_134537_10000.csv', 'MUSE_20180113_074533_79000.csv',
       'MUSE_20180712_152013_25000.csv', 'MUSE_20180712_160114_86000.csv',
       'MUSE_20180118_123858_80000.csv', 'MUSE_20180120_123606_25000.csv',
       'MUSE_20180113_122611_39000.csv', 'MUSE_20180114_132336_04000.csv',
       'MUSE_20180712_152436_45000.csv', 'MUSE_20180118_180015_52000.csv',
       'MUSE_20180118_180427_36000.csv', 'MUSE_20180116_171441_37000.csv',
       'MUSE_20180114_133533_28000.csv', 'MUSE_20180114_120933_46000.csv',
       'MUSE_20180712_161228_84000.csv', 'MUSE_20180116_133520_60000.csv',
       'MUSE_20180115_115706_86000.csv', 'MUSE_20180116_122854_54000.csv',
       'MUSE_20180114_114956_44000.csv', 'MUSE_20180114_125554_54000.csv',
       'MUSE_20180113_073913_53000.csv', 'MUSE_20180113_130101_56000.csv',
       'MUSE_20180112_122

In [15]:
# Read every AFIB training recording, then stack the per-file frames row-wise
# into a single table with a fresh 0..N-1 index.
afib_dataframes = getFrames(files=afib_train_files)
afib_train_data = pd.concat(objs=afib_dataframes, axis=0, ignore_index=True)

[INFO] Loading data...


In [16]:
# Display the concatenated AFIB training samples (one column per ECG lead).
afib_train_data

Unnamed: 0,I,II,III,aVR,aVL,aVF,V1,V2,V3,V4,V5,V6
0,-54.18200,9.4272,70.9660,18.33000,-64.4820,28.3860,0.77393,-109.120,-100.650,-94.229,-59.3970,-40.352
1,-53.50200,8.5758,69.0650,18.64100,-63.4460,26.9840,0.99067,-108.580,-99.196,-92.056,-59.0850,-39.820
2,-52.90100,7.1787,66.6980,19.28200,-62.2180,25.0600,1.57490,-105.620,-94.785,-87.254,-57.0040,-38.297
3,-52.32700,4.9445,63.5240,20.38200,-60.5770,22.2670,2.73790,-98.590,-85.606,-78.315,-52.0000,-35.155
4,-51.61400,1.9871,59.4890,21.77800,-58.3650,18.6070,4.47740,-87.220,-71.696,-65.458,-43.9680,-30.339
...,...,...,...,...,...,...,...,...,...,...,...,...
8899995,-1.67620,2.9017,2.6111,-0.21039,-1.9172,2.5308,-15.04000,-77.929,-58.337,-45.076,-10.1370,10.095
8899996,-2.81270,6.6085,7.4006,-1.57610,-5.1307,6.6757,-11.91300,-70.911,-54.451,-40.429,-8.9298,11.442
8899997,-2.56310,11.9050,12.3930,-4.55300,-7.8803,11.7410,-9.05280,-65.168,-51.947,-36.514,-7.3996,13.915
8899998,-0.77931,18.5320,17.1820,-9.07710,-9.9059,17.3260,-6.56660,-61.207,-50.797,-33.593,-5.7342,17.359


In [17]:
# sns.displot(data = afib_train_data)

In [55]:
# Draw ONE random position in the GSVT training list, print it, and show the file
# stored there. Drawing twice (once for the print, once for the lookup) made the
# printed index unrelated to the displayed file.
random_idx = random.randrange(len(gsvt_train_files))
print(random_idx)
gsvt_train_files[random_idx]

647


'MUSE_20180113_133223_30000.csv'

In [38]:
# Number of files in the GSVT training split.
len(gsvt_train_files)

1845

In [58]:
# Peek at the first 50 SR training file names.
sr_train_files[:50]

array(['MUSE_20180209_174646_51000.csv', 'MUSE_20180209_112119_71000.csv',
       'MUSE_20180209_173611_82000.csv', 'MUSE_20180210_115929_22000.csv',
       'MUSE_20180210_123111_40000.csv', 'MUSE_20180210_132310_12000.csv',
       'MUSE_20180210_121655_81000.csv', 'MUSE_20180209_123631_33000.csv',
       'MUSE_20180210_121321_35000.csv', 'MUSE_20180210_123325_86000.csv',
       'MUSE_20180210_133123_70000.csv', 'MUSE_20180210_124246_00000.csv',
       'MUSE_20180210_133119_41000.csv', 'MUSE_20180209_171824_11000.csv',
       'MUSE_20180210_132948_45000.csv', 'MUSE_20180209_132551_67000.csv',
       'MUSE_20180210_130006_72000.csv', 'MUSE_20180209_125344_16000.csv',
       'MUSE_20180209_175222_29000.csv', 'MUSE_20180209_112139_49000.csv',
       'MUSE_20180209_115942_80000.csv', 'MUSE_20180209_170424_19000.csv',
       'MUSE_20180209_125208_73000.csv', 'MUSE_20180209_115556_82000.csv',
       'MUSE_20180210_115458_96000.csv', 'MUSE_20180210_122938_48000.csv',
       'MUSE_20180209_131