In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import neurokit2 as nk
import seaborn as sns



### Aioros

In [4]:
df = pd.read_pickle(f'../workdata/miro/part_0.pkl')
df = df.groupby(['diag', 'db']).head(5)
df.groupby(['diag', 'db']).size()

diag  db                  
AF    WFDB_CPSC2018           5
      WFDB_CPSC2018_2         5
      WFDB_ChapmanShaoxing    5
      WFDB_Ga                 5
      WFDB_PTB                1
      WFDB_PTBXL              5
SR    WFDB_CPSC2018           5
      WFDB_ChapmanShaoxing    5
      WFDB_Ga                 5
      WFDB_Ningbo             5
      WFDB_PTB                5
      WFDB_PTBXL              5
dtype: int64

### Aioros Large

In [12]:
df = pd.read_pickle(f'../workdata/miro/part_0.pkl')
df = df.append(pd.read_pickle(f'../workdata/miro/part_1.pkl'))
df = df.append(pd.read_pickle(f'../workdata/miro/part_2.pkl'))

df.groupby(['diag', 'db']).size()
print(df.shape)
df.head()

(9951, 7)


Unnamed: 0,db,filename,diag,total_diags,db_freq,time,data
0,WFDB_CPSC2018,A4644.mat,SR,1,500,10.0,"[[28.56657366217581, 193.9291784767534, 189.77..."
1,WFDB_PTBXL,HR15248.mat,SR,1,500,10.0,"[[6.123519477705961, 123.8742732531606, 185.69..."
2,WFDB_Ga,E02092.mat,SR,1,500,10.0,"[[-26.92540258258549, 1.9379950802880224, -49...."
3,WFDB_PTBXL,HR18712.mat,SR,5,500,10.0,"[[19.638712349506292, -30.40160146972147, -18...."
4,WFDB_PTBXL,HR21119.mat,AF,4,500,10.0,"[[-15.478587022808028, 37.04396020085395, 22.4..."


### Aioros CPSC2018

In [3]:
parts = os.listdir(f'../workdata/miro/')

df = pd.DataFrame()

for p in parts:
    df_p = pd.read_pickle(f'../workdata/miro/{p}')
    df_p = df_p[df_p['db'] == 'WFDB_CPSC2018']
    df = df.append(df_p)

In [7]:
df.head()

Unnamed: 0,db,filename,diag,total_diags,db_freq,time,data
26536,WFDB_CPSC2018,A6609.mat,AF,1,500,10.0,"[[-11.830971765775502, 37.89624923013383, 20.6..."
26547,WFDB_CPSC2018,A1053.mat,AF,1,500,15.0,"[[-7.144098719139925, -56.02482888253132, -30...."
26599,WFDB_CPSC2018,A3818.mat,SR,1,500,10.0,"[[-6.718628122028384, -55.11755203799143, -102..."
26620,WFDB_CPSC2018,A3861.mat,AF,1,500,21.0,"[[22.404769340349155, 409.03335089669645, 356...."
26622,WFDB_CPSC2018,A0453.mat,SR,1,500,16.0,"[[19.038973970920708, 3.7749817864698345, -19...."


In [8]:
leads = ('I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6')
sns.set(rc={'figure.figsize':(11.7,8.27)})
df2 = pd.DataFrame(columns=['diag', 'data'])

PRESENTATION_MODE = False

if PRESENTATION_MODE:
    if not os.path.exists(f'../workdata/aioros/SR/'):
        os.makedirs(f'../workdata/aioros/SR/')
    if not os.path.exists(f'../workdata/aioros/AF/'):
        os.makedirs(f'../workdata/aioros/AF/')
        
rows = df.shape[0]

for i, row in df.iterrows():
    print(f'{i}/{rows}')
    try:
        ecg_signal = row['data'][-2] # V5
        _, rpeaks = nk.ecg_peaks(ecg_signal, sampling_rate=100)
        ecg_r_peaks = rpeaks['ECG_R_Peaks']
        ecg_r_peaks = list(map(lambda x: x - 20, ecg_r_peaks))
        ecg_size = len(ecg_signal)
        ecg_r_peaks.insert(0, 0)
        ecg_r_peaks.insert(ecg_size, ecg_size)

        n_peaks = len(ecg_r_peaks)
        max_interval_RR = 0
        i_RR = 0

        for i_peak, peak in enumerate(ecg_r_peaks):
            if i_peak == n_peaks - 1:
                continue           
            interval_size = ecg_r_peaks[i_peak+1] - ecg_r_peaks[i_peak]
            if interval_size > max_interval_RR:
                max_interval_RR = interval_size
                i_RR = i_peak

        boundies = ecg_r_peaks[i_RR:i_RR+2]
        diag = row["diag"]
        new_row = {
            'db': row['db'],
            'filename': row['filename'],
            'diag': diag,
            'data': row['data'][:, boundies[0]:boundies[1]]
        }
        
        if PRESENTATION_MODE:
            if not os.path.exists(f'../workdata/aioros/{diag}/sample_{i}/'):
                os.makedirs(f'../workdata/aioros/{diag}/sample_{i}/')
            plot = sns.lineplot(
            x=np.arange(0, len(ecg_signal), 1, dtype=int),
            y=ecg_signal)
            plot.axvline(boundies[0], color='red')
            plot.axvline(boundies[1], color='red')
            fig = plot.get_figure()
            fig.set_size_inches(25,15)
            fig.savefig(f"../workdata/aioros/{diag}/sample_{i}/sample_v5.png", facecolor='white', transparent=False, dpi=300) 
            fig.clf()
            for j, lead in enumerate(leads):
                plot = sns.lineplot(x=np.arange(0, boundies[1]-boundies[0], 1, dtype=int), 
                                    y=new_row['data'][j])
                fig = plot.get_figure()
                fig.savefig(f"../workdata/aioros/{diag}/sample_{i}/lead_{lead}.png", facecolor='white', transparent=False, dpi=300) 
                fig.clf()
        df2 = df2.append(new_row, ignore_index=True)
    except Exception as e:
        print(e)
    
df2.to_pickle(f'../workdata/aioros/aioros_cpsc2018.pkl')

26536/2139
26547/2139
26599/2139
26620/2139
26622/2139
26624/2139
26625/2139
26626/2139
26629/2139
26632/2139
26644/2139
26665/2139
26691/2139
26736/2139
26774/2139
26777/2139
26793/2139
26830/2139
26833/2139
26839/2139
26851/2139
26863/2139
26864/2139
26887/2139
26901/2139
26903/2139
26923/2139
26954/2139
26998/2139
27008/2139
27017/2139
27033/2139
27042/2139
27060/2139
27066/2139
27080/2139
27107/2139
27124/2139
27130/2139
27134/2139
27136/2139
27150/2139
27170/2139
27181/2139
27207/2139
27214/2139
27249/2139
27252/2139
27274/2139
27293/2139
27334/2139
27341/2139
27346/2139
27369/2139
27378/2139
27382/2139
27389/2139
27399/2139
27411/2139
27422/2139
27430/2139
27437/2139
27446/2139
27456/2139
27475/2139
27498/2139
27553/2139
27558/2139
27576/2139
27596/2139
27620/2139
27630/2139
27652/2139
27653/2139
27680/2139
27729/2139
27759/2139
27836/2139
27869/2139
27872/2139
27885/2139
27903/2139
27953/2139
27968/2139
27997/2139
28021/2139
28023/2139
28026/2139
28035/2139
28097/2139
28104/2139

18806/2139
18808/2139
18815/2139
18824/2139
18833/2139
18846/2139
18855/2139
18867/2139
18873/2139
18895/2139
18929/2139
18959/2139
18990/2139
19002/2139
19006/2139
19008/2139
19030/2139
19033/2139
19035/2139
19040/2139
19044/2139
19066/2139
19089/2139
19090/2139
19091/2139
19093/2139
19114/2139
19122/2139
19134/2139
19138/2139
19157/2139
19195/2139
19209/2139
19221/2139
19252/2139
19294/2139
19329/2139
19355/2139
19365/2139
19369/2139
19370/2139
19371/2139
19375/2139
19380/2139
19381/2139
19397/2139
19407/2139
19419/2139
19424/2139
19428/2139
19436/2139
19451/2139
19452/2139
19466/2139
19480/2139
19487/2139
19491/2139
19495/2139
19503/2139
19505/2139
19531/2139
19593/2139
19600/2139
19601/2139
19614/2139
19617/2139
19625/2139
19629/2139
19631/2139
19638/2139
19652/2139
19654/2139
19659/2139
19665/2139
19680/2139
19697/2139
19712/2139
19713/2139
19716/2139
19738/2139
19741/2139
19745/2139
19760/2139
19761/2139
19765/2139
19777/2139
19791/2139
19811/2139
19813/2139
19827/2139
19829/2139

4082/2139
4083/2139
4109/2139
4147/2139
4151/2139
4165/2139
4173/2139
4198/2139
4202/2139
4212/2139
4216/2139
4246/2139
4252/2139
4294/2139
4305/2139
4353/2139
4370/2139
4374/2139
4429/2139
4435/2139
4440/2139
4465/2139
4476/2139
4488/2139
4489/2139
4495/2139
4507/2139
4510/2139
4512/2139
4517/2139
4519/2139
4545/2139
4549/2139
4577/2139
4592/2139
4630/2139
4646/2139
4657/2139
4664/2139
4672/2139
4684/2139
4695/2139
4702/2139
4706/2139
4710/2139
4715/2139
4722/2139
4726/2139
4774/2139
4792/2139
4810/2139
4814/2139
4826/2139
4831/2139
4838/2139
4850/2139
4863/2139
4875/2139
4911/2139
4934/2139
4935/2139
4949/2139
4955/2139
4985/2139
4990/2139
5044/2139
5048/2139
5063/2139
5066/2139
5073/2139
5074/2139
5091/2139
5100/2139
5122/2139
5138/2139
5139/2139
5141/2139
5158/2139
5161/2139
5196/2139
5225/2139
5232/2139
5236/2139
5240/2139
5250/2139
5265/2139
5336/2139
5374/2139
5382/2139
5384/2139
5414/2139
5430/2139
5437/2139
5442/2139
5450/2139
5458/2139
5474/2139
5495/2139
5506/2139
5525/2139
