In [None]:
%matplotlib inline
from collections import defaultdict
import h5py
import numpy as np
import os

from biosppy.signals import ecg
import pandas as pd

# IPython imports
from IPython.display import Image

In [None]:
from ml4cvd.arguments import _get_tmap
from ml4cvd.tensor_generators import get_test_train_valid_paths

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.ticker import (AutoMinorLocator, MultipleLocator)

# TensorMap names, listed in the same order as the variables they are bound to below.
tmap_names = (
    'ecg_rest_ramplitude_raw',
    'ecg_rest_samplitude_raw',
    'ecg_rest_raw',
    'ecg_rest_median_raw',
    'ecg_rest_lvh_avl',
    'ecg_rest_lvh_sokolow_lyon',
    'ecg_rest_lvh_cornell',
    'lvh_fine',
)
(
    tm_ramp,
    tm_samp,
    tm_ecg_rest_raw,
    tm_ecg_median_raw,
    tm_ecg_lvh_avl,
    tm_ecg_lvh_sokolow_lyon,
    tm_ecg_lvh_cornell,
    tm_lvh_fine,
) = [_get_tmap(tmap_name) for tmap_name in tmap_names]

# Split the tensor files found under this directory into train / valid / test path lists.
# NOTE(review): the three numeric arguments are passed positionally; presumably split
# ratios and a modulo — confirm against get_test_train_valid_paths.
tensor_dir = '/mnt/disks/ecg-rest-37k-tensors/2019-11-04'
train_paths, valid_paths, test_paths = get_test_train_valid_paths(tensor_dir, 0.1, 0.1, 10)

In [None]:
import h5py
# Build one table row per tensor file (patient id, sex, LVH flags from the ECG text,
# from the lvh_fine tensor and from the aVL / Sokolow-Lyon / Cornell tensors, plus the
# R and S amplitude tensors), then write the table to CSV.
df_dic = {'patient_id': [], 'sex': [], 'LVH': [], 'aVL': [], 'Sokolow_Lyon': [], 'Cornell': [],
          'full_path': [], 'ecg_text': [], 'ramp': [], 'samp': [], 'LVH_fine': [], 'minimal_voltage': [],
          'moderate_voltage': [], 'voltage_criteria': []}
all_paths = train_paths+valid_paths+test_paths
cnt = 0  # only referenced by the disabled debug limit at the bottom of the loop
for train_path in all_paths:
    # The context manager closes the HDF5 handle on every exit path: the `continue`
    # below, the ValueError raised for missing sex info, and normal completion.
    # NOTE(review): assumes tensor_from_file returns in-memory values rather than lazy
    # h5py datasets, since ramp/samp outlive the file handle — confirm.
    with h5py.File(train_path, 'r') as hd5:
        try:
            ramp = tm_ramp.tensor_from_file(tm_ramp, hd5)
            samp = tm_samp.tensor_from_file(tm_samp, hd5)
            # ecg_rest / ecg_median are not used afterwards, but a ValueError from either
            # still causes the file to be skipped, so the calls are kept.
            ecg_rest = tm_ecg_rest_raw.tensor_from_file(tm_ecg_rest_raw, hd5)
            ecg_median = tm_ecg_median_raw.tensor_from_file(tm_ecg_median_raw, hd5)
            lvh_avl = tm_ecg_lvh_avl.tensor_from_file(tm_ecg_lvh_avl, hd5)
            lvh_sokolow_lyon = tm_ecg_lvh_sokolow_lyon.tensor_from_file(tm_ecg_lvh_sokolow_lyon, hd5)
            lvh_cornell = tm_ecg_lvh_cornell.tensor_from_file(tm_ecg_lvh_cornell, hd5)
            lvh_fine_list = tm_lvh_fine.tensor_from_file(tm_lvh_fine, hd5)
        except ValueError as e:
            # Report the file that could not be loaded and move on to the next one.
            print(train_path, str(e))
            continue

        categorical = hd5['categorical']
        is_female = 'Genetic-sex_Female_0_0' in categorical
        is_male = 'Genetic-sex_Male_0_0' in categorical
        # If genetic sex not available, try phenotypic
        if not (is_female or is_male):
            is_female = 'Sex_Female_0_0' in categorical
            is_male = 'Sex_Male_0_0' in categorical
        # If neither available, raise error (not caught here: this aborts the whole loop)
        if not (is_female or is_male):
            raise ValueError('Sex info required to evaluate LVH criteria')

        # Read the ECG text once; it is both stored and searched for LVH mentions.
        ecg_text = hd5['ecg_rest_text'][0]
        mentions_lvh = any(
            phrase in ecg_text
            for phrase in ('Left ventricular hypertrophy', 'LVH', 'left ventricular hypertrophy')
        )

        # Every flag is stored as 1.0 when its source value exceeds 0.5, else 0.0.
        # lvh_fine_list entries 1-4 feed the four lvh_fine columns (entry 0 is not used);
        # entry 1 of each criterion tensor feeds that criterion's column.
        row = {
            'patient_id': train_path.split('/')[-1].split('.hd5')[0],
            'sex': 'F' if is_female else 'M',
            'LVH': 1.0 if mentions_lvh else 0.0,
            'aVL': 1.0 if lvh_avl[1] > 0.5 else 0.0,
            'Sokolow_Lyon': 1.0 if lvh_sokolow_lyon[1] > 0.5 else 0.0,
            'Cornell': 1.0 if lvh_cornell[1] > 0.5 else 0.0,
            'full_path': train_path,
            'ecg_text': ecg_text,
            'ramp': ramp,
            'samp': samp,
            'LVH_fine': 1.0 if lvh_fine_list[4] > 0.5 else 0.0,
            'minimal_voltage': 1.0 if lvh_fine_list[1] > 0.5 else 0.0,
            'moderate_voltage': 1.0 if lvh_fine_list[2] > 0.5 else 0.0,
            'voltage_criteria': 1.0 if lvh_fine_list[3] > 0.5 else 0.0,
        }
        # Append the complete row at once so all column lists always have equal length.
        for column, value in row.items():
            df_dic[column].append(value)

    # Debug aid (disabled): uncomment to stop after 1000 processed files.
    #cnt += 1
    #if cnt == 1000 : break

df = pd.DataFrame(df_dic)
df.to_csv('/home/pdiachil/ecg_views_lvh_11_04_2019.csv')

In [None]:
from ml4cvd.plots import _ecg_rest_csv_to_df
# Reload the LVH table from the CSV written by the previous cell. The project loader is
# used instead of a plain pd.read_csv (presumably so the array-valued 'ramp'/'samp'
# columns are parsed back from their text form — TODO confirm).
lvh_csv_path = '/home/pdiachil/ecg_views_lvh_11_04_2019.csv'
df = _ecg_rest_csv_to_df(lvh_csv_path)

In [None]:
from ml4cvd.plots import _ecg_rest_traces, _subplot_ecg_rest, _ecg_rest_yrange
from ml4cvd.plots import ECG_REST_PLOT_LEADS, ECG_REST_PLOT_MEDIAN_LEADS

# Plot settings reused by every ECG figure cell below.
raw_scale = 0.005
default_yrange = 3.0
time_interval = 2.5
matplotlib.rcParams['font.size'] = 20

# Draw the first row of the table on a 6x4 grid of axes.
f, ax = plt.subplots(figsize=(24, 18), nrows=6, ncols=4, tight_layout=True)
first_patient = df.iloc[0]
with h5py.File(first_patient['full_path'], 'r') as hd5:
    traces = _ecg_rest_traces(hd5)
yrange = _ecg_rest_yrange(traces, default_yrange, raw_scale, time_interval)
# Two passes over the same axes: the regular leads at offset 3 without patient info,
# then the median leads at offset 0 with the patient's row attached.
for leads, plot_offset, patient_row, median in (
    (ECG_REST_PLOT_LEADS, 3, None, False),
    (ECG_REST_PLOT_MEDIAN_LEADS, 0, first_patient, True),
):
    _subplot_ecg_rest(traces, raw_scale, time_interval, leads, f, ax, yrange,
                      offset=plot_offset, pat_df=patient_row, is_median=median, is_blind=False)

# aVL Criterion

In [None]:
# First row whose aVL flag is set, drawn on a 6x4 grid of axes.
pp = df.loc[df['aVL'].gt(0.5)].iloc[0]
f, ax = plt.subplots(figsize=(24, 18), nrows=6, ncols=4, tight_layout=True)
with h5py.File(pp['full_path'], 'r') as hd5:
    traces = _ecg_rest_traces(hd5)
yrange = _ecg_rest_yrange(traces, default_yrange, raw_scale, time_interval)
# Regular leads at offset 3 without patient info, then median leads at offset 0 with it.
for leads, plot_offset, patient_row, median in (
    (ECG_REST_PLOT_LEADS, 3, None, False),
    (ECG_REST_PLOT_MEDIAN_LEADS, 0, pp, True),
):
    _subplot_ecg_rest(traces, raw_scale, time_interval, leads, f, ax, yrange,
                      offset=plot_offset, pat_df=patient_row, is_median=median, is_blind=False)
# Show the selected row as the cell output.
pp

# Sokolow-Lyon Criterion

In [None]:
# First row whose Sokolow_Lyon flag is set, drawn on a 6x4 grid of axes.
pp = df.loc[df['Sokolow_Lyon'].gt(0.5)].iloc[0]
f, ax = plt.subplots(figsize=(24, 18), nrows=6, ncols=4, tight_layout=True)
with h5py.File(pp['full_path'], 'r') as hd5:
    traces = _ecg_rest_traces(hd5)
yrange = _ecg_rest_yrange(traces, default_yrange, raw_scale, time_interval)
# Regular leads at offset 3 without patient info, then median leads at offset 0 with it.
for leads, plot_offset, patient_row, median in (
    (ECG_REST_PLOT_LEADS, 3, None, False),
    (ECG_REST_PLOT_MEDIAN_LEADS, 0, pp, True),
):
    _subplot_ecg_rest(traces, raw_scale, time_interval, leads, f, ax, yrange,
                      offset=plot_offset, pat_df=patient_row, is_median=median, is_blind=False)
# Show the selected row as the cell output.
pp

# Cornell Criterion

In [None]:
# Sixth row (position 5) among those whose Cornell flag is set, drawn on a 6x4 grid of axes.
pp = df.loc[df['Cornell'].gt(0.5)].iloc[5]
f, ax = plt.subplots(figsize=(24, 18), nrows=6, ncols=4, tight_layout=True)
with h5py.File(pp['full_path'], 'r') as hd5:
    traces = _ecg_rest_traces(hd5)
yrange = _ecg_rest_yrange(traces, default_yrange, raw_scale, time_interval)
# Regular leads at offset 3 without patient info, then median leads at offset 0 with it.
for leads, plot_offset, patient_row, median in (
    (ECG_REST_PLOT_LEADS, 3, None, False),
    (ECG_REST_PLOT_MEDIAN_LEADS, 0, pp, True),
):
    _subplot_ecg_rest(traces, raw_scale, time_interval, leads, f, ax, yrange,
                      offset=plot_offset, pat_df=patient_row, is_median=median, is_blind=False)
# Show the selected row as the cell output.
pp

# Control

In [None]:
# Control example: first row whose text-derived LVH flag is not set.
pp = df.loc[df['LVH'].lt(0.5)].iloc[0]
f, ax = plt.subplots(figsize=(24, 18), nrows=6, ncols=4, tight_layout=True)
with h5py.File(pp['full_path'], 'r') as hd5:
    traces = _ecg_rest_traces(hd5)
yrange = _ecg_rest_yrange(traces, default_yrange, raw_scale, time_interval)
# Regular leads at offset 3 without patient info, then median leads at offset 0 with it.
for leads, plot_offset, patient_row, median in (
    (ECG_REST_PLOT_LEADS, 3, None, False),
    (ECG_REST_PLOT_MEDIAN_LEADS, 0, pp, True),
):
    _subplot_ecg_rest(traces, raw_scale, time_interval, leads, f, ax, yrange,
                      offset=plot_offset, pat_df=patient_row, is_median=median, is_blind=False)
# Show the selected row as the cell output.
pp

In [None]:
import seaborn as sns
# Re-apply the global figure font size.
matplotlib.rcParams['font.size'] = 20
# Display names for the label categories (not referenced in the visible cells;
# presumably consumed by later plots — TODO confirm).
cat_labels = [
    'minimal',
    'moderate',
    'voltage',
    'LVH',
    'any LVH',
    'SL',
    'aVL',
    'Cornell',
]

In [None]:
# Patient 2926169, unblinded, saved to PDF.
# NOTE(review): the lookup compares patient_id against a string — confirm the loaded
# column holds strings rather than integers.
pp = df.loc[df['patient_id'].eq('2926169')].iloc[0]
f, ax = plt.subplots(figsize=(24, 18), nrows=6, ncols=4, tight_layout=True)
with h5py.File(pp['full_path'], 'r') as hd5:
    traces = _ecg_rest_traces(hd5)
yrange = _ecg_rest_yrange(traces, default_yrange, raw_scale, time_interval)
# Regular leads at offset 3 without patient info, then median leads at offset 0 with it.
for leads, plot_offset, patient_row, median in (
    (ECG_REST_PLOT_LEADS, 3, None, False),
    (ECG_REST_PLOT_MEDIAN_LEADS, 0, pp, True),
):
    _subplot_ecg_rest(traces, raw_scale, time_interval, leads, f, ax, yrange,
                      offset=plot_offset, pat_df=patient_row, is_median=median, is_blind=False)
f.savefig(f"/home/pdiachil/{pp['patient_id']}.pdf")
# Show the selected row as the cell output.
pp

In [None]:
# Patient 2926169 again, this time with is_blind=True, saved under a "_blind" file name.
# NOTE(review): the lookup compares patient_id against a string — confirm the loaded
# column holds strings rather than integers.
pp = df.loc[df['patient_id'].eq('2926169')].iloc[0]
f, ax = plt.subplots(figsize=(24, 18), nrows=6, ncols=4, tight_layout=True)
with h5py.File(pp['full_path'], 'r') as hd5:
    traces = _ecg_rest_traces(hd5)
yrange = _ecg_rest_yrange(traces, default_yrange, raw_scale, time_interval)
# Regular leads at offset 3 without patient info, then median leads at offset 0 with it.
for leads, plot_offset, patient_row, median in (
    (ECG_REST_PLOT_LEADS, 3, None, False),
    (ECG_REST_PLOT_MEDIAN_LEADS, 0, pp, True),
):
    _subplot_ecg_rest(traces, raw_scale, time_interval, leads, f, ax, yrange,
                      offset=plot_offset, pat_df=patient_row, is_median=median, is_blind=True)
f.savefig(f"/home/pdiachil/{pp['patient_id']}_blind.pdf")
# Show the selected row as the cell output.
pp

In [None]:
# Patient 3559973, unblinded, saved to PDF.
# NOTE(review): the lookup compares patient_id against a string — confirm the loaded
# column holds strings rather than integers.
pp = df.loc[df['patient_id'].eq('3559973')].iloc[0]
f, ax = plt.subplots(figsize=(24, 18), nrows=6, ncols=4, tight_layout=True)
with h5py.File(pp['full_path'], 'r') as hd5:
    traces = _ecg_rest_traces(hd5)
yrange = _ecg_rest_yrange(traces, default_yrange, raw_scale, time_interval)
# Regular leads at offset 3 without patient info, then median leads at offset 0 with it.
for leads, plot_offset, patient_row, median in (
    (ECG_REST_PLOT_LEADS, 3, None, False),
    (ECG_REST_PLOT_MEDIAN_LEADS, 0, pp, True),
):
    _subplot_ecg_rest(traces, raw_scale, time_interval, leads, f, ax, yrange,
                      offset=plot_offset, pat_df=patient_row, is_median=median, is_blind=False)
f.savefig(f"/home/pdiachil/{pp['patient_id']}.pdf")
# Show the selected row as the cell output.
pp

In [None]:
# Patient 3559973 again, this time with is_blind=True, saved under a "_blind" file name.
# NOTE(review): the lookup compares patient_id against a string — confirm the loaded
# column holds strings rather than integers.
pp = df.loc[df['patient_id'].eq('3559973')].iloc[0]
f, ax = plt.subplots(figsize=(24, 18), nrows=6, ncols=4, tight_layout=True)
with h5py.File(pp['full_path'], 'r') as hd5:
    traces = _ecg_rest_traces(hd5)
yrange = _ecg_rest_yrange(traces, default_yrange, raw_scale, time_interval)
# Regular leads at offset 3 without patient info, then median leads at offset 0 with it.
for leads, plot_offset, patient_row, median in (
    (ECG_REST_PLOT_LEADS, 3, None, False),
    (ECG_REST_PLOT_MEDIAN_LEADS, 0, pp, True),
):
    _subplot_ecg_rest(traces, raw_scale, time_interval, leads, f, ax, yrange,
                      offset=plot_offset, pat_df=patient_row, is_median=median, is_blind=True)
f.savefig(f"/home/pdiachil/{pp['patient_id']}_blind.pdf")
# Show the selected row as the cell output.
pp

In [None]:
# Patient 3499910, unblinded, drawn with a fixed y-range; the figure is not saved.
# NOTE(review): the lookup compares patient_id against a string — confirm the loaded
# column holds strings rather than integers.
pp = df.loc[df['patient_id'].eq('3499910')].iloc[0]
f, ax = plt.subplots(figsize=(24, 18), nrows=6, ncols=4, tight_layout=True)
with h5py.File(pp['full_path'], 'r') as hd5:
    traces = _ecg_rest_traces(hd5)
yrange = _ecg_rest_yrange(traces, default_yrange, raw_scale, time_interval)
# The computed range is immediately replaced by a fixed value of 6.0 for this figure.
# NOTE(review): the call above is kept in case the helper has side effects; if it is
# pure, it can be dropped.
yrange = 6.0
# Regular leads at offset 3 without patient info, then median leads at offset 0 with it.
for leads, plot_offset, patient_row, median in (
    (ECG_REST_PLOT_LEADS, 3, None, False),
    (ECG_REST_PLOT_MEDIAN_LEADS, 0, pp, True),
):
    _subplot_ecg_rest(traces, raw_scale, time_interval, leads, f, ax, yrange,
                      offset=plot_offset, pat_df=patient_row, is_median=median, is_blind=False)
# Show the selected row as the cell output.
pp