### Inference from a list of echo studies ###

In [2]:
import os
import glob
import pickle
import pandas as pd

# Custom imports
from werdich_cfr.tfutils.Modeltrainer import VideoTrainer

TensorFlow Version: 2.2.0


In [9]:
def read_model_dict(file):
    """Return the object stored in the pickle file at path `file`.

    The file is opened in binary mode and closed again before returning.
    Only use this on trusted files: unpickling can execute arbitrary code.
    """
    with open(file, mode='rb') as stream:
        return pickle.load(stream)

# --- Metadata location and settings shared by the cells below ---
cfr_data_root = os.path.normpath('/mnt/obi0/andreas/data/cfr')
meta_date = '200626'
location = 'BWH'
meta_dir = os.path.join(cfr_data_root, f'metadata_{meta_date}')
file_df_file = f'echo_{location}_npy_feather_files_{meta_date}.parquet'
view = 'a4c'

# --- Echo list to process ---
# Alternative input list, kept for reference:
#echo_list_dir = os.path.normpath('/mnt/obi0/andreas/data/cfr/predictions_echodata/SecondEchoGenetics')
#echo_list_filename = 'a4cname_'+location+'.txt'

echo_list_dir = os.path.normpath('/mnt/obi0/andreas/data/cfr/predictions_echodata/FirstEcho')
echo_list_filename = 'BWH_2015-05-01_2015-10-31_FirstEcho.txt'
echo_list_file = os.path.join(echo_list_dir, echo_list_filename)

# Tab-separated table; the column names are inferred from the file
echo_list = pd.read_csv(echo_list_file, sep='\t', header='infer')
#echo_file_list = list(echo_list.names.unique())
#print(f'Original file list: {len(echo_file_list)} files.')

# Unique study identifiers, in order of first appearance in the echo list
study_list = list(echo_list['study'].unique())

#best_models = pd.read_parquet(os.path.join(echo_list_dir, 'cfr_models_200611.parquet')).reset_index(drop=True)

In [10]:
# Number of unique studies in the echo list, followed by a preview of its first rows
print(f'{len(study_list)}')
echo_list.head(n=5)

7860


Unnamed: 0.2,Unnamed: 0,index,Unnamed: 0.1,study,date,PID
0,0,3881,24697,49028a1813c0cb79_4903a5805092805bca15ed1b16b7,2015-05-01,49028a1813c0cb79
1,1,7932,42286,490af4964b74d14b_4903a5805092805bcbd116030dcc,2015-05-01,490af4964b74d14b
2,2,717,4894,4a15355fbd842af6_4903a5805092805bca15ef6d4a45,2015-05-01,4a15355fbd842af6
3,3,6319,32489,490b1fd0c2cc4753_4903a5805092805bca15ed1abe1f,2015-05-01,490b1fd0c2cc4753
4,4,137,1221,4904bf1c6e67fec9_4903a5805092805bca15ed120704,2015-05-01,4904bf1c6e67fec9


In [22]:
# Restrict the .npy/feather file index to the studies in the echo list.
# The reduced index is saved to speed up the later metadata collection.
file_df_file = f'echo_{location}_npy_feather_files_{meta_date}.parquet'
file_df = pd.read_parquet(os.path.join(meta_dir, file_df_file))

file_df_study = file_df[file_df.study.isin(study_list)]
file_df_study_list = list(file_df_study.study.unique())

print(f'Number of studies in echo list {len(study_list)}')
print(f'Number of studies found in .npy file list {len(file_df_study_list)}')

# What's missing? Studies requested in the echo list without any entry in the file index.
missing_studies = list(set(study_list).difference(file_df_study_list))
print(f'Missing studies: {len(missing_studies)}')

# Save missing echo list.
# os.path.splitext removes only the final extension; split('.')[0] would cut
# the name at the first dot and truncate any list name that contains one.
echo_list_file_missing = os.path.splitext(echo_list_filename)[0] + '_missing.parquet'
echo_list_missing = echo_list[echo_list.study.isin(missing_studies)]
echo_list_missing.to_parquet(os.path.join(echo_list_dir, echo_list_file_missing))

# Save the new list to collect the metadata.
# The site label comes from `location` (same as the input file name above), so
# an index built for another site is not written under a hard-coded 'BWH' name.
file_df_file_pred = os.path.join(echo_list_dir,
                                 f'echo_{location}_npy_feather_files_eventechos_{meta_date}.parquet')
file_df_study.to_parquet(file_df_file_pred)

Number of studies in echo list 7860
Number of studies found in .npy file list 7784
Missing studies: 76


In [23]:
# Load the complete metadata table for this site and report how many unique
# files it contains. Filtering by echo list and by view happens in later cells.
meta_file = os.path.join(meta_dir, f'echo_{location}_meta_{meta_date}.parquet')
meta_df = pd.read_parquet(meta_file)
print(len(meta_df['filename'].unique()))

2268064


In [24]:
# Keep only the metadata rows that belong to a study in the echo list
meta_df = meta_df.loc[meta_df['study'].isin(study_list)]
#meta_df = meta_df[meta_df.filename.isin(echo_file_list)]
print(f'Number of files in meta data after echo filter: {len(meta_df.filename.unique())}')
print(meta_df.shape)

# Drop rows that lack a view classification (null a4c score) or a frame_time,
# then renumber the index from zero
meta_df = meta_df.loc[meta_df['a4c'].notnull() & meta_df['frame_time'].notnull()].reset_index(drop=True)

print()
print('After removal of rows without view clasification:')
print(f'Total number of patients in meta data {len(meta_df.mrn.unique())}')
print(f'Total number of studies {len(meta_df.study.unique())}')
print(f'Total number of files in meta data {len(meta_df.filename.unique())}')

Number of files in meta data after echo filter: 372544
(372544, 44)

After removal of rows without view clasification:
Total number of patients in meta data 7766
Total number of studies 7766
Total number of files in meta data 372503


In [25]:
# View-score columns of the metadata, grouped by view family
view_dict = {'view_a2c': ['a2c', 'a2c_laocc', 'a2c_lvocc_s'],
             'view_a3c': ['a3c', 'a3c_laocc', 'a3c_lvocc_s'],
             'view_a4c': ['a4c', 'a4c_far', 'a4c_laocc', 'a4c_lvocc_s', 'a4c_rv', 'a4c_rv_laocc'],
             'view_plax': ['plax_far', 'plax_lac', 'plax_laz', 'plax_laz_ao', 'plax_plax'],
             'view_psax': ['psax_avz', 'psax_az', 'psax_mv', 'psax_pap'],
             'view_other': ['other', 'a5c', 'apex', 'rvinf', 'subcostal', 'suprasternal']}

# Flatten the groups into a single list of score columns (dict order is kept)
view_list = sum(view_dict.values(), [])

# max_view:  name of the score column with the highest value in each row
# sum_views: row-wise total over all score columns, kept as a consistency
#            check that every view column is included in view_list
echo_meta_sum_views = meta_df.assign(max_view=meta_df[view_list].idxmax(axis=1),
                                     sum_views=meta_df[view_list].sum(axis=1))

print(f'Studies in data:     {len(echo_meta_sum_views.study.unique())}')
print(f'Total files:         {len(echo_meta_sum_views.filename.unique())}')

# Keep only the files whose top-scoring view equals the configured `view`
meta_df_a4c = echo_meta_sum_views.loc[echo_meta_sum_views['max_view'] == view].reset_index(drop=True)

print()
print(f'Studies in a4c data: {len(meta_df_a4c.study.unique())}')
print(f'Total files:         {len(meta_df_a4c.filename.unique())}')
print(f'Total patients:      {len(meta_df_a4c.mrn.unique())}')

Studies in data:     7766
Total files:         372503

Studies in a4c data: 7127
Total files:         21953
Total patients:      7127


In [26]:
# Output name: echo list name without its extension, plus the view label.
# os.path.splitext removes only the final extension; split('.')[0] would cut
# the name at the first dot and truncate any list name that contains one.
meta_a4c_filename = f'{os.path.splitext(echo_list_filename)[0]}_{view}.parquet'
print(meta_a4c_filename)

BWH_2015-05-01_2015-10-31_FirstEcho_a4c.parquet


In [21]:
# Display the output file name (same value that the cell above prints)
meta_a4c_filename

'BWH_2015-05-01_2015-10-31_FirstEcho_a4c.parquet'

In [20]:
# Write the view-filtered metadata next to the echo list, then preview two rows
meta_a4c_file = os.path.join(echo_list_dir, meta_a4c_filename)
meta_df_a4c.to_parquet(meta_a4c_file)
meta_df_a4c.head(n=2)

Unnamed: 0,filename,dir,study,mrn,datetime,file_base,identifier,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,year_month,study_full_time,institution,model,manufacturer,max_view,sum_views
0,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b093000...,48b093000d7f_4903a580509f95b92b2105b5e972,352917,2015-08-16 10:52:25,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,33.333,38.0,98.0,0.041843,0.041843,1.029911e-12,1.078118e-10,6.555025e-11,4.524656e-13,1.383462e-11,8.704672e-12,1.0,3.328544e-09,6.031483e-12,1.011731e-13,1.491283e-09,2.553013e-13,2.601509e-10,7.555116e-14,1.384863e-12,1.448429e-12,4.486194e-12,5.324471e-12,4.132527e-13,1.562419e-16,9.941389e-12,6.200402e-10,3.362825e-12,3.102536e-10,3.127262e-11,7.137986e-16,4.884658e-12,2015.0,20150816105225,BWH,iE33,Philips Medical Systems,a4c,1.0
1,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b093000...,48b093000d7f_4903a580509f95b92b2105b5e972,352917,2015-08-16 10:52:25,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,33.333,38.0,96.0,0.041843,0.041843,4.055386e-12,6.216032e-12,3.634575e-11,6.060132e-12,1.590235e-10,6.489257e-12,0.999999,6.272859e-11,1.488713e-11,1.422675e-13,9.23323e-09,2.933806e-11,1.076182e-10,6.407318e-13,4.47031e-16,2.696358e-12,2.048425e-13,6.161614e-12,1.317816e-13,3.299598e-15,1.571574e-10,1.025484e-06,1.807344e-12,3.956003e-10,1.070398e-11,5.63957e-15,1.382021e-10,2015.0,20150816105225,BWH,iE33,Philips Medical Systems,a4c,1.0


In [17]:
# Combine the per-site a4c tables (MGH rows first, then BWH) into a single file
df_mgh = pd.read_parquet(os.path.join(echo_list_dir, 'a4cname_MGH_a4c.parquet'))
df_bwh = pd.read_parquet(os.path.join(echo_list_dir, 'a4cname_BWH_a4c.parquet'))
print(f'MGH files {len(df_mgh.filename.unique())}')
print(f'BWH files {len(df_bwh.filename.unique())}')

# ignore_index=True already gives the result a fresh 0..n-1 index
df = pd.concat([df_mgh, df_bwh], ignore_index=True)
fn = 'a4cname_MGHBWH_a4c.parquet'
df.to_parquet(os.path.join(echo_list_dir, fn))
print(f'Total files {len(df.filename.unique())}')
df.head()

MGH files 13273
BWH files 26537
Total files 39810


Unnamed: 0,filename,dir,study,mrn,datetime,file_base,identifier,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,year_month,study_full_time,institution,model,manufacturer,max_view,sum_views
0,48b090129a7ecc_4903a582ec7227e72ca1a12db852_Im...,/mnt/obi0/phi/echo/npyFiles/MGH/48b0/48b090129...,48b090129a7ecc_4903a582ec7227e72ca1a12db852,3517085,2017-03-29 10:52:45,48b090129a7ecc_4903a582ec7227e72ca1a12db852_Im...,48b090129a7ecc_4903a582ec7227e72ca1a12db852_Im...,33.333,76.0,48.0,0.038599,0.038599,4.369181e-14,1.002213e-11,4.952e-17,9.540701e-20,8.480630999999999e-19,3.362204e-15,1.0,1.380723e-14,1.235565e-16,1.717246e-15,1.413478e-16,1.533292e-16,2.999292e-17,3.079603e-17,2.82771e-22,6.419594e-17,4.40153e-16,2.819618e-16,3.035525e-18,6.578e-17,9.705355e-16,1.93774e-14,8.963561e-17,1.988039e-15,1.5890309999999998e-20,2.990445e-20,2.887505e-18,2017.0,20170329105245,MGH,EPIQ 7C,Philips Medical Systems,a4c,1.0
1,48b09016b32e8b_4903a580509311eb732873606d54_Im...,/mnt/obi0/phi/echo/npyFiles/MGH/48b0/48b09016b...,48b09016b32e8b_4903a580509311eb732873606d54,3513257,2015-04-14 08:24:23,48b09016b32e8b_4903a580509311eb732873606d54_Im...,48b09016b32e8b_4903a580509311eb732873606d54_Im...,33.333,91.0,42.0,0.047111,0.047111,3.749047e-07,2.99079e-10,2.754385e-09,4.811635e-08,3.790895e-10,8.239009e-09,0.996496,1.198259e-08,1.066312e-10,8.882178e-09,0.001425669,5.24065e-09,1.40447e-08,1.202118e-09,6.22932e-08,1.117606e-08,4.633523e-11,1.920444e-09,4.513532e-11,1.979736e-09,3.215599e-09,8.065429e-06,1.350255e-10,0.002069227,1.68513e-08,9.140099e-11,4.707263e-11,2015.0,20150414082423,MGH,iE33,Philips Medical Systems,a4c,1.0
2,48b09016b32e8b_4903a581a72ff2fdc4492ca8e75b_Im...,/mnt/obi0/phi/echo/npyFiles/MGH/48b0/48b09016b...,48b09016b32e8b_4903a581a72ff2fdc4492ca8e75b,3513257,2014-04-28 08:31:21,48b09016b32e8b_4903a581a72ff2fdc4492ca8e75b_Im...,48b09016b32e8b_4903a581a72ff2fdc4492ca8e75b_Im...,33.333,54.0,70.0,0.047111,0.047111,0.0002788943,1.360963e-11,7.516717e-11,4.570663e-08,4.818267e-10,8.912853e-10,0.999433,1.198572e-08,8.800127e-11,4.504397e-11,0.0002830311,5.260635e-10,3.795436e-10,1.40303e-10,1.79719e-12,1.253029e-08,7.809595e-10,9.42671e-07,1.11142e-10,5.252734e-10,1.304089e-06,2.442252e-06,3.190528e-11,1.43952e-07,2.255056e-11,9.298466e-13,3.384194e-11,2014.0,20140428083121,MGH,iE33,Philips Medical Systems,a4c,1.0
3,48b09016b32e8b_4903a581a72ff2fdc4492ca8e75b_Im...,/mnt/obi0/phi/echo/npyFiles/MGH/48b0/48b09016b...,48b09016b32e8b_4903a581a72ff2fdc4492ca8e75b,3513257,2014-04-28 08:31:21,48b09016b32e8b_4903a581a72ff2fdc4492ca8e75b_Im...,48b09016b32e8b_4903a581a72ff2fdc4492ca8e75b_Im...,33.333,52.0,69.0,0.047111,0.047111,1.682169e-06,1.904493e-08,1.83649e-08,7.471302e-07,2.894902e-08,1.003419e-06,0.990336,8.451686e-09,1.251782e-07,3.772201e-09,0.003665891,1.620741e-07,3.043488e-08,5.759533e-07,1.788566e-09,1.232272e-08,6.766641e-10,1.071222e-06,3.109066e-08,5.948356e-08,0.000181175,0.004088665,4.349659e-09,0.0007658837,0.0009561967,1.155783e-10,5.315388e-08,2014.0,20140428083121,MGH,iE33,Philips Medical Systems,a4c,1.0
4,48b09016b32e8b_4903a58358333064ca670990adc4_Im...,/mnt/obi0/phi/echo/npyFiles/MGH/48b0/48b09016b...,48b09016b32e8b_4903a58358333064ca670990adc4,3513257,2016-04-11 08:47:09,48b09016b32e8b_4903a58358333064ca670990adc4_Im...,48b09016b32e8b_4903a58358333064ca670990adc4_Im...,33.333,102.0,71.0,0.047111,0.047111,0.000301172,1.236817e-09,5.63716e-10,6.550184e-09,1.458524e-10,1.070566e-08,0.99967,5.558679e-08,4.529298e-11,1.584545e-10,2.30614e-06,4.355476e-11,4.112836e-10,1.137845e-10,2.314202e-09,7.296246e-08,1.466954e-08,1.012915e-06,5.2549e-11,3.632735e-11,1.306045e-05,1.190157e-05,6.665559e-11,1.076e-07,7.744462e-08,2.888699e-12,1.301527e-08,2016.0,20160411084709,MGH,iE33,Philips Medical Systems,a4c,1.0


In [13]:
# Alias the table of best models and display it.
# NOTE(review): the only visible assignment of `best_models` is a commented-out
# pd.read_parquet(...) line in the setup cell, so this cell raises NameError on
# a fresh kernel unless `best_models` is defined elsewhere - confirm and restore
# the load before relying on this cell.
model = best_models
model

Unnamed: 0,model_name,model_output,epoch,chechkpoint_file,spear_cor,spear_p,pear_cor,pear_p,n_samples,dset,view,gpu
0,nondefect_a4c_dgx-1_rest_mbf_unaff,rest_mbf_unaff,100,/mnt/obi0/andreas/data/cfr/log/nondefect_a4c_d...,0.343776,7.655502e-26,0.35096,6.191784e-27,881,nondefect,a4c,dgx-1
1,nondefect_a4c_dgx-1_stress_mbf_unaff,stress_mbf_unaff,100,/mnt/obi0/andreas/data/cfr/log/nondefect_a4c_d...,0.446963,1.734637e-44,0.440022,5.1122440000000006e-43,881,nondefect,a4c,dgx-1
2,nondefect_a4c_dgx-1_unaffected_cfr,unaffected_cfr,100,/mnt/obi0/andreas/data/cfr/log/nondefect_a4c_d...,0.226273,1.083594e-11,0.263326,1.934262e-15,881,nondefect,a4c,dgx-1
3,global_a4c_gpu2_stress_global_mbf,stress_global_mbf,150,/mnt/obi0/andreas/data/cfr/log/global_a4c_gpu2...,0.521974,1.406801e-74,0.532282,5.11009e-78,1052,global,a4c,gpu2
4,global_a4c_dgx-1_rest_global_mbf,rest_global_mbf,150,/mnt/obi0/andreas/data/cfr/log/global_a4c_dgx-...,0.444414,3.753792e-52,0.458937,6.3436149999999995e-56,1052,global,a4c,dgx-1
5,global_a4c_gpu2_global_cfr_calc,global_cfr_calc,50,/mnt/obi0/andreas/data/cfr/log/global_a4c_gpu2...,0.325327,2.339397e-27,0.354585,1.5939430000000001e-32,1052,global,a4c,gpu2


In [20]:
def _load_site_file_tables(site, list_filename, dis_df_file, echo_df_file):
    """Load one site's echo list and prediction tables and print file counts.

    All files are read from `echo_list_dir`. The echo list is read into a local
    variable, so the notebook-level `echo_list`, `echo_list_file` and
    `echo_list_filename` set in the configuration cell are left untouched.

    Args:
        site: Label printed as the heading of the summary (e.g. 'BWH').
        list_filename: Tab-separated echo list; its `names` column is used.
        dis_df_file: Parquet file holding the disqualified files.
        echo_df_file: Parquet file holding the files with .npy and metadata.

    Returns:
        Tuple `(echo_file_list, dis_df, echo_df)`: the unique entries of the
        `names` column and the two loaded DataFrames.
    """
    site_echo_list = pd.read_csv(os.path.join(echo_list_dir, list_filename), header='infer', sep='\t')
    echo_file_list = list(site_echo_list.names.unique())
    dis_df = pd.read_parquet(os.path.join(echo_list_dir, dis_df_file))
    echo_df = pd.read_parquet(os.path.join(echo_list_dir, echo_df_file))

    print(site)
    print(f'Files in original list:       {len(echo_file_list)}')
    print(f'Files with .npy and metadata: {len(echo_df.filename.unique())}')
    print(f'Disqualified files:           {len(dis_df.filename.unique())}')
    return echo_file_list, dis_df, echo_df


# Load disqualified files after predictions
dis_df_file_BWH = 'a4cname_BWH_a4c_disqualified.parquet'
echo_df_file_BWH = 'a4cname_BWH_a4c.parquet'
echo_file_list_BWH, dis_df_BWH, echo_df_BWH = _load_site_file_tables(
    'BWH', 'a4cname_BWH.txt', dis_df_file_BWH, echo_df_file_BWH)

# Blank line between the two site summaries
print()

dis_df_file_MGH = 'a4cname_MGH_a4c_disqualified.parquet'
echo_df_file_MGH = 'a4cname_MGH_a4c.parquet'
echo_file_list_MGH, dis_df_MGH, echo_df_MGH = _load_site_file_tables(
    'MGH', 'a4cname_MGH.txt', dis_df_file_MGH, echo_df_file_MGH)

BWH
Files in original list:       26537
Files with .npy and metadata: 26537
Disqualified files:           4869

MGH
Files in original list:       13273
Files with .npy and metadata: 13273
Disqualified files:           4309


In [61]:
# Percentage of MGH files that were disqualified, computed from the loaded
# tables rather than from numbers copied by hand out of the printed summary
len(dis_df_MGH.filename.unique()) / len(echo_df_MGH.filename.unique()) * 100

32.46440141640925

In [62]:
max_frame_time = 33.34  ### FINAL CUTOFF FOR FRAME TIME INTERVAL (ms, same scale as the frame_time column)

# Video length in seconds: frame_time is scaled by 1e-3 before multiplying by the frame count
dis_df_BWH = dis_df_BWH.assign(videolen=(dis_df_BWH.frame_time * 1e-3 * dis_df_BWH.number_of_frames))

# Minimum video length in seconds: 40 frames at the longest allowed frame time.
# The 1e-3 factor puts it in the same unit as videolen; without it the threshold
# was 1333.6 and no video could ever pass.
min_len = max_frame_time * 1e-3 * 40

# Compare like with like: frame_time against the cutoff on its own scale (the
# previous *1000 made that condition pass for practically every row) and
# videolen against min_len, both in seconds.
dis_df_BWH_filter = dis_df_BWH[(dis_df_BWH.frame_time <= max_frame_time) & (dis_df_BWH.videolen >= min_len)]

In [63]:
# Display the disqualified BWH files that pass the frame-time / video-length filter.
# NOTE(review): this renders the whole frame; consider .head() if it can be large.
dis_df_BWH_filter

Unnamed: 0,filename,dir,study,mrn,datetime,fileid,institution,model,manufacturer,index,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view,sum_views,videolen
