### Inference from a list of echo studies ###

In [1]:
import os
import glob
import pickle
import pandas as pd

# Custom imports
from werdich_cfr.tfutils.Modeltrainer import VideoTrainer

TensorFlow Version: 2.2.0


In [3]:
# Display the column labels of the metadata frame.
# NOTE(review): in the visible notebook, meta_df is first assigned in a later
# cell, so this cell raises NameError on a fresh kernel (Restart & Run All).
meta_df.columns

Index(['filename', 'dir', 'study', 'mrn', 'datetime', 'fileid', 'institution', 'model', 'manufacturer', 'index', 'frame_time', 'number_of_frames', 'heart_rate', 'deltaX', 'deltaY', 'a2c', 'a2c_laocc', 'a2c_lvocc_s', 'a3c', 'a3c_laocc', 'a3c_lvocc_s', 'a4c', 'a4c_far', 'a4c_laocc', 'a4c_lvocc_s', 'a4c_rv', 'a4c_rv_laocc', 'a5c', 'apex', 'other', 'plax_far', 'plax_lac', 'plax_laz', 'plax_laz_ao', 'plax_plax', 'psax_avz', 'psax_az', 'psax_mv', 'psax_pap', 'rvinf', 'subcostal', 'suprasternal'], dtype='object')

In [7]:
def read_model_dict(file):
    """Return the object stored in the pickle file at ``file``.

    Parameters
    ----------
    file : path-like
        Path of the pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever ``pickle.load`` reconstructs from the file.

    Notes
    -----
    Unpickling can execute arbitrary code, so only use this on trusted files.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle)

# --- Locations of the CFR data and of the dated metadata snapshot ---
cfr_data_root = os.path.normpath('/mnt/obi0/andreas/data/cfr')
meta_date = '200606'
meta_dir = os.path.join(cfr_data_root, f'metadata_{meta_date}')
file_df_file = f'echo_BWH_npy_feather_files_{meta_date}.parquet'
view = 'a4c'

# --- Echo metadata table ---
meta_df_file = 'echo_BWH_meta_200606.parquet'
meta_df = pd.read_parquet(os.path.join(meta_dir, meta_df_file))
n_meta_studies = len(meta_df.study.unique())
n_meta_files = len(meta_df.filename.unique())
print(f'Echo meta data has {n_meta_studies} studies and {n_meta_files} files.')

# --- Model table; the stored row labels are replaced by a fresh 0..n-1 index ---
best_models_file = os.path.join(cfr_data_root, 'best_models_200607.parquet')
best_models = pd.read_parquet(best_models_file)
best_models = best_models.reset_index(drop=True)

# --- Tab-separated echo list; study_list holds its unique study identifiers ---
echo_list_dir = os.path.normpath('/mnt/obi0/sgoto/BWHCFREvents/echoList')
echo_list_filename = 'BWH_2015-05-01_2015-10-31_FirstEcho.txt'
echo_list_file = os.path.join(echo_list_dir, echo_list_filename)
echo_list = pd.read_csv(echo_list_file, sep='\t', header='infer')
study_list = list(echo_list.study.unique())
print(f'Echo list has {len(study_list)} studies.')

Echo meta data has 45633 studies and 2257427 files.
Echo list has 7860 studies.


In [3]:
# Create a reduced npy/feather file list, restricted to the studies in the echo
# list, to speed up the metadata collection.
#
# Fix: this cell previously used `file_df_study_list` and `file_df_study`
# without defining them, so it raised NameError on a fresh kernel. They are now
# built here from the full file list named by `file_df_file`.
# NOTE(review): assumes the file list parquet has a `study` column, like
# echo_list and meta_df -- confirm against the file.
file_df = pd.read_parquet(os.path.join(meta_dir, file_df_file))
file_df_study = file_df[file_df.study.isin(study_list)].reset_index(drop=True)
file_df_study_list = list(file_df_study.study.unique())

# Kept so that any other cell still referring to this name keeps working.
meta_df_study_list = list(meta_df.study.unique())

print(len(study_list))
print(len(file_df_study_list))

# What's missing? Studies requested in the echo list that have no files.
missing_files = list(set(study_list).difference(file_df_study_list))
print(len(missing_files))

# Save missing echo list
echo_list_file_missing = echo_list_filename.split('.')[0]+'_missing.parquet'
echo_list_missing = echo_list[echo_list.study.isin(missing_files)]
echo_list_missing.to_parquet(os.path.join(meta_dir, echo_list_file_missing))

# Save the new list to collect the metadata
file_df_file_pred = os.path.join(meta_dir, 'echo_BWH_npy_feather_files_pred_'+meta_date+'.parquet')
file_df_study.to_parquet(file_df_file_pred)

7860
7705
155


In [4]:
# Load the metadata collected for the prediction file list and restrict it to
# the studies named in the echo list.
meta_file = os.path.join(cfr_data_root, 'metadata_200606', 'echo_BWH_meta_pred_200606.parquet')
meta_df = pd.read_parquet(meta_file)
print(meta_df.shape)
in_echo_list = meta_df.study.isin(study_list)
meta_df = meta_df[in_echo_list]
print(len(meta_df.study.unique()))
print(meta_df.shape)

# Keep only rows that have both an a4c view score and a frame_time, then
# renumber the index.
has_view_score = meta_df.a4c.notnull()
has_frame_time = meta_df.frame_time.notnull()
meta_df = meta_df.loc[has_view_score & has_frame_time].reset_index(drop=True)

# Summary counts after filtering.
# NOTE(review): 'clasification' is misspelled in the printed message; it is
# left unchanged here so the cell output stays identical.
n_patients = len(meta_df.mrn.unique())
n_studies = len(meta_df.study.unique())
n_files = len(meta_df.filename.unique())
print()
print('After removal of rows without view clasification:')
print(f'Total number of patients in meta data {n_patients}')
print(f'Total number of studies {n_studies}')
print(f'Total number of files in meta data {n_files}')


(368504, 42)
7687
(368504, 42)

After removal of rows without view clasification:
Total number of patients in meta data 7687
Total number of studies 7687
Total number of files in meta data 368463


In [5]:
# View-score columns grouped under a coarser view label.
view_dict = {
    'view_a2c': ['a2c', 'a2c_laocc', 'a2c_lvocc_s'],
    'view_a3c': ['a3c', 'a3c_laocc', 'a3c_lvocc_s'],
    'view_a4c': ['a4c', 'a4c_far', 'a4c_laocc', 'a4c_lvocc_s', 'a4c_rv', 'a4c_rv_laocc'],
    'view_plax': ['plax_far', 'plax_lac', 'plax_laz', 'plax_laz_ao', 'plax_plax'],
    'view_psax': ['psax_avz', 'psax_az', 'psax_mv', 'psax_pap'],
    'view_other': ['other', 'a5c', 'apex', 'rvinf', 'subcostal', 'suprasternal'],
}

# Flatten the groups into one list of view columns, in group order.
view_list = sum(view_dict.values(), [])

# Per row: the view column holding the maximum score (max_view) and, as a
# consistency check that all views are listed, the sum over all view columns
# (sum_views).
view_scores = meta_df[view_list]
echo_meta_sum_views = meta_df.assign(max_view=view_scores.idxmax(axis=1),
                                     sum_views=view_scores.sum(axis=1))

print(f'Studies in data:     {len(echo_meta_sum_views.study.unique())}')
print(f'Total files:         {len(echo_meta_sum_views.filename.unique())}')

# Keep the rows whose top-scoring view column equals the configured `view`.
is_selected_view = echo_meta_sum_views.max_view == view
meta_df_a4c = echo_meta_sum_views.loc[is_selected_view].reset_index(drop=True)

print()
print(f'Studies in a4c data: {len(meta_df_a4c.study.unique())}')
print(f'Total files:         {len(meta_df_a4c.filename.unique())}')
print(f'Total patients:      {len(meta_df_a4c.mrn.unique())}')

Studies in data:     7687
Total files:         368463

Studies in a4c data: 7061
Total files:         21735
Total patients:      7061


In [6]:
# Name the output after the echo list file (text before its first '.') plus the
# selected view, write the table as parquet, and preview the first two rows.
echo_list_stem = echo_list_filename.split('.')[0]
meta_a4c_filename = f'{echo_list_stem}_{view}.parquet'
meta_a4c_file = os.path.join(cfr_data_root, 'metadata_200606', meta_a4c_filename)
meta_df_a4c.to_parquet(meta_a4c_file)
meta_df_a4c.head(n=2)

Unnamed: 0,filename,dir,study,mrn,datetime,fileid,institution,model,manufacturer,index,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view,sum_views
0,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b093000...,48b093000d7f_4903a580509f95b92b2105b5e972,352917,2015-08-16 10:52:25,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,BWH,iE33,Philips Medical Systems,0.0,33.333,38.0,98.0,0.041843,0.041843,1.029911e-12,1.078118e-10,6.555025e-11,4.524656e-13,1.383462e-11,8.704672e-12,1.0,3.328544e-09,6.031483e-12,1.011731e-13,1.491283e-09,2.553013e-13,2.601509e-10,7.555116e-14,1.384863e-12,1.448429e-12,4.486194e-12,5.324471e-12,4.132527e-13,1.562419e-16,9.941389e-12,6.200402e-10,3.362825e-12,3.102536e-10,3.127262e-11,7.137986e-16,4.884658e-12,a4c,1.0
1,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b093000...,48b093000d7f_4903a580509f95b92b2105b5e972,352917,2015-08-16 10:52:25,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,BWH,iE33,Philips Medical Systems,0.0,33.333,38.0,96.0,0.041843,0.041843,4.055386e-12,6.216032e-12,3.634575e-11,6.060132e-12,1.590235e-10,6.489257e-12,0.999999,6.272859e-11,1.488713e-11,1.422675e-13,9.23323e-09,2.933806e-11,1.076182e-10,6.407318e-13,4.47031e-16,2.696358e-12,2.048425e-13,6.161614e-12,1.317816e-13,3.299598e-15,1.571574e-10,1.025484e-06,1.807344e-12,3.956003e-10,1.070398e-11,5.63957e-15,1.382021e-10,a4c,1.0


In [7]:
# Take the first row of the best_models table as the model description; the
# result is a pandas Series indexed by the table's column names.
model = best_models.iloc[0]
# Bare last expression so the notebook displays the selected row.
model

model_name                       nondefect_a4c_dgx-1_stress_mbf_unaff
model_output                                         stress_mbf_unaff
epoch                                                             100
chechkpoint_file    /mnt/obi0/andreas/data/cfr/log/nondefect_a4c_d...
spear_cor                                                    0.446963
spear_p                                                   1.73464e-44
pear_cor                                                     0.440022
pear_p                                                    5.11224e-43
n_samples                                                         881
dset                                                        nondefect
view                                                              a4c
gpu                                                             dgx-1
Name: 0, dtype: object