### Select patients for PET-CFR-MBF models: Combine file names, CFR measurements and views ###

In [1]:
import os
import numpy as np
import pandas as pd

# Raise the pandas display limits so the wide metadata tables can be inspected.
for option_name, option_value in (('display.max_rows', 500),
                                  ('display.max_columns', 50),
                                  ('display.width', 1000)):
    pd.set_option(option_name, option_value)

from bokeh.io import output_notebook, reset_output, show, output_file, save
from bokeh.plotting import figure
from bokeh.layouts import column, row, gridplot
from bokeh.models import ColumnDataSource, HoverTool, Legend

from bokeh.palettes import Category10

In [2]:
# Project data root and the dated metadata snapshot used throughout the notebook.
cfr_meta_date = '200519'
cfr_data_root = os.path.normpath('/mnt/obi0/andreas/data/cfr')
meta_dir_name = 'metadata_' + cfr_meta_date
meta_dir = os.path.join(cfr_data_root, meta_dir_name)
print(meta_dir)

/mnt/obi0/andreas/data/cfr/metadata_200519


In [3]:
# Load the PET measurements that were matched with echo studies.
study_cfr_file = os.path.join(meta_dir, 'pet_match365_diff_' + cfr_meta_date + '.parquet')
study_cfr_df = pd.read_parquet(path = study_cfr_file)

# Report how many distinct echo studies are part of the match table.
matched_study_ids = study_cfr_df['study'].unique()
print('Number of matched CFR-ECHO studies: {}'.format(len(matched_study_ids)))
study_cfr_df.head()

Number of matched CFR-ECHO studies: 6443


Unnamed: 0,mrn,study,pet_date,echo_date,petmrn_identifier,days_post_pet,difference(days),pet_measurement
7910,35156678,48b09010a2219aad_4903a582edf3bd118ffb3386065b,2018-10-15,2017-12-06,35156678_2018-10-15,-313,313.0,1.0
7911,35156678,48b09010a2219aad_4903a58dd44a9e3cbfca18de5a4a,2018-10-15,2018-10-12,35156678_2018-10-15,-3,3.0,1.0
7912,35169325,48b09013185d7d6b_4903a582ec746dc05201b6bc42b3,2017-05-22,2017-05-18,35169325_2017-05-22,-4,4.0,1.0
7913,35169325,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113,2017-05-22,2017-06-21,35169325_2017-05-22,30,30.0,1.0
7908,35133560,48b09016b25a7f36_4903a582ec746dce5e9b6e469ed0,2018-04-25,2017-05-16,35133560_2018-04-25,-344,344.0,1.0


In [4]:
# Per-video metadata for the echo files, including the view classifier scores.
echo_meta_file = os.path.join(meta_dir, 'echo_BWH_meta_' + cfr_meta_date + '.parquet')
# mrn is cast to int64 directly after loading; it is one of the keys used for merging.
echo_meta_df = pd.read_parquet(echo_meta_file).astype({'mrn': 'int64'})

meta_study_ids = echo_meta_df['study'].unique()
print('Number of studies in meta data {}'.format(len(meta_study_ids)))
echo_meta_df.head()

Number of studies in meta data 40677


Unnamed: 0,filename,dir,study,mrn,datetime,fileid,institution,model,manufacturer,index,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal
0,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7,35154293,2017-06-21 13:58:47,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,BWH,iE33,Philips Medical Systems,0.0,63.756,46.0,64.0,0.037484,0.037484,1.825323e-12,9.551256e-10,3.902316e-12,1.877408e-12,1.445331e-10,9.478744e-12,8.755016e-05,7.88908e-11,2.352379e-15,2.875156e-11,1.463021e-10,2.815503e-12,1.344274e-09,7.833037e-14,0.9999125,3.43628e-13,3.458812e-11,4.586925e-13,1.264374e-12,7.937974e-10,1.614885e-15,3.353979e-09,2.809696e-10,1.067778e-09,4.724536e-14,4.397777e-10,2.398687e-12
1,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7,35154293,2017-06-21 13:58:47,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,BWH,iE33,Philips Medical Systems,0.0,44.333,64.0,67.0,0.041844,0.041844,1.841953e-11,5.083539e-10,1.585738e-11,1.233755e-10,1.995799e-10,1.127267e-12,1.235251e-12,4.256902e-11,9.41896e-16,7.657971e-12,1.053997e-11,6.121281e-11,5.98139e-10,2.671665e-12,0.9999999,4.888175e-14,2.039909e-12,1.602643e-13,4.351348e-11,4.508235e-10,1.274972e-14,1.103503e-11,2.00157e-10,7.542065e-08,9.852938e-14,8.525817e-11,4.462386e-11
2,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7,35154293,2017-06-21 13:58:47,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,BWH,iE33,Philips Medical Systems,0.0,47.167,43.0,114.0,0.015574,0.015574,5.759206e-14,8.534522e-15,1.077153e-15,4.819185e-12,1.353083e-14,1.152021e-14,8.355481e-14,1.249883e-13,1.769145e-14,2.50443e-13,2.842752e-16,4.828185e-11,1.230143e-15,2.409846e-14,1.0,2.3599530000000002e-17,1.18448e-15,1.507853e-15,1.212771e-10,7.517319e-12,7.758529e-17,2.740249e-12,5.46094e-14,2.999578e-13,1.918233e-17,2.649029e-16,2.228561e-13
3,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7,35154293,2017-06-21 13:58:47,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,BWH,iE33,Philips Medical Systems,0.0,52.725,52.0,65.0,0.037484,0.037484,1.751908e-13,2.611923e-10,3.829348e-11,4.161394e-09,4.401294e-07,3.898549e-11,1.846615e-09,1.314888e-08,2.163399e-10,1.356451e-11,1.390473e-08,4.085282e-09,3.373987e-09,8.878076e-12,0.9999992,5.287207e-11,9.219395e-10,1.477895e-10,9.975262e-09,3.489092e-13,1.091603e-12,1.06178e-08,6.673472e-09,3.354299e-07,3.170946e-13,3.176687e-12,7.696499e-10
4,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7,35154293,2017-06-21 13:58:47,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,BWH,iE33,Philips Medical Systems,0.0,33.333,41.0,64.0,0.025704,0.025704,6.211826e-05,2.515238e-08,2.613256e-07,7.509958e-06,1.858536e-08,0.000106212,2.109523e-05,0.0003603096,4.731141e-09,8.429137e-07,0.003748224,1.115757e-07,0.01138439,0.0003313288,3.940278e-10,4.206316e-05,3.919671e-05,0.1193105,5.120688e-06,1.212203e-07,0.0005713144,0.001576522,3.926847e-07,9.254544e-05,0.8621842,1.390069e-08,0.0001556575


In [5]:
def _print_meta_counts(meta_df):
    """Print the number of distinct patients (mrn), studies and files in meta_df."""
    print('Total number of patients in meta data {}'.format(len(meta_df.mrn.unique())))
    print('Total number of studies {}'.format(len(meta_df.study.unique())))
    print('Total number of files in meta data {}'.format(len(meta_df.filename.unique())))


_print_meta_counts(echo_meta_df)

# A missing a4c score is used as the marker for "row has no view classification".
has_view_scores = echo_meta_df.a4c.notnull()

temp = echo_meta_df.loc[~has_view_scores].reset_index(drop = True)
print()
print('Studies without view classification {}'.format(len(temp.study.unique())))

# Keep only the rows that have a view classification ...
echo_meta_all_views = echo_meta_df.loc[has_view_scores]
# ... and, of those, only the rows that also have a frame_time.
has_frame_time = echo_meta_all_views.frame_time.notnull()
echo_meta_all_views = echo_meta_all_views.loc[has_frame_time].reset_index(drop = True)

print()
print('After removal of rows without view clasification:')
_print_meta_counts(echo_meta_all_views)

Total number of patients in meta data 23753
Total number of studies 40677
Total number of files in meta data 2024841

Studies without view classification 247

After removal of rows without view clasification:
Total number of patients in meta data 23747
Total number of studies 40669
Total number of files in meta data 2017346


In [7]:
# Groups of view classifier columns: group name -> names of the score columns in that group.
view_dict = dict(
    view_a2c = ['a2c', 'a2c_laocc', 'a2c_lvocc_s'],
    view_a3c = ['a3c', 'a3c_laocc', 'a3c_lvocc_s'],
    view_a4c = ['a4c', 'a4c_far', 'a4c_laocc', 'a4c_lvocc_s', 'a4c_rv', 'a4c_rv_laocc'],
    view_plax = ['plax_far', 'plax_lac', 'plax_laz', 'plax_laz_ao', 'plax_plax'],
    view_psax = ['psax_avz', 'psax_az', 'psax_mv', 'psax_pap'],
    view_other = ['other', 'a5c', 'apex', 'rvinf', 'subcostal', 'suprasternal'],
)

In [8]:
# Flat list of all individual view score columns, in the order given by view_dict.
view_list = [item for sublist in view_dict.values() for item in sublist]

# The grouped view sums (one column per key of view_dict) that used to be computed here
# were thrown away again by the assignment below, so they are no longer calculated.

# For each video, store the NAME of the view column with the highest classification score.
echo_meta_sum_views = echo_meta_all_views.assign(
    max_view = echo_meta_all_views[view_list].idxmax(axis = 1))

# Consistency check: sum of all view scores per video. The rows previewed in this notebook
# show 1.0; a different value would hint at a view column that is missing from view_list.
echo_meta_sum_views = echo_meta_sum_views.assign(
    sum_views = echo_meta_sum_views[view_list].sum(axis = 1))

In [10]:
# Preview the first rows, now including the max_view and sum_views columns.
echo_meta_sum_views.head(n = 5)

Unnamed: 0,filename,dir,study,mrn,datetime,fileid,institution,model,manufacturer,index,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view,sum_views
0,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7,35154293,2017-06-21 13:58:47,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,BWH,iE33,Philips Medical Systems,0.0,63.756,46.0,64.0,0.037484,0.037484,1.825323e-12,9.551256e-10,3.902316e-12,1.877408e-12,1.445331e-10,9.478744e-12,8.755016e-05,7.88908e-11,2.352379e-15,2.875156e-11,1.463021e-10,2.815503e-12,1.344274e-09,7.833037e-14,0.9999125,3.43628e-13,3.458812e-11,4.586925e-13,1.264374e-12,7.937974e-10,1.614885e-15,3.353979e-09,2.809696e-10,1.067778e-09,4.724536e-14,4.397777e-10,2.398687e-12,other,1.0
1,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7,35154293,2017-06-21 13:58:47,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,BWH,iE33,Philips Medical Systems,0.0,44.333,64.0,67.0,0.041844,0.041844,1.841953e-11,5.083539e-10,1.585738e-11,1.233755e-10,1.995799e-10,1.127267e-12,1.235251e-12,4.256902e-11,9.41896e-16,7.657971e-12,1.053997e-11,6.121281e-11,5.98139e-10,2.671665e-12,0.9999999,4.888175e-14,2.039909e-12,1.602643e-13,4.351348e-11,4.508235e-10,1.274972e-14,1.103503e-11,2.00157e-10,7.542065e-08,9.852938e-14,8.525817e-11,4.462386e-11,other,1.0
2,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7,35154293,2017-06-21 13:58:47,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,BWH,iE33,Philips Medical Systems,0.0,47.167,43.0,114.0,0.015574,0.015574,5.759206e-14,8.534522e-15,1.077153e-15,4.819185e-12,1.353083e-14,1.152021e-14,8.355481e-14,1.249883e-13,1.769145e-14,2.50443e-13,2.842752e-16,4.828185e-11,1.230143e-15,2.409846e-14,1.0,2.3599530000000002e-17,1.18448e-15,1.507853e-15,1.212771e-10,7.517319e-12,7.758529e-17,2.740249e-12,5.46094e-14,2.999578e-13,1.918233e-17,2.649029e-16,2.228561e-13,other,1.0
3,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7,35154293,2017-06-21 13:58:47,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,BWH,iE33,Philips Medical Systems,0.0,52.725,52.0,65.0,0.037484,0.037484,1.751908e-13,2.611923e-10,3.829348e-11,4.161394e-09,4.401294e-07,3.898549e-11,1.846615e-09,1.314888e-08,2.163399e-10,1.356451e-11,1.390473e-08,4.085282e-09,3.373987e-09,8.878076e-12,0.9999992,5.287207e-11,9.219395e-10,1.477895e-10,9.975262e-09,3.489092e-13,1.091603e-12,1.06178e-08,6.673472e-09,3.354299e-07,3.170946e-13,3.176687e-12,7.696499e-10,other,1.0
4,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7,35154293,2017-06-21 13:58:47,48b09010a09a4b2e_4903a582ec77f16c8f7cbc7f8bf7_...,BWH,iE33,Philips Medical Systems,0.0,33.333,41.0,64.0,0.025704,0.025704,6.211826e-05,2.515238e-08,2.613256e-07,7.509958e-06,1.858536e-08,0.000106212,2.109523e-05,0.0003603096,4.731141e-09,8.429137e-07,0.003748224,1.115757e-07,0.01138439,0.0003313288,3.940278e-10,4.206316e-05,3.919671e-05,0.1193105,5.120688e-06,1.212203e-07,0.0005713144,0.001576522,3.926847e-07,9.254544e-05,0.8621842,1.390069e-08,0.0001556575,rvinf,1.0


In [11]:
# Attach the per-video metadata to the matched PET/echo studies. The CFR table is the
# left side of a left join: videos of studies without CFR values are dropped, while
# matched studies without any metadata remain as rows with empty metadata columns.
files_cfr = pd.merge(left = study_cfr_df, right = echo_meta_sum_views,
                     how = 'left', on = ['mrn', 'study'])
files_cfr.tail(n = 2)

Unnamed: 0,mrn,study,pet_date,echo_date,petmrn_identifier,days_post_pet,difference(days),pet_measurement,filename,dir,datetime,fileid,institution,model,manufacturer,index,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view,sum_views
302371,41128695,4f27e0c8b2e37c75_4903a58c1a3b6182b7d6f5d19fb4,2020-01-10,2019-12-08,41128695_2020-01-10,-33,33.0,1.0,4f27e0c8b2e37c75_4903a58c1a3b6182b7d6f5d19fb4_...,/mnt/obi0/phi/echo/npyFiles/BWH/4f27/4f27e0c8b...,2019-12-08 09:36:15,4f27e0c8b2e37c75_4903a58c1a3b6182b7d6f5d19fb4_...,BWH,Affiniti 70C,Philips Medical Systems,0.0,33.333,102.0,72.0,0.038101,0.038101,8.525484e-11,4.761628e-08,1.553953e-09,2.346275e-06,3.757656e-09,2.746353e-06,0.01128907,2.365555e-07,5.502079e-09,1.608749e-08,0.9886258,3.376832e-06,3.909833e-10,5.127357e-09,8e-06,2.165053e-11,8.86631e-09,6.407424e-05,8.364941e-11,2.129735e-08,3.325654e-08,4.564876e-08,2.464894e-10,6.655624e-07,8.407422e-08,3.769934e-06,4.126614e-10,a4c_rv,1.0
302372,41128695,4f27e0c8b2e37c75_4903a58c1a3b6182b7d6f5d19fb4,2020-01-10,2019-12-08,41128695_2020-01-10,-33,33.0,1.0,4f27e0c8b2e37c75_4903a58c1a3b6182b7d6f5d19fb4_...,/mnt/obi0/phi/echo/npyFiles/BWH/4f27/4f27e0c8b...,2019-12-08 09:36:15,4f27e0c8b2e37c75_4903a58c1a3b6182b7d6f5d19fb4_...,BWH,Affiniti 70C,Philips Medical Systems,0.0,41.247,82.0,71.0,0.0267,0.0267,2.842068e-13,6.22142e-13,1.013217e-14,5.017845e-12,9.585414e-15,3.793506e-14,1.407506e-12,1.564643e-13,1.627178e-14,2.177066e-13,1.087211e-13,2.088464e-12,4.119476e-11,1.6211e-14,1.0,1.938321e-17,4.369375e-13,1.636336e-13,3.210785e-11,1.750581e-15,2.289004e-16,1.318241e-15,5.781358e-15,3.735791e-10,1.78197e-13,1.233108e-15,3.046032e-12,other,1.0


In [12]:
# Find matched echo studies for which there is no video metadata.
# A left join is enough here: echo_meta_sum_views contains no rows with a missing
# frame_time (they were removed before), so only rows coming from the CFR table can end
# up with an empty frame_time. This avoids building the full outer join with every echo
# video that has no CFR measurement, only to discard those rows again.
file_cfr_no_meta = study_cfr_df.merge(right = echo_meta_sum_views, on = ['mrn', 'study'], how = 'left')
file_cfr_no_meta = file_cfr_no_meta.loc[file_cfr_no_meta.frame_time.isnull()]
print(f'Echo studies without meta data: {len(file_cfr_no_meta.study.unique())}')
# Show a preview only instead of dumping all rows (which include patient identifiers).
file_cfr_no_meta.head(10)

Echo studies without meta data: 172


Unnamed: 0,mrn,study,pet_date,echo_date,petmrn_identifier,days_post_pet,difference(days),pet_measurement,filename,dir,datetime,fileid,institution,model,manufacturer,index,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view,sum_views
8999,31673205,48b4c0c942c83773_4903a58051d59c181751a5fa8e9b,2016-10-03,2015-11-17,31673205_2016-10-03,-321.0,321.0,1.0,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
15528,30257158,48b5fb02eee43ec8_4903a580509429421435645b5b43,2015-03-16,2015-03-20,30257158_2015-03-16,4.0,4.0,1.0,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
27937,23947245,490047ab0f21e224_4903a581a728dc04cfbd98628980,2014-04-07,2014-03-26,23947245_2014-04-07,-12.0,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
28217,23938202,490047ac48bf3975_4903a44ab12daff13ab494653950,2009-06-15,2009-06-06,23938202_2009-06-15,-9.0,9.0,1.0,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
28640,23617475,490048122e3f1c93_4903a44ab129ee85ac27774f1f46,2009-10-13,2009-02-09,23617475_2009-10-13,-246.0,246.0,1.0,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
31080,23748668,4900490cbeca1305_4903a44ab12c996e0ff88ff98b52,2009-04-15,2009-07-20,23748668_2009-04-15,96.0,96.0,1.0,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
33109,23549215,49004be9debebbf7_4903a44ab09602c52cf938e228a0,2009-03-02,2009-11-27,23549215_2009-03-02,270.0,270.0,1.0,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
35542,23244098,49004c5f71c67723_4903a44ab128d921b851c8dd9a5e,2009-03-31,2009-03-31,23244098_2009-03-31,0.0,0.0,1.0,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
36421,23356231,49004dc39796d03c_4903a44b330893fe719d15ded9d5,2008-10-20,2008-12-24,23356231_2008-10-20,65.0,65.0,1.0,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
36535,23309586,49004dc6a5f8e639_4903a44b330ae01c3fe95d2a5e12,2008-10-14,2008-10-28,23309586_2008-10-14,14.0,14.0,1.0,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [13]:
# Per view: number of distinct patients, echo studies and PET studies that have at least
# one video whose top-scoring view (max_view) is that view.
# The rows are collected in a list and turned into a DataFrame once, instead of growing a
# DataFrame with pd.concat inside the loop (repeated copying, and concatenating onto an
# empty frame triggers a FutureWarning in recent pandas versions).
stat_columns = ['view', 'patients', 'echo studies', 'PET studies']
stat_records = []
for view in view_list:
    view_rows = files_cfr[files_cfr.max_view == view]
    stat_records.append({'view': view,
                         'patients': len(view_rows.mrn.unique()),
                         'echo studies': len(view_rows.study.unique()),
                         'PET studies': len(view_rows.petmrn_identifier.unique())})

# Passing the columns explicitly keeps the sort below working even for an empty view_list.
df_stat = pd.DataFrame(stat_records, columns = stat_columns)
df_stat = df_stat.sort_values(by = 'view')
print(df_stat.head(100))

            view  patients  echo studies  PET studies
0            a2c      2826          4859         3079
1      a2c_laocc      2247          3464         2410
2    a2c_lvocc_s        13            13           13
3            a3c      2619          4391         2863
4      a3c_laocc      1661          2329         1766
5    a3c_lvocc_s        85            87           86
6            a4c      3065          5506         3350
7        a4c_far      1889          2784         2015
8      a4c_laocc      2514          3979         2702
9    a4c_lvocc_s       195           200          196
10        a4c_rv      2096          3211         2248
11  a4c_rv_laocc        98           102           98
22           a5c      2420          3790         2606
23          apex      1808          2698         1903
21         other      3232          6182         3553
12      plax_far      2551          4192         2764
13      plax_lac      1372          1905         1449
14      plax_laz      2671  

In [14]:
# File names for the file-level CFR table: one parquet file and one .csv file
match_view_filename = 'pet_match365_diff_files_'+cfr_meta_date+'.parquet'
match_view_filename_csv = 'pet_match365_diff_files_'+cfr_meta_date+'.csv'

# Write both files into the metadata directory (the .csv without the index column)
match_view_path = os.path.join(meta_dir, match_view_filename)
match_view_path_csv = os.path.join(meta_dir, match_view_filename_csv)
files_cfr.to_parquet(match_view_path)
files_cfr.to_csv(match_view_path_csv, index = False)

In [19]:
# Preview the first rows of the merged file-level table.
files_cfr.head(n = 5)

Unnamed: 0,mrn,study,pet_date,echo_date,petmrn_identifier,days_post_pet,difference(days),pet_measurement,filename,dir,datetime,fileid,institution,model,manufacturer,index,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view,sum_views
0,35156678,48b09010a2219aad_4903a582edf3bd118ffb3386065b,2018-10-15,2017-12-06,35156678_2018-10-15,-313,313.0,1.0,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,2017-12-06 13:11:41,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,47.769231,66.0,60.0,0.028951,0.028951,3.446593e-08,6.452001e-09,2.939033e-08,0.003095016,9.225302e-08,3.967397e-08,7.041133e-10,3.144316e-09,1.198068e-13,4.564848e-10,1.454091e-09,1.773068e-08,7.812481e-08,3.755003e-08,0.9968956,7.881907e-10,5.391607e-10,2.151894e-10,1.15716e-08,4.940982e-08,7.734493e-12,1.395856e-06,3.054052e-09,7.720808e-06,5.067335e-09,2.187016e-13,2.230961e-08,other,1.0
1,35156678,48b09010a2219aad_4903a582edf3bd118ffb3386065b,2018-10-15,2017-12-06,35156678_2018-10-15,-313,313.0,1.0,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,2017-12-06 13:11:41,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,20.087146,154.0,60.0,0.020448,0.020448,4.212547e-13,1.0,5.081462e-15,5.16278e-16,3.739126e-14,4.737708e-15,3.574842e-17,1.105575e-13,7.188538e-16,3.057809e-15,4.562448e-17,4.610617e-16,1.488447e-12,2.472197e-12,2.470155e-16,1.77298e-16,7.891783e-16,6.224149e-17,6.330659e-14,5.080497e-13,2.415044e-15,4.977857e-15,6.916603e-13,5.833864e-15,2.562479e-15,6.839081e-22,8.41705e-17,a2c_laocc,1.0
2,35156678,48b09010a2219aad_4903a582edf3bd118ffb3386065b,2018-10-15,2017-12-06,35156678_2018-10-15,-313,313.0,1.0,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,2017-12-06 13:11:41,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,33.385965,96.0,60.0,0.028951,0.028951,8.949438e-12,1.52701e-11,1.032435e-11,1.731728e-12,8.588255e-09,4.44479e-13,5.656191e-10,8.559975e-10,1.013023e-09,1.203025e-11,7.54872e-14,1.192051e-10,2.723761e-12,2.971284e-13,1.0,4.889395e-11,1.707197e-12,3.167443e-13,4.534164e-11,4.018771e-11,3.015296e-16,1.463824e-11,6.176486e-11,5.01245e-08,3.739098e-14,8.764302e-11,7.585989e-10,other,1.0
3,35156678,48b09010a2219aad_4903a582edf3bd118ffb3386065b,2018-10-15,2017-12-06,35156678_2018-10-15,-313,313.0,1.0,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,2017-12-06 13:11:41,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,20.076253,154.0,60.0,0.02555,0.02555,2.756245e-08,8.159813e-08,1.349713e-07,0.0002232582,2.104798e-05,0.0002801413,1.836982e-06,0.0004025422,1.919646e-06,1.904979e-08,1.831858e-07,7.573378e-06,9.969383e-06,9.283376e-07,0.0001450835,1.276905e-07,1.183806e-07,5.436503e-10,3.23423e-06,0.0004654251,9.928958e-07,0.1464581,2.779426e-06,1.162626e-05,1.426432e-07,0.8517988,0.0001638508,subcostal,1.0
4,35156678,48b09010a2219aad_4903a582edf3bd118ffb3386065b,2018-10-15,2017-12-06,35156678_2018-10-15,-313,313.0,1.0,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,2017-12-06 13:11:41,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,10.673887,278.0,62.0,0.012357,0.012357,7.778262e-08,9.886885e-07,1.039657e-06,5.820243e-07,0.000305535,1.34048e-06,2.491018e-08,9.160528e-07,9.756121e-09,1.460179e-06,1.225109e-08,1.272529e-06,3.398212e-05,0.0002213792,0.007812755,4.475427e-09,1.293485e-11,0.001715419,0.9891934,3.791654e-06,0.0005341106,0.0001012632,7.045915e-05,1.855576e-08,7.979656e-08,6.84117e-08,1.438755e-07,plax_laz_ao,1.0


In [15]:
# Size of the merged table as (rows, columns), followed by a preview of the first rows.
n_rows, n_cols = files_cfr.shape
print((n_rows, n_cols))
files_cfr.head(n = 5)

(302373, 50)


Unnamed: 0,mrn,study,pet_date,echo_date,petmrn_identifier,days_post_pet,difference(days),pet_measurement,filename,dir,datetime,fileid,institution,model,manufacturer,index,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view,sum_views
0,35156678,48b09010a2219aad_4903a582edf3bd118ffb3386065b,2018-10-15,2017-12-06,35156678_2018-10-15,-313,313.0,1.0,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,2017-12-06 13:11:41,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,47.769231,66.0,60.0,0.028951,0.028951,3.446593e-08,6.452001e-09,2.939033e-08,0.003095016,9.225302e-08,3.967397e-08,7.041133e-10,3.144316e-09,1.198068e-13,4.564848e-10,1.454091e-09,1.773068e-08,7.812481e-08,3.755003e-08,0.9968956,7.881907e-10,5.391607e-10,2.151894e-10,1.15716e-08,4.940982e-08,7.734493e-12,1.395856e-06,3.054052e-09,7.720808e-06,5.067335e-09,2.187016e-13,2.230961e-08,other,1.0
1,35156678,48b09010a2219aad_4903a582edf3bd118ffb3386065b,2018-10-15,2017-12-06,35156678_2018-10-15,-313,313.0,1.0,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,2017-12-06 13:11:41,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,20.087146,154.0,60.0,0.020448,0.020448,4.212547e-13,1.0,5.081462e-15,5.16278e-16,3.739126e-14,4.737708e-15,3.574842e-17,1.105575e-13,7.188538e-16,3.057809e-15,4.562448e-17,4.610617e-16,1.488447e-12,2.472197e-12,2.470155e-16,1.77298e-16,7.891783e-16,6.224149e-17,6.330659e-14,5.080497e-13,2.415044e-15,4.977857e-15,6.916603e-13,5.833864e-15,2.562479e-15,6.839081e-22,8.41705e-17,a2c_laocc,1.0
2,35156678,48b09010a2219aad_4903a582edf3bd118ffb3386065b,2018-10-15,2017-12-06,35156678_2018-10-15,-313,313.0,1.0,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,2017-12-06 13:11:41,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,33.385965,96.0,60.0,0.028951,0.028951,8.949438e-12,1.52701e-11,1.032435e-11,1.731728e-12,8.588255e-09,4.44479e-13,5.656191e-10,8.559975e-10,1.013023e-09,1.203025e-11,7.54872e-14,1.192051e-10,2.723761e-12,2.971284e-13,1.0,4.889395e-11,1.707197e-12,3.167443e-13,4.534164e-11,4.018771e-11,3.015296e-16,1.463824e-11,6.176486e-11,5.01245e-08,3.739098e-14,8.764302e-11,7.585989e-10,other,1.0
3,35156678,48b09010a2219aad_4903a582edf3bd118ffb3386065b,2018-10-15,2017-12-06,35156678_2018-10-15,-313,313.0,1.0,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,2017-12-06 13:11:41,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,20.076253,154.0,60.0,0.02555,0.02555,2.756245e-08,8.159813e-08,1.349713e-07,0.0002232582,2.104798e-05,0.0002801413,1.836982e-06,0.0004025422,1.919646e-06,1.904979e-08,1.831858e-07,7.573378e-06,9.969383e-06,9.283376e-07,0.0001450835,1.276905e-07,1.183806e-07,5.436503e-10,3.23423e-06,0.0004654251,9.928958e-07,0.1464581,2.779426e-06,1.162626e-05,1.426432e-07,0.8517988,0.0001638508,subcostal,1.0
4,35156678,48b09010a2219aad_4903a582edf3bd118ffb3386065b,2018-10-15,2017-12-06,35156678_2018-10-15,-313,313.0,1.0,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,2017-12-06 13:11:41,48b09010a2219aad_4903a582edf3bd118ffb3386065b_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,10.673887,278.0,62.0,0.012357,0.012357,7.778262e-08,9.886885e-07,1.039657e-06,5.820243e-07,0.000305535,1.34048e-06,2.491018e-08,9.160528e-07,9.756121e-09,1.460179e-06,1.225109e-08,1.272529e-06,3.398212e-05,0.0002213792,0.007812755,4.475427e-09,1.293485e-11,0.001715419,0.9891934,3.791654e-06,0.0005341106,0.0001012632,7.045915e-05,1.855576e-08,7.979656e-08,6.84117e-08,1.438755e-07,plax_laz_ao,1.0


In [16]:
# Number of distinct file names; a missing file name counts as one value (dropna = False).
files_cfr['filename'].nunique(dropna = False)

302202

In [17]:
# Default views for the plot: every view whose name contains 'a4c'
view_list_plot = list(filter(lambda view_name: 'a4c' in view_name, view_list))
# For the paper: Make a graph with the days of echo before or after CFR
def style(p):
    """Apply the common text formatting to a figure and return the same figure.

    The title is centered, the title, axis labels and tick labels are set to 11pt,
    and the axis labels are made bold.

    Args:
        p: figure object exposing `title`, `xaxis` and `yaxis`.

    Returns:
        The figure `p` that was passed in, modified in place.
    """
    text_size = '11pt'

    # Title
    p.title.align = 'center'
    p.title.text_font_size = text_size

    # Axis titles and tick labels, identical for both axes
    for axis in (p.xaxis, p.yaxis):
        axis.axis_label_text_font_size = text_size
        axis.axis_label_text_font_style = 'bold'
        axis.major_label_text_font_size = text_size

    return p

def make_dataset(df = files_cfr, view_list = view_list_plot, range_start = -200, range_end = 200, bin_width = 1):
    """Build the per-view histogram table of echo timing relative to the PET study.

    For every view in view_list, the rows of df whose max_view equals that view are
    reduced to unique (study, days_post_pet) pairs, so that studies are counted and
    not videos, and their days_post_pet values are binned.

    Bins are aligned to whole days: the histogram range is shifted by half a day, so
    with bin_width = 1 the bin of day d spans [d - 0.5, d + 0.5) and its center is d.
    (Previously the edges were shifted by +0.5 after binning, which displayed the
    studies of day d at d + 1, and the closed last bin merged two days.)
    Days from range_start up to, but not including, range_end are counted.
    This assumes whole-day values in days_post_pet, as in the previewed tables.

    Note: the defaults of df and view_list are evaluated once, when this definition
    is executed. Pass both explicitly to work on current data.

    Args:
        df: table with the columns max_view, study and days_post_pet.
        view_list: names of the views (values of max_view) to include.
        range_start: first day covered by the histogram.
        range_end: end of the covered range in days (exclusive).
        bin_width: width of one bin in days.

    Returns:
        DataFrame with one row per view and bin, sorted by view and left edge, with the
        columns studies (count), freq (share of the view's counted studies), left,
        right, center (bin geometry), f_freq (freq formatted with five decimals),
        view and color.
    """
    n_bins = int((range_end - range_start)/bin_width)
    palette = Category10[10]

    arr_df_list = []

    for view_idx, view in enumerate(view_list):
        subset = df[df.max_view == view]
        # This should actually be on the study level, not videos
        subset = subset[['study', 'days_post_pet']].drop_duplicates().reset_index(drop = True)

        # Shifting the range by half a day puts the bin edges between whole days
        days_hist, edges = np.histogram(subset.days_post_pet,
                                        bins = n_bins,
                                        range = [range_start - 0.5, range_end - 0.5])

        # Avoid a division by zero when the view has no study inside the range
        n_studies = days_hist.sum()
        if n_studies > 0:
            freq = days_hist/n_studies
        else:
            freq = np.zeros(len(days_hist))

        arr_df = pd.DataFrame({'studies': days_hist,
                               'freq': freq,
                               'left': edges[:-1], 'right': edges[1:],
                               'center': (edges[:-1] + edges[1:])/2})

        arr_df['f_freq'] = ['%0.5f' % p for p in arr_df.freq]

        arr_df['view'] = view

        # The palette has ten colors; reuse them cyclically if more views are requested
        arr_df['color'] = palette[view_idx % len(palette)]

        arr_df_list.append(arr_df)

    hist_df = pd.concat(arr_df_list, ignore_index=True, axis = 0)
    hist_df = hist_df.sort_values(['view', 'left'])

    return hist_df

def make_plot(df):
    """Create the histogram figure of echo study timing relative to PET, one glyph set per view.

    Args:
        df: histogram table with the columns view, studies, left, right, center,
            f_freq and color (one row per view and bin).

    Returns:
        The styled figure. One set of bars is drawn per view; only the 'a4c' bars are
        visible initially (or the first view, if 'a4c' is not in df). The other views
        can be toggled through the legend.
    """
    p = figure(title = 'Time for echocardiography studies after CFR measurement by views',
               x_axis_label = 'Time post-pet [days]',
               y_axis_label = 'Echocardiography studies')

    views = list(df.view.unique())
    # Fall back to the first view so that the figure does not start out empty
    if 'a4c' in views or not views:
        visible_view = 'a4c'
    else:
        visible_view = views[0]

    hist_dict = {}
    for view in views:

        df_view = df[df.view == view]
        datasource = ColumnDataSource(df_view)
        # Only strip a leading 'view_' group prefix. Taking the part after the last
        # underscore gave e.g. 'a4c_laocc' and 'a4c_rv_laocc' the same legend label,
        # which merges their legend entries.
        label = view[len('view_'):] if view.startswith('view_') else view
        hist_dict[view] = p.quad(source = datasource, bottom = 0, top = 'studies',
                                 left = 'left', right = 'right',
                                 color = 'color', fill_alpha = 0.7,
                                 hover_fill_color = 'color', hover_fill_alpha = 1.0,
                                 legend_label = label, line_color = 'black',
                                 name = view, muted_alpha = 0.2)

        hist_dict[view].visible = (view == visible_view)

    # Restrict the hover tool to the histogram glyphs by passing the renderers directly;
    # selecting them through `names` is not accepted by recent Bokeh releases.
    hover = HoverTool(tooltips = [('view', '@view'),
                                  ('days post-cfr', '@center'),
                                  ('echo studies', '@studies'),
                                  ('proportion', '@f_freq')],
                      mode = 'vline',
                      renderers = list(hist_dict.values()))

    p.add_tools(hover)

    p.legend.location = 'top_right'
    p.legend.title = 'View: click to hide'
    p.legend.click_policy = 'hide'
    p = style(p)

    return p

In [18]:
# Histogram of the a4c view only, for echo studies within 30 days before/after the PET study
view_list_plot = ['a4c']
hist_df = make_dataset(df = files_cfr,
                       view_list = view_list_plot,
                       range_start = -30,
                       range_end = 30,
                       bin_width = 1)
hist_plot = make_plot(hist_df)

# Render the figure inline in the notebook
reset_output()
output_notebook()
show(hist_plot)

In [None]:
# Write the figure to a standalone html file in the CFR data root
hist_plot_file = os.path.join(cfr_data_root, 'cfr_echo_time_'+cfr_meta_date+'.html')
reset_output()
output_file(hist_plot_file, title = 'cfr_echo_time_delay')
save(hist_plot)

In [None]:
# Final look at the first rows of the merged file-level table.
files_cfr.head(n = 5)