### Select patients for PET-CFR-MBF models: Combine file names, cfr measurements and views ###

In [1]:
import os
import numpy as np
import pandas as pd

# Raise the pandas display limits (rows, columns, line width) used for notebook output
for option_name, option_value in (('display.max_rows', 500),
                                  ('display.max_columns', 50),
                                  ('display.width', 1000)):
    pd.set_option(option_name, option_value)

from bokeh.io import output_notebook, reset_output, show, output_file, save
from bokeh.plotting import figure
from bokeh.layouts import column, row, gridplot
from bokeh.models import ColumnDataSource, HoverTool, Legend

from bokeh.palettes import Category10

In [2]:
# Tag of the metadata snapshot; it is appended to the directory and file names below
cfr_meta_date = '200425'
cfr_data_root = os.path.normpath('/mnt/obi0/andreas/data/cfr')

# Directory that holds the metadata files of this snapshot
meta_dir = os.path.join(cfr_data_root, 'metadata_{}'.format(cfr_meta_date))
print(meta_dir)

/mnt/obi0/andreas/data/cfr/metadata_200425


In [3]:
# PET measurements matched with echo studies
study_cfr_file = os.path.join(meta_dir, 'pet_match365_diff_{}.parquet'.format(cfr_meta_date))
study_cfr_df = pd.read_parquet(study_cfr_file)

# Count the distinct echo studies that have a matched PET measurement
n_matched_studies = len(study_cfr_df['study'].unique())
print('Number of matched CFR-ECHO studies: {}'.format(n_matched_studies))
study_cfr_df.head()

Number of matched CFR-ECHO studies: 4143


Unnamed: 0,mrn,study,pet_date,echo_date,petmrn_identifier,days_post_pet,difference(days)
5452,35169325,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113,2017-05-22,2017-06-21,35169325_2017-05-22,30,30.0
5450,35133560,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f,2018-04-25,2018-11-02,35133560_2018-04-25,191,191.0
5446,35014216,48b091b44637c987_4903a58c1bcb23767fc2818c6e63,2018-05-08,2019-04-25,35014216_2018-05-08,352,352.0
5448,35034941,48b091b6184a771a_4903a582ec751999e07cf30ae81a,2017-04-17,2017-04-18,35034941_2017-04-17,1,1.0
5447,35029545,48b091b714b41ffe_4903a582ec746dce5e9b6c1f4070,2017-07-24,2017-05-16,35029545_2017-07-24,-69,69.0


In [4]:
# Meta data for all echo videos (or just this subset)
echo_meta_file = os.path.join(meta_dir, 'echo_BWH_meta_{}.parquet'.format(cfr_meta_date))

# Read the table and cast mrn to int64 in one step
echo_meta_df = pd.read_parquet(echo_meta_file).astype({'mrn': 'int64'})

n_meta_studies = len(echo_meta_df['study'].unique())
print('Number of studies in meta data {}'.format(n_meta_studies))
echo_meta_df.head()

Number of studies in meta data 27721


Unnamed: 0,filename,dir,study,mrn,datetime,fileid,institution,model,manufacturer,index,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal
0,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a,35154368,2017-06-29 13:59:05,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,BWH,iE33,Philips Medical Systems,0.0,59.4,41.0,75.0,0.032639,0.032639,1.178971e-15,8.604196999999999e-19,1.479184e-16,8.796266e-16,2.571893e-13,8.185579000000001e-17,3.043832e-11,1.21468e-15,2.095986e-16,5.133588e-12,2.513449e-18,3.839156e-16,2.490527e-12,6.126238e-21,1.0,1.3095090000000001e-17,7.667199000000001e-18,1.798559e-16,6.010073000000001e-17,1.03526e-15,4.1699590000000004e-22,4.270829e-17,3.529437e-16,7.931992e-14,4.603354e-18,5.1197989999999996e-20,3.6778380000000005e-17
1,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a,35154368,2017-06-29 13:59:05,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,BWH,iE33,Philips Medical Systems,0.0,50.767,44.0,73.0,0.039452,0.039452,1.569346e-11,1.914748e-11,5.463878e-14,3.877103e-13,7.9256e-13,2.651529e-14,1.524154e-08,1.427691e-14,2.865148e-15,6.230805e-12,1.459255e-15,3.287361e-14,9.74779e-11,1.068038e-13,1.0,1.653648e-14,1.795145e-13,1.900974e-12,2.668856e-14,5.122239e-13,1.466861e-16,2.361316e-14,3.318508e-13,8.564641e-11,1.485405e-14,6.088076e-14,2.630603e-14
2,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a,35154368,2017-06-29 13:59:05,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,BWH,iE33,Philips Medical Systems,0.0,33.333,75.0,74.0,0.021885,0.021885,2.596062e-10,4.713628e-10,4.50974e-11,3.253085e-08,1.618168e-12,8.574959e-07,9.00294e-13,8.230614e-08,1.569278e-11,2.141411e-05,1.549814e-09,2.236664e-05,0.9999183,1.550872e-07,1e-06,1.438738e-11,1.189717e-08,3.180146e-12,1.841605e-08,1.396017e-13,4.716286e-06,3.029323e-05,2.817287e-08,9.520272e-09,7.060649e-11,6.475617e-12,2.797988e-07
3,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a,35154368,2017-06-29 13:59:05,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,BWH,iE33,Philips Medical Systems,0.0,53.064,48.0,86.0,0.019907,0.019907,1.147595e-16,1.1995940000000002e-17,1.65494e-15,2.217913e-14,1.312532e-14,2.831267e-14,6.425138e-15,1.004305e-14,3.877898e-15,5.087564e-14,6.862201e-17,7.347565e-16,4.375135e-14,2.4402120000000003e-17,1.0,3.204124e-18,1.924967e-19,1.314328e-14,4.42152e-13,1.040406e-16,2.975521e-16,1.651787e-15,1.373754e-15,7.584037000000001e-17,1.4741700000000002e-17,7.888934e-21,2.18698e-20
4,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a,35154368,2017-06-29 13:59:05,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,BWH,iE33,Philips Medical Systems,0.0,33.333,72.0,74.0,0.020193,0.020193,2.521683e-10,0.001677873,7.510346e-07,0.0007467039,8.374749e-08,0.01646851,6.397274e-06,5.584654e-05,0.0002535375,0.1359369,0.0001734045,0.001442146,0.09200491,1.108767e-06,2.7e-05,8.538134e-08,1.34111e-05,7.100427e-07,0.02379809,9.187641e-06,0.117547,0.6095443,2.964346e-06,0.000288791,7.983387e-08,1.483031e-07,1.847825e-07


In [5]:
def _print_meta_counts(meta_df):
    """Print the number of distinct patients (mrn), studies and files in meta_df."""
    print('Total number of patients in meta data {}'.format(len(meta_df['mrn'].unique())))
    print('Total number of studies {}'.format(len(meta_df['study'].unique())))
    print('Total number of files in meta data {}'.format(len(meta_df['filename'].unique())))


_print_meta_counts(echo_meta_df)

# Rows with a missing a4c score are counted as rows without view classification
temp = echo_meta_df.loc[echo_meta_df['a4c'].isnull()].reset_index(drop=True)
print()
print('Studies without view classification {}'.format(len(temp['study'].unique())))

# Keep only the rows that have both a view classification and a frame_time
has_view_scores = echo_meta_df['a4c'].notnull()
has_frame_time = echo_meta_df['frame_time'].notnull()
echo_meta_all_views = echo_meta_df.loc[has_view_scores & has_frame_time].reset_index(drop=True)

print()
print('After removal of rows without view clasification:')
_print_meta_counts(echo_meta_all_views)

Total number of patients in meta data 16570
Total number of studies 27721
Total number of files in meta data 1389513

Studies without view classification 36

After removal of rows without view clasification:
Total number of patients in meta data 16565
Total number of studies 27714
Total number of files in meta data 1387517


In [6]:
# Groups of view-classification columns; the key is the name of the group,
# the value lists the score columns that belong to it.
view_dict = dict(
    view_a2c=['a2c', 'a2c_laocc', 'a2c_lvocc_s'],
    view_a3c=['a3c', 'a3c_laocc', 'a3c_lvocc_s'],
    view_a4c=['a4c', 'a4c_far', 'a4c_laocc', 'a4c_lvocc_s', 'a4c_rv', 'a4c_rv_laocc'],
    view_plax=['plax_far', 'plax_lac', 'plax_laz', 'plax_laz_ao', 'plax_plax'],
    view_psax=['psax_avz', 'psax_az', 'psax_mv', 'psax_pap'],
    view_other=['other', 'a5c', 'apex', 'rvinf', 'subcostal', 'suprasternal'],
)

In [7]:
# Flat list of all view score columns, in the order in which they appear in view_dict
view_list = [item for sublist in view_dict.values() for item in sublist]

# For every row (video):
#   max_view  - name of the view column with the maximum classification score
#   sum_views - sum over all view columns; consistency check that view_list
#               contains every view column
echo_meta_sum_views = echo_meta_all_views.assign(
    max_view=echo_meta_all_views[view_list].idxmax(axis=1),
    sum_views=echo_meta_all_views[view_list].sum(axis=1))

In [8]:
# Preview the first rows; max_view and sum_views are the two columns added above
echo_meta_sum_views.head()

Unnamed: 0,filename,dir,study,mrn,datetime,fileid,institution,model,manufacturer,index,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view,sum_views
0,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a,35154368,2017-06-29 13:59:05,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,BWH,iE33,Philips Medical Systems,0.0,59.4,41.0,75.0,0.032639,0.032639,1.178971e-15,8.604196999999999e-19,1.479184e-16,8.796266e-16,2.571893e-13,8.185579000000001e-17,3.043832e-11,1.21468e-15,2.095986e-16,5.133588e-12,2.513449e-18,3.839156e-16,2.490527e-12,6.126238e-21,1.0,1.3095090000000001e-17,7.667199000000001e-18,1.798559e-16,6.010073000000001e-17,1.03526e-15,4.1699590000000004e-22,4.270829e-17,3.529437e-16,7.931992e-14,4.603354e-18,5.1197989999999996e-20,3.6778380000000005e-17,other,1.0
1,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a,35154368,2017-06-29 13:59:05,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,BWH,iE33,Philips Medical Systems,0.0,50.767,44.0,73.0,0.039452,0.039452,1.569346e-11,1.914748e-11,5.463878e-14,3.877103e-13,7.9256e-13,2.651529e-14,1.524154e-08,1.427691e-14,2.865148e-15,6.230805e-12,1.459255e-15,3.287361e-14,9.74779e-11,1.068038e-13,1.0,1.653648e-14,1.795145e-13,1.900974e-12,2.668856e-14,5.122239e-13,1.466861e-16,2.361316e-14,3.318508e-13,8.564641e-11,1.485405e-14,6.088076e-14,2.630603e-14,other,1.0
2,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a,35154368,2017-06-29 13:59:05,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,BWH,iE33,Philips Medical Systems,0.0,33.333,75.0,74.0,0.021885,0.021885,2.596062e-10,4.713628e-10,4.50974e-11,3.253085e-08,1.618168e-12,8.574959e-07,9.00294e-13,8.230614e-08,1.569278e-11,2.141411e-05,1.549814e-09,2.236664e-05,0.9999183,1.550872e-07,1e-06,1.438738e-11,1.189717e-08,3.180146e-12,1.841605e-08,1.396017e-13,4.716286e-06,3.029323e-05,2.817287e-08,9.520272e-09,7.060649e-11,6.475617e-12,2.797988e-07,a5c,1.0
3,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a,35154368,2017-06-29 13:59:05,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,BWH,iE33,Philips Medical Systems,0.0,53.064,48.0,86.0,0.019907,0.019907,1.147595e-16,1.1995940000000002e-17,1.65494e-15,2.217913e-14,1.312532e-14,2.831267e-14,6.425138e-15,1.004305e-14,3.877898e-15,5.087564e-14,6.862201e-17,7.347565e-16,4.375135e-14,2.4402120000000003e-17,1.0,3.204124e-18,1.924967e-19,1.314328e-14,4.42152e-13,1.040406e-16,2.975521e-16,1.651787e-15,1.373754e-15,7.584037000000001e-17,1.4741700000000002e-17,7.888934e-21,2.18698e-20,other,1.0
4,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09010a...,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a,35154368,2017-06-29 13:59:05,48b09010a09ba991_4903a582ec77f1640cdeecd8cb4a_...,BWH,iE33,Philips Medical Systems,0.0,33.333,72.0,74.0,0.020193,0.020193,2.521683e-10,0.001677873,7.510346e-07,0.0007467039,8.374749e-08,0.01646851,6.397274e-06,5.584654e-05,0.0002535375,0.1359369,0.0001734045,0.001442146,0.09200491,1.108767e-06,2.7e-05,8.538134e-08,1.34111e-05,7.100427e-07,0.02379809,9.187641e-06,0.117547,0.6095443,2.964346e-06,0.000288791,7.983387e-08,1.483031e-07,1.847825e-07,psax_az,1.0


In [9]:
# Attach the per-video echo meta data to the PET CFR table.
# Left join on (mrn, study): every row of study_cfr_df is kept and only echo
# videos that belong to a study with CFR values are brought in.
files_cfr = pd.merge(study_cfr_df, echo_meta_sum_views,
                     how='left', on=['mrn', 'study'])
files_cfr.tail(2)

Unnamed: 0,mrn,study,pet_date,echo_date,petmrn_identifier,days_post_pet,difference(days),filename,dir,datetime,fileid,institution,model,manufacturer,index,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view,sum_views
196341,41128695,4f27e0c8b2e37c75_4903a58c1a3b6182b7d6f5d19fb4,2020-01-10,2019-12-08,41128695_2020-01-10,-33,33.0,4f27e0c8b2e37c75_4903a58c1a3b6182b7d6f5d19fb4_...,/mnt/obi0/phi/echo/npyFiles/BWH/4f27/4f27e0c8b...,2019-12-08 09:36:15,4f27e0c8b2e37c75_4903a58c1a3b6182b7d6f5d19fb4_...,BWH,Affiniti 70C,Philips Medical Systems,0.0,33.333,102.0,72.0,0.038101,0.038101,8.525484e-11,4.761628e-08,1.553953e-09,2.346275e-06,3.757656e-09,2.746353e-06,0.01128907,2.365555e-07,5.502079e-09,1.608749e-08,0.9886258,3.376832e-06,3.909833e-10,5.127357e-09,8e-06,2.165053e-11,8.86631e-09,6.407424e-05,8.364941e-11,2.129735e-08,3.325654e-08,4.564876e-08,2.464894e-10,6.655624e-07,8.407422e-08,3.769934e-06,4.126614e-10,a4c_rv,1.0
196342,41128695,4f27e0c8b2e37c75_4903a58c1a3b6182b7d6f5d19fb4,2020-01-10,2019-12-08,41128695_2020-01-10,-33,33.0,4f27e0c8b2e37c75_4903a58c1a3b6182b7d6f5d19fb4_...,/mnt/obi0/phi/echo/npyFiles/BWH/4f27/4f27e0c8b...,2019-12-08 09:36:15,4f27e0c8b2e37c75_4903a58c1a3b6182b7d6f5d19fb4_...,BWH,Affiniti 70C,Philips Medical Systems,0.0,41.247,82.0,71.0,0.0267,0.0267,2.842068e-13,6.22142e-13,1.013217e-14,5.017845e-12,9.585414e-15,3.793506e-14,1.407506e-12,1.564643e-13,1.627178e-14,2.177066e-13,1.087211e-13,2.088464e-12,4.119476e-11,1.6211e-14,1.0,1.938321e-17,4.369375e-13,1.636336e-13,3.210785e-11,1.750581e-15,2.289004e-16,1.318241e-15,5.781358e-15,3.735791e-10,1.78197e-13,1.233108e-15,3.046032e-12,other,1.0


In [10]:
# Let's see what we have: distinct patients, echo studies and PET studies
# among the videos assigned to each view (max_view).
stat_columns = ['view', 'patients', 'echo studies', 'PET studies']
stat_records = []
for view in view_list:
    view_files = files_cfr[files_cfr.max_view == view]
    stat_records.append({'view': view,
                         'patients': len(view_files.mrn.unique()),
                         'echo studies': len(view_files.study.unique()),
                         'PET studies': len(view_files.petmrn_identifier.unique())})

# Build the table once from the collected records instead of concatenating
# inside the loop; explicit columns keep the sort valid for an empty view_list.
df_stat = pd.DataFrame(stat_records, columns=stat_columns).sort_values(by='view')
print(df_stat.head(100))

            view  patients  echo studies  PET studies
0            a2c      2259          3153         2348
1      a2c_laocc      1701          2211         1746
2    a2c_lvocc_s         9             9            9
3            a3c      2064          2854         2143
4      a3c_laocc      1242          1524         1267
5    a3c_lvocc_s        53            53           53
6            a4c      2483          3593         2580
7        a4c_far      1394          1754         1432
8      a4c_laocc      1969          2615         2031
9    a4c_lvocc_s       124           125          124
10        a4c_rv      1607          2109         1652
11  a4c_rv_laocc        58            60           58
22           a5c      1872          2466         1931
23          apex      1333          1726         1361
21         other      2694          4083         2804
12      plax_far      1989          2699         2053
13      plax_lac      1004          1237         1031
14      plax_laz      2105  

In [11]:
# Save the merged table in meta_dir, once as parquet and once as .csv
match_view_stem = 'pet_match365_diff_files_{}'.format(cfr_meta_date)
match_view_filename = match_view_stem + '.parquet'
match_view_filename_csv = match_view_stem + '.csv'

files_cfr.to_parquet(os.path.join(meta_dir, match_view_filename))
files_cfr.to_csv(os.path.join(meta_dir, match_view_filename_csv), index=False)

In [12]:
# Shape (rows, columns) and first rows of the merged table
print(files_cfr.shape)
files_cfr.head()

(196343, 49)


Unnamed: 0,mrn,study,pet_date,echo_date,petmrn_identifier,days_post_pet,difference(days),filename,dir,datetime,fileid,institution,model,manufacturer,index,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view,sum_views
0,35169325,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113,2017-05-22,2017-06-21,35169325_2017-05-22,30,30.0,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b090131...,2017-06-21 09:04:50,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,BWH,iE33,Philips Medical Systems,0.0,33.333,77.0,71.0,0.044038,0.044038,8.544426e-11,1.134038e-13,5.271967e-14,1.336331e-13,1.708275e-15,4.455052e-13,1.0,1.499724e-11,5.666772e-14,6.379312e-14,6.884242e-12,1.625494e-14,1.684185e-12,1.107935e-14,2.403445e-12,6.817977e-16,1.88851e-14,5.909166e-11,1.349804e-14,8.245356e-14,4.105062e-13,4.618445e-11,4.207604e-15,4.772874e-14,9.679026e-17,8.89431e-18,5.028371e-13,a4c,1.0
1,35169325,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113,2017-05-22,2017-06-21,35169325_2017-05-22,30,30.0,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b090131...,2017-06-21 09:04:50,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,BWH,iE33,Philips Medical Systems,0.0,49.49,52.0,71.0,0.04404,0.04404,2.206956e-11,2.73909e-11,1.041237e-11,1.219875e-10,7.501796e-11,1.225508e-11,2.75189e-09,4.431975e-12,7.776415e-12,1.314174e-11,6.367319e-12,5.739965e-12,6.897379e-10,1.617736e-11,0.9999999,3.547227e-12,8.979309e-12,3.594578e-11,9.016331e-11,9.644979e-11,2.262643e-12,1.0363e-09,4.691505e-11,7.552487e-08,8.309125e-13,1.391146e-14,1.978723e-10,other,1.0
2,35169325,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113,2017-05-22,2017-06-21,35169325_2017-05-22,30,30.0,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b090131...,2017-06-21 09:04:50,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,BWH,iE33,Philips Medical Systems,0.0,63.571,43.0,68.0,0.036961,0.036961,3.0551130000000005e-17,2.809892e-15,2.350145e-15,1.248953e-15,6.813193e-16,1.31968e-16,1.5158960000000002e-17,5.531801e-16,2.432176e-15,1.10444e-15,2.3735220000000002e-17,3.259565e-17,2.198459e-17,3.822857e-18,1.0,1.035887e-13,3.540316e-14,1.9531859999999998e-19,2.7826560000000004e-17,1.482547e-14,3.240879e-18,4.179081e-16,1.833621e-15,2.044307e-12,8.849739e-14,2.397233e-19,3.46343e-12,other,1.0
3,35169325,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113,2017-05-22,2017-06-21,35169325_2017-05-22,30,30.0,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b090131...,2017-06-21 09:04:50,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,BWH,iE33,Philips Medical Systems,0.0,33.333,80.0,69.0,0.052627,0.052627,1.097689e-10,5.796163e-12,1.006641e-11,4.903411e-12,1.23143e-11,3.893997e-11,5.0939e-12,8.965308e-09,1.104377e-14,2.592585e-12,2.960192e-12,3.967917e-10,1.31349e-12,2.834721e-13,1.106356e-09,1.021883e-11,5.529269e-09,1.25331e-12,2.446791e-11,7.846255e-10,3.712601e-12,7.673641e-13,7.348277e-13,1.418694e-12,8.525948e-11,1.0,8.740278e-15,subcostal,1.0
4,35169325,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113,2017-05-22,2017-06-21,35169325_2017-05-22,30,30.0,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b090131...,2017-06-21 09:04:50,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,BWH,iE33,Philips Medical Systems,0.0,33.333,75.0,238.0,0.019683,0.019683,6.940926e-15,6.337236e-16,2.505338e-13,8.635626e-13,3.2895440000000003e-17,2.113117e-13,1.144237e-09,5.245854e-12,3.376405e-16,6.428665e-12,4.96526e-13,7.955655e-15,3.554735e-14,3.261082e-14,2.549476e-12,2.367511e-15,1.045866e-09,8.027423e-10,7.668919e-12,1.167985e-13,1.0,7.550513e-12,1.392102e-14,7.804513e-16,2.389122e-15,8.321909e-17,2.399615e-13,psax_avz,1.0


In [13]:
# Default views for the histogram: every view column whose name contains 'a4c'
view_list_plot = list(filter(lambda view_name: 'a4c' in view_name, view_list))
# For the paper: Make a graph with the days of echo before or after CFR
def style(p):
    """Apply the common title, axis-label and tick-label formatting to a figure.

    The object is modified in place and also returned, so the call can be
    used as ``p = style(p)``.
    """
    font_size = '11pt'

    # Title
    p.title.align = 'center'
    p.title.text_font_size = font_size

    # Axis titles (bold) and tick labels, identical for both axes
    for axis in (p.xaxis, p.yaxis):
        axis.axis_label_text_font_size = font_size
        axis.axis_label_text_font_style = 'bold'
        axis.major_label_text_font_size = font_size

    return p

def make_dataset(df = files_cfr, view_list = view_list_plot, range_start = -200, range_end = 200, bin_width = 1):
    """Histogram the days between PET and echo (days_post_pet) per view.

    Counting is done on the study level: for each view, the videos with that
    max_view are reduced to distinct (study, days_post_pet) pairs first.

    Parameters
    ----------
    df : DataFrame with the columns max_view, study and days_post_pet.
    view_list : values of max_view for which a histogram is built.
    range_start : first day covered by the histogram.
    range_end : end of the covered range (exclusive), in days.
    bin_width : width of one bin in days.

    Returns
    -------
    DataFrame with one row per view and bin, sorted by view and left edge:
    studies (count), freq (share of the counted studies), left/right/center
    (bin position on the day axis), f_freq (freq formatted for the tooltip),
    view and color.

    Notes
    -----
    Assumes days_post_pet holds whole-day values. The bin edges are placed half
    a day below the whole days, so that with bin_width = 1 each day d is counted
    in [d - 0.5, d + 0.5) and drawn centered on d. Binning on the whole-day
    edges instead would put every value exactly on an edge and, because
    np.histogram closes the last bin on both sides, add day range_end to the
    count of the last bar.
    """
    range_extent = range_end - range_start
    n_bins = int(range_extent/bin_width)
    palette = Category10[10]

    arr_df_list = []

    for view_idx, view in enumerate(view_list):
        subset = df[df.max_view == view]
        # This should actually be on the study level, not videos
        subset = subset[['study', 'days_post_pet']].drop_duplicates().reset_index(drop = True)

        days_hist, edges = np.histogram(subset.days_post_pet,
                                        bins = n_bins,
                                        range = [range_start - 0.5, range_end - 0.5])

        # Avoid 0/0 (NaN plus a RuntimeWarning) when no study falls into the range
        n_studies = np.sum(days_hist)
        if n_studies > 0:
            freq = days_hist/n_studies
        else:
            freq = np.zeros(len(days_hist))

        left, right = edges[:-1], edges[1:]
        arr_df = pd.DataFrame({'studies': days_hist,
                               'freq': freq,
                               'left': left, 'right': right,
                               'center': (left + right)/2})

        arr_df['f_freq'] = ['%0.5f' % p for p in arr_df.freq]

        arr_df['view'] = view

        # Cycle through the palette so that more than ten views do not raise
        arr_df['color'] = palette[view_idx % len(palette)]

        arr_df_list.append(arr_df)

    hist_df = pd.concat(arr_df_list, ignore_index=True, axis = 0)
    hist_df = hist_df.sort_values(['view', 'left'])

    return hist_df

def make_plot(df):
    """Draw one histogram (quad glyphs) per view from the output of make_dataset.

    Parameters
    ----------
    df : DataFrame with the columns view, studies, left, right, center, f_freq
        and color.

    Returns
    -------
    The styled figure. Only the 'a4c' histogram is visible initially; the
    legend click policy is 'hide'.
    """
    p = figure(title = 'Time for echocardiography studies after CFR measurement by views',
               x_axis_label = 'Time post-pet [days]',
               y_axis_label = 'Echocardiography studies')

    hist_dict = {}
    for view in df.view.unique():

        datasource = ColumnDataSource(df[df.view == view])
        # Strip only a leading 'view_' prefix. Taking the last '_'-separated token
        # would give the same label to e.g. 'a4c_laocc' and 'a4c_rv_laocc'.
        label = view[len('view_'):] if view.startswith('view_') else view
        hist_dict[view] = p.quad(source = datasource, bottom = 0, top = 'studies',
                                 left = 'left', right = 'right',
                                 color = 'color', fill_alpha = 0.7,
                                 hover_fill_color = 'color', hover_fill_alpha = 1.0,
                                 legend_label = label, line_color = 'black',
                                 name = view, muted_alpha = 0.2)

        hist_dict[view].visible = bool(view == 'a4c')

    # Attach the hover tool to the histogram renderers directly; this selects the
    # same glyphs as filtering by their names.
    hover = HoverTool(tooltips = [('view', '@view'),
                                  ('days post-cfr', '@center'),
                                  ('echo studies', '@studies'),
                                  ('proportion', '@f_freq')],
                      mode = 'vline',
                      renderers = list(hist_dict.values()))

    p.add_tools(hover)

    p.legend.location = 'top_right'
    p.legend.title = 'View: click to hide'
    p.legend.click_policy = 'hide'
    p = style(p)

    return p

In [14]:
# Show the flat list of view columns (built from the values of view_dict)
view_list

['a2c',
 'a2c_laocc',
 'a2c_lvocc_s',
 'a3c',
 'a3c_laocc',
 'a3c_lvocc_s',
 'a4c',
 'a4c_far',
 'a4c_laocc',
 'a4c_lvocc_s',
 'a4c_rv',
 'a4c_rv_laocc',
 'plax_far',
 'plax_lac',
 'plax_laz',
 'plax_laz_ao',
 'plax_plax',
 'psax_avz',
 'psax_az',
 'psax_mv',
 'psax_pap',
 'other',
 'a5c',
 'apex',
 'rvinf',
 'subcostal',
 'suprasternal']

In [15]:
# Histogram for the a4c view only, from -30 to 30 days with one-day bins
view_list_plot = ['a4c']
hist_df = make_dataset(df=files_cfr,
                       view_list=view_list_plot,
                       range_start=-30,
                       range_end=30,
                       bin_width=1)
hist_plot = make_plot(hist_df)

# Reset the bokeh output state, then show the plot in the notebook
reset_output()
output_notebook()
show(hist_plot)

In [16]:
# Reset the bokeh output state and write the plot to an HTML file in cfr_data_root
reset_output()
hist_html_file = os.path.join(cfr_data_root, 'cfr_echo_time_{}.html'.format(cfr_meta_date))
output_file(hist_html_file, title='cfr_echo_time_delay')
save(hist_plot)

'/mnt/obi0/andreas/data/cfr/cfr_echo_time_200425.html'

In [17]:
# Preview the first rows of the merged PET/echo file table
files_cfr.head()

Unnamed: 0,mrn,study,pet_date,echo_date,petmrn_identifier,days_post_pet,difference(days),filename,dir,datetime,fileid,institution,model,manufacturer,index,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view,sum_views
0,35169325,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113,2017-05-22,2017-06-21,35169325_2017-05-22,30,30.0,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b090131...,2017-06-21 09:04:50,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,BWH,iE33,Philips Medical Systems,0.0,33.333,77.0,71.0,0.044038,0.044038,8.544426e-11,1.134038e-13,5.271967e-14,1.336331e-13,1.708275e-15,4.455052e-13,1.0,1.499724e-11,5.666772e-14,6.379312e-14,6.884242e-12,1.625494e-14,1.684185e-12,1.107935e-14,2.403445e-12,6.817977e-16,1.88851e-14,5.909166e-11,1.349804e-14,8.245356e-14,4.105062e-13,4.618445e-11,4.207604e-15,4.772874e-14,9.679026e-17,8.89431e-18,5.028371e-13,a4c,1.0
1,35169325,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113,2017-05-22,2017-06-21,35169325_2017-05-22,30,30.0,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b090131...,2017-06-21 09:04:50,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,BWH,iE33,Philips Medical Systems,0.0,49.49,52.0,71.0,0.04404,0.04404,2.206956e-11,2.73909e-11,1.041237e-11,1.219875e-10,7.501796e-11,1.225508e-11,2.75189e-09,4.431975e-12,7.776415e-12,1.314174e-11,6.367319e-12,5.739965e-12,6.897379e-10,1.617736e-11,0.9999999,3.547227e-12,8.979309e-12,3.594578e-11,9.016331e-11,9.644979e-11,2.262643e-12,1.0363e-09,4.691505e-11,7.552487e-08,8.309125e-13,1.391146e-14,1.978723e-10,other,1.0
2,35169325,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113,2017-05-22,2017-06-21,35169325_2017-05-22,30,30.0,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b090131...,2017-06-21 09:04:50,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,BWH,iE33,Philips Medical Systems,0.0,63.571,43.0,68.0,0.036961,0.036961,3.0551130000000005e-17,2.809892e-15,2.350145e-15,1.248953e-15,6.813193e-16,1.31968e-16,1.5158960000000002e-17,5.531801e-16,2.432176e-15,1.10444e-15,2.3735220000000002e-17,3.259565e-17,2.198459e-17,3.822857e-18,1.0,1.035887e-13,3.540316e-14,1.9531859999999998e-19,2.7826560000000004e-17,1.482547e-14,3.240879e-18,4.179081e-16,1.833621e-15,2.044307e-12,8.849739e-14,2.397233e-19,3.46343e-12,other,1.0
3,35169325,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113,2017-05-22,2017-06-21,35169325_2017-05-22,30,30.0,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b090131...,2017-06-21 09:04:50,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,BWH,iE33,Philips Medical Systems,0.0,33.333,80.0,69.0,0.052627,0.052627,1.097689e-10,5.796163e-12,1.006641e-11,4.903411e-12,1.23143e-11,3.893997e-11,5.0939e-12,8.965308e-09,1.104377e-14,2.592585e-12,2.960192e-12,3.967917e-10,1.31349e-12,2.834721e-13,1.106356e-09,1.021883e-11,5.529269e-09,1.25331e-12,2.446791e-11,7.846255e-10,3.712601e-12,7.673641e-13,7.348277e-13,1.418694e-12,8.525948e-11,1.0,8.740278e-15,subcostal,1.0
4,35169325,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113,2017-05-22,2017-06-21,35169325_2017-05-22,30,30.0,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b090131...,2017-06-21 09:04:50,48b09013185d7d6b_4903a582ec77f16c8e735c3b6113_...,BWH,iE33,Philips Medical Systems,0.0,33.333,75.0,238.0,0.019683,0.019683,6.940926e-15,6.337236e-16,2.505338e-13,8.635626e-13,3.2895440000000003e-17,2.113117e-13,1.144237e-09,5.245854e-12,3.376405e-16,6.428665e-12,4.96526e-13,7.955655e-15,3.554735e-14,3.261082e-14,2.549476e-12,2.367511e-15,1.045866e-09,8.027423e-10,7.668919e-12,1.167985e-13,1.0,7.550513e-12,1.392102e-14,7.804513e-16,2.389122e-15,8.321909e-17,2.399615e-13,psax_avz,1.0
