### Summary statistics for the training, evaluation and testing data sets ###

In [1]:
import os
import glob
import numpy as np
import pandas as pd

# Widen the pandas display limits so that large summary tables are printed
# without truncation (up to 500 rows / 50 columns, 1000 characters per line).
pd.options.display.max_rows = 500
pd.options.display.max_columns = 50
pd.options.display.width = 1000

In [5]:
# Root directory of the CFR data; all other locations are derived from it.
cfr_data_root = os.path.normpath('/mnt/obi0/andreas/data/cfr')

# Date stamp used to select the metadata directory.
cfr_meta_date = '200208'
meta_dir = os.path.join(cfr_data_root, 'metadata_' + cfr_meta_date)

# Directory holding the .parquet files that are globbed further down.
# NOTE(review): the '200208' stamp is hard-coded here rather than taken from
# cfr_meta_date -- confirm whether the two are meant to move together.
tfr_dir = os.path.join(cfr_data_root, 'tfr_200208')
print(tfr_dir)

/mnt/obi0/andreas/data/cfr/tfr_200208


In [6]:
# TFR .parquet data files, one list per split (train / eval / test).
# glob.glob() returns matches in whatever order the file system yields them,
# so the lists are sorted to make the row order of the frames built from them
# reproducible from run to run.
train_files = sorted(glob.glob(os.path.join(tfr_dir, 'cfr_resized_a4c_train_200208_*.parquet')))
eval_files = sorted(glob.glob(os.path.join(tfr_dir, 'cfr_resized_a4c_eval_200208_*.parquet')))
test_files = sorted(glob.glob(os.path.join(tfr_dir, 'cfr_resized_a4c_test_200208_*.parquet')))

In [9]:
# Number of distinct eval video file names, divided by 20.
# NOTE(review): eval_df is only assigned in a cell that appears further down
# in this notebook, so this cell fails on a fresh kernel unless that cell has
# been run first. The meaning of the divisor 20 is not stated here -- confirm.
print(len(eval_df['filename'].unique()) / 20)

14.1


In [7]:
# Read every parquet file of a split and stack the pieces into one frame per split.
train_df = pd.concat([pd.read_parquet(path) for path in train_files])
eval_df = pd.concat([pd.read_parquet(path) for path in eval_files])
test_df = pd.concat([pd.read_parquet(path) for path in test_files])

# Combined data set: train, eval and test rows stacked in that order.
# ignore_index=True already produces a fresh 0..n-1 index.
dset = pd.concat([train_df, eval_df, test_df], axis=0, ignore_index=True)
dset.head(2)

Unnamed: 0,mrn,study,echo_study_date,reportID,days_post_cfr,subjectid,report_number,cfr_study_date,cfr_report_date,cfr,filename,dir,datetime,fileid,institution,model,manufacturer,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,...,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view,mode,rate,im_array_shape
0,26588848,4905336e6fcdf6f3_4903a58793b3fabb933e8233553c,2012-04-04,122050,-306,5350,EVS0355444,2013-02-04,2012-04-04,1.580292,4905336e6fcdf6f3_4903a58793b3fabb933e8233553c_...,/mnt/obi0/phi/echo/npyFiles/BWH/4905/4905336e6...,2012-04-04 13:51:40,4905336e6fcdf6f3_4903a58793b3fabb933e8233553c_...,BWH,iE33,Philips Medical Systems,35.024,86.0,73.0,0.056903,0.056903,7.194284e-09,2e-06,2.835884e-09,...,0.999232,5.698615e-05,1.437268e-09,4.514912e-10,1.4e-05,5.706585e-08,1.692767e-05,8.631115e-09,1.386226e-16,1.035368e-09,4.143198e-09,5.000943e-08,6.60657e-09,2.315859e-08,1.867279e-09,2e-06,2.79582e-12,0.000596,8.1e-05,1.562224e-10,2.104095e-07,a4c,train,28.6,"[341, 455, 40]"
1,20689774,4903a2b8e4fc0f49_4903a44517b58fa65e5405918608,2006-12-22,87329,308,3538,0625480D,2006-02-17,2006-12-07,1.313668,4903a2b8e4fc0f49_4903a44517b58fa65e5405918608_...,/mnt/obi0/phi/echo/npyFiles/BWH/4903/4903a2b8e...,2006-12-22 08:00:14,4903a2b8e4fc0f49_4903a44517b58fa65e5405918608_...,BWH,Vivid i,GEMS Ultrasound,43.538954,52.0,86.0,0.051151,0.051151,0.000437687,3e-06,3.295374e-07,...,0.720921,1.147139e-08,1.307347e-09,7.238339e-07,0.122096,6.534848e-07,4.589416e-07,1.222173e-06,9.731261e-09,3.142928e-10,3.228629e-07,2.708157e-08,3.039488e-07,0.0003810025,0.001297817,0.000756,1.832382e-08,0.153937,4.4e-05,2.419375e-09,3.992833e-05,a4c,train,23.0,"[222, 325, 40]"


In [5]:
# Summary table with one row per split ('mode'): the view(s) present and the
# number of distinct patients (mrn), studies, videos (filename) and cfr values.
split_records = []
for split in dset['mode'].unique():
    # Select the rows of this split once and reuse them for every statistic.
    split_rows = dset[dset['mode'] == split]

    split_records.append({
        # Joined into a single string so that a split containing more than one
        # view still yields exactly one row instead of a length-mismatch error.
        'view': ', '.join(str(view) for view in split_rows['max_view'].unique()),
        'mode': split,
        'mrns': len(split_rows['mrn'].unique()),
        'studies': len(split_rows['study'].unique()),
        'videos': len(split_rows['filename'].unique()),
        'unique_cfr_values': len(split_rows['cfr'].unique()),
    })

# Build the frame in one step; the explicit column list keeps the layout
# stable even when there are no rows.
df_stat = pd.DataFrame(
    split_records,
    columns=['view', 'mode', 'mrns', 'studies', 'videos', 'unique_cfr_values'],
)
print(df_stat)

  view   mode  mrns  studies  videos  unique_cfr_values
0  a4c  train   960     1347    3738               1025
1  a4c   eval   101      122     282                102
2  a4c   test   272      356    1044                288


In [6]:
# Add the image width and height as separate columns.
# Each im_array_shape entry is indexed positionally: element 0 becomes the
# height and element 1 becomes the width.
dset = dset.assign(
    width=lambda frame: frame['im_array_shape'].apply(lambda shape: shape[1]),
    height=lambda frame: frame['im_array_shape'].apply(lambda shape: shape[0]),
)

In [9]:
# Show the first 50 rows, now including the width and height columns.
# NOTE(review): the rendered rows contain identifier columns such as mrn;
# consider clearing this output before the notebook is shared.
dset.iloc[:50]

Unnamed: 0,mrn,study,echo_study_date,reportID,days_post_cfr,subjectid,report_number,cfr_study_date,cfr_report_date,cfr,filename,dir,datetime,fileid,institution,model,manufacturer,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,...,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view,mode,rate,im_array_shape,width,height
0,15302599,4a13402152a4fc32_4903a584a1b2945bd16771efa32d,2011-08-01,114848,-175,4781,EVS0312663,2012-01-23,2011-08-01,2.182906,4a13402152a4fc32_4903a584a1b2945bd16771efa32d_...,/mnt/obi0/phi/echo/npyFiles/BWH/4a13/4a1340215...,2011-08-01 14:11:51,4a13402152a4fc32_4903a584a1b2945bd16771efa32d_...,BWH,iE33,Philips Medical Systems,33.333,67.0,62.0,0.037483,0.037483,4.665284e-13,2.959916e-12,9.087745e-14,...,1.807556e-16,8.849852e-15,1.370279e-11,1.928055e-14,8.501512e-14,2.65755e-13,1.737023e-13,8.402098e-14,1.719919e-13,4.219386e-15,1.737607e-16,1.341205e-14,6.99821e-14,1.90014e-11,3.043172e-14,3.6863e-10,6.07472e-12,4.850189e-15,1.460998e-12,a4c,train,30.0,"[225, 300, 40]",300,225
1,23148018,49004f61b067c309_4903a44b32e5a53bf896a125b772,2008-09-10,90819,0,2913,EVS0141518,2008-09-10,2008-09-10,1.914136,49004f61b067c309_4903a44b32e5a53bf896a125b772_...,/mnt/obi0/phi/echo/npyFiles/BWH/4900/49004f61b...,2008-09-10 13:01:14,49004f61b067c309_4903a44b32e5a53bf896a125b772_...,BWH,Vivid7,GE Vingmed Ultrasound,40.384615,66.0,67.0,0.046036,0.046036,2.634363e-11,9.816773e-14,1.357744e-15,...,1.360194e-17,6.286842e-15,3.41417e-14,6.305261e-16,5.064456e-13,6.101305e-14,1.084365e-13,7.016771e-15,1.596819e-16,8.263494e-14,5.941183e-18,1.203597e-14,1.019619e-13,1.576242e-15,3.548044e-13,1.056251e-14,9.39736e-15,1.41857e-19,5.691444e-15,a4c,train,24.8,"[200, 293, 40]",293,200
2,3682481,4b78d3959911c192_4903a5858428385836eaaf4d9f45,2010-06-28,104114,0,412,EVS0253832,2010-06-28,2010-06-28,1.442216,4b78d3959911c192_4903a5858428385836eaaf4d9f45_...,/mnt/obi0/phi/echo/npyFiles/BWH/4b78/4b78d3959...,2010-06-28 14:08:27,4b78d3959911c192_4903a5858428385836eaaf4d9f45_...,BWH,Vivid7,GE Vingmed Ultrasound,40.338985,60.0,52.0,0.046036,0.046036,1.564981e-12,1.144871e-12,5.223119e-16,...,2.394679e-15,2.128004e-14,2.352617e-14,7.54701e-17,3.041593e-14,2.278186e-15,4.279166e-15,4.503282e-15,4.02845e-14,1.886286e-14,3.456893e-18,1.953823e-16,8.062656e-13,1.750647e-16,9.538869e-15,1.827365e-15,1.637724e-14,5.176551e-21,3.706251e-14,a4c,train,24.8,"[200, 293, 40]",293,200
3,13213996,4a1534c2f057d641_4903a44b32edb90a51d81ee4f2e4,2008-01-17,73218,-1,1632,0801548J,2008-01-18,2008-01-17,1.458794,4a1534c2f057d641_4903a44b32edb90a51d81ee4f2e4_...,/mnt/obi0/phi/echo/npyFiles/BWH/4a15/4a1534c2f...,2008-01-17 08:30:37,4a1534c2f057d641_4903a44b32edb90a51d81ee4f2e4_...,BWH,Vivid7,GE Vingmed Ultrasound,40.519479,78.0,59.0,0.048593,0.048593,1.074256e-06,7.847069e-07,5.966318e-08,...,2.962402e-06,8.741816e-07,4.471855e-07,4.340298e-10,1.843001e-09,1.488883e-09,1.604725e-07,1.475682e-06,3.278325e-07,1.713866e-10,1.904547e-08,6.782474e-08,1.603255e-07,1.724024e-07,6.273405e-09,5.286298e-08,7.735327e-07,3.067064e-12,1.916954e-06,a4c,train,24.7,"[211, 309, 40]",309,211
4,18348524,4a1ed1ccf626e98a_4903a585855f563fb7d57d7fa263,2010-11-08,111239,-240,1390,EVS0246319,2011-07-06,2010-11-08,1.670886,4a1ed1ccf626e98a_4903a585855f563fb7d57d7fa263_...,/mnt/obi0/phi/echo/npyFiles/BWH/4a1e/4a1ed1ccf...,2010-11-08 15:29:20,4a1ed1ccf626e98a_4903a585855f563fb7d57d7fa263_...,BWH,Vivid7,GE Vingmed Ultrasound,33.47045,92.0,60.0,0.040921,0.040921,4.005575e-06,5.394122e-08,2.230315e-11,...,5.854241e-14,6.554896e-12,2.177058e-12,1.530935e-11,2.152162e-09,4.453775e-10,8.427226e-11,4.623023e-09,6.580172e-09,5.989249e-12,1.187805e-12,1.065206e-10,3.112084e-13,3.846236e-08,1.610078e-10,3.36392e-09,1.077212e-13,1.5396e-14,2.278455e-11,a4c,train,29.9,"[198, 260, 40]",260,198
5,8520496,4b735cd239f07140_4903a584a1b9a68d59fd64940f6a,2011-03-15,109105,-7,3853,EVS0293140,2011-03-22,2011-03-15,1.163441,4b735cd239f07140_4903a584a1b9a68d59fd64940f6a_...,/mnt/obi0/phi/echo/npyFiles/BWH/4b73/4b735cd23...,2011-03-15 14:24:39,4b735cd239f07140_4903a584a1b9a68d59fd64940f6a_...,BWH,iE33,Philips Medical Systems,33.333,68.0,54.0,0.041843,0.041843,3.865878e-12,1.055767e-10,3.101198e-09,...,1.094105e-06,1.585923e-09,3.95106e-05,7.470477e-11,5.616327e-09,6.659596e-10,7.470018e-12,1.039309e-08,1.460244e-09,1.058317e-11,6.807353e-13,9.641952e-08,1.37407e-10,1.718402e-10,5.176279e-10,3.458936e-06,2.384599e-06,3.410218e-13,9.3163e-10,a4c,train,30.0,"[251, 335, 40]",335,251
6,1135375,4b7a86554057d20d_4903a580509f95b79fb3da089fba,2015-08-18,140955,-1,4995,E1242902,2015-08-19,2015-08-18,1.72709,4b7a86554057d20d_4903a580509f95b79fb3da089fba_...,/mnt/obi0/phi/echo/npyFiles/BWH/4b7a/4b7a86554...,2015-08-18 14:04:29,4b7a86554057d20d_4903a580509f95b79fb3da089fba_...,BWH,CX50,Philips Medical Systems,33.058,62.0,73.0,0.037461,0.037461,3.038473e-08,1.429664e-08,1.419681e-10,...,5.288106e-09,1.433853e-08,9.30858e-11,6.986707e-09,8.276005e-10,2.871567e-11,1.517954e-09,4.994703e-11,9.753484e-11,1.10877e-10,4.464591e-12,8.431781e-11,7.486499e-09,5.162294e-05,2.499925e-10,1.407417e-06,2.112871e-10,6.766144e-13,2.784026e-10,a4c,train,30.2,"[225, 300, 40]",300,225
7,16040560,4a10449c483cfaf9_4903a5864e60c48ea7ac9053c226,2013-06-20,124887,0,5225,EVS0489042,2013-06-20,2014-06-19,1.457699,4a10449c483cfaf9_4903a5864e60c48ea7ac9053c226_...,/mnt/obi0/phi/echo/npyFiles/BWH/4a10/4a10449c4...,2013-06-20 09:21:26,4a10449c483cfaf9_4903a5864e60c48ea7ac9053c226_...,BWH,iE33,Philips Medical Systems,33.333,58.0,72.0,0.052629,0.052629,2.157401e-09,3.096872e-09,2.141616e-10,...,4.228585e-11,2.606354e-11,5.061905e-09,3.221907e-10,2.73296e-07,2.935969e-11,1.231039e-11,2.523219e-10,8.673826e-12,1.068685e-07,1.002476e-12,3.446873e-09,3.56482e-08,7.474282e-06,2.326634e-12,4.224281e-07,2.149634e-09,1.028426e-12,1.209845e-08,a4c,train,30.0,"[316, 421, 40]",421,316
8,11985884,4a175a10e893e284_4903a44ab0957d42276740da0152,2009-12-15,93621,281,1384,EVS0152873,2009-03-09,2009-01-13,1.288732,4a175a10e893e284_4903a44ab0957d42276740da0152_...,/mnt/obi0/phi/echo/npyFiles/BWH/4a17/4a175a10e...,2009-12-15 15:26:53,4a175a10e893e284_4903a44ab0957d42276740da0152_...,BWH,Vivid E9,GE Vingmed Ultrasound,40.504386,77.0,60.0,0.056116,0.056116,8.518411e-13,3.806693e-12,2.757796e-13,...,5.743329e-15,1.793595e-12,1.954799e-07,4.257017e-14,2.049543e-11,3.705574e-13,8.787611e-16,2.144251e-12,2.41316e-12,1.271512e-12,5.752714e-15,5.174666e-13,5.579646e-08,1.370285e-12,7.350848e-15,4.116044e-10,2.251413e-10,4.6266290000000007e-17,8.79984e-12,a4c,train,24.7,"[244, 357, 40]",357,244
9,29501400,490af49766c269e3_4903a58050942a0845ef57d7afe6,2015-03-16,141356,-178,6391,EVS0507130,2015-09-10,2014-09-17,1.317734,490af49766c269e3_4903a58050942a0845ef57d7afe6_...,/mnt/obi0/phi/echo/npyFiles/BWH/490a/490af4976...,2015-03-16 13:55:22,490af49766c269e3_4903a58050942a0845ef57d7afe6_...,BWH,Vivid7,GE Vingmed Ultrasound,14.763682,135.0,84.0,0.043478,0.043478,4.989149e-15,3.193988e-15,3.968006e-18,...,1.826562e-16,4.2956640000000007e-17,5.148841e-14,9.021741000000001e-17,1.21988e-16,1.952541e-16,2.157985e-18,5.776285e-16,4.022914e-14,5.59849e-13,1.5695789999999998e-19,2.0903769999999998e-19,9.752112000000001e-17,1.456485e-16,4.236861e-19,4.005132e-16,2.17886e-21,1.597418e-20,7.019311e-16,a4c,train,67.7,"[189, 277, 40]",277,189


In [8]:
# Largest image height and width found anywhere in the data set.
max_image_size = (dset['height'].max(), dset['width'].max())
print('Maximum image height {}'.format(max_image_size[0]))
print('Maximum image width  {}'.format(max_image_size[1]))

# Factor that scales the larger of the two maxima down to 299 pixels.
# NOTE(review): 299 is presumably the input size of the downstream model -- confirm.
image_scale_factor = 299 / np.max(max_image_size)
print('Image scale factor {:.4f}'.format(image_scale_factor))

Maximum image height 398
Maximum image width  530
Image scale factor 0.5642
