### Select patients for CFR model: Split patients in train, val and test sets ###

In [1]:
import os
import numpy as np
import pandas as pd

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 1000)

In [2]:
cfr_data_root = os.path.normpath('/mnt/obi0/andreas/data/cfr')
cfr_meta_date = '200320'
meta_dir = os.path.join(cfr_data_root, 'metadata_'+cfr_meta_date)
print(meta_dir)

/mnt/obi0/andreas/data/cfr/metadata_200320


In [3]:
match_view_filename = 'pet_match365_diff_files_'+cfr_meta_date+'.parquet'
files_cfr = pd.read_parquet(os.path.join(meta_dir, match_view_filename))

print('Total number of patients {}'.format(len(files_cfr.mrn.unique())))
print('Total number of studies  {}'.format(len(files_cfr.study.unique())))
print('Total number of echos    {}'.format(len(files_cfr.filename.unique())))

files_cfr.head()

Total number of patients 1577
Total number of studies  2292
Total number of echos    103956


Unnamed: 0,study,mrn,echo_study_date,mrnstudyid,days_post_cfr,cfr_study_date,petmrn_identifier,post-2018,myocardial_perfusion,global_cfr_calc,unaffected_cfr,rest_mbf_unaff,stress_mbf_unaff,num_unaffected_segs,rest_global_mbf,stress_global_mbf,filename,dir,datetime,fileid,institution,model,manufacturer,index,frame_time,...,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view
0,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f,35133560,2018-11-02,2618,191,2018-04-25,35133560_2018-04-25,0,normal,1.716143,1.699871,0.934766,1.588982,17,0.923,1.584,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09016b...,2018-11-02 09:49:36,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,33.5,...,2.565391e-08,7.528357e-13,4.774701e-10,2.70874e-12,3.670912e-09,1.567517e-11,1.610227e-10,1.509277e-07,2.56671e-06,1.36324e-13,2.349999e-08,7.267388e-09,2.949915e-12,9.663496e-15,0.0001294855,9.732323e-06,1.612119e-12,0.999858,4.653681e-10,5.331582e-11,3.434301e-11,4.860822e-09,5.479821e-14,2.98651e-08,psax_avz
1,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f,35133560,2018-11-02,2618,191,2018-04-25,35133560_2018-04-25,0,normal,1.716143,1.699871,0.934766,1.588982,17,0.923,1.584,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09016b...,2018-11-02 09:49:36,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,33.371233,...,1.450411e-07,1.366147e-09,3.045583e-11,2.590847e-12,3.514123e-09,1.119247e-12,1.174291e-11,1.653268e-09,6.647998e-09,7.400897e-07,4.554047e-08,2.202479e-12,1.033245e-11,1.524424e-10,1.03291e-10,1.392449e-09,8.828201e-10,5.749271e-07,0.9999073,7.699048e-07,6.217265e-08,8.135313e-09,1.209222e-09,9.03969e-05,psax_az
2,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f,35133560,2018-11-02,2618,191,2018-04-25,35133560_2018-04-25,0,normal,1.716143,1.699871,0.934766,1.588982,17,0.923,1.584,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09016b...,2018-11-02 09:49:36,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,33.533333,...,2.660267e-07,3.19628e-10,2.934359e-09,3.283927e-07,3.254719e-09,1.172012e-07,4.847203e-10,3.966162e-09,1.291069e-09,1.924162e-08,6.683372e-07,0.9999976,1.350526e-09,4.16507e-10,1.1771e-10,1.866018e-09,3.144208e-09,3.432988e-10,9.421681e-10,8.57389e-07,4.335133e-08,1.434178e-10,4.363453e-12,2.205031e-09,other
3,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f,35133560,2018-11-02,2618,191,2018-04-25,35133560_2018-04-25,0,normal,1.716143,1.699871,0.934766,1.588982,17,0.923,1.584,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09016b...,2018-11-02 09:49:36,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,33.70892,...,1.941936e-14,2.563091e-18,2.31556e-14,1.126485e-08,5.550321e-15,1.119701e-19,4.69512e-16,9.429649e-14,1.808776e-17,1.098487e-13,1.432877e-15,2.962554e-15,2.005398e-15,3.412526e-12,1.698335e-14,8.875975e-14,8.122525e-18,1.691228e-16,5.1492060000000005e-17,7.832482e-14,1.353168e-15,1.3699309999999998e-19,3.447312e-22,2.979074e-17,a2c
4,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f,35133560,2018-11-02,2618,191,2018-04-25,35133560_2018-04-25,0,normal,1.716143,1.699871,0.934766,1.588982,17,0.923,1.584,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b09016b...,2018-11-02 09:49:36,48b09016b25a7f36_4903a58dd44b7b81d524809aba5f_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,33.403756,...,4.836035e-07,0.9999956,4.835767e-11,1.211003e-13,1.254464e-09,3.868239e-12,6.760433e-14,1.656497e-10,1.586072e-09,2.450811e-11,3.36056e-12,2.185993e-14,4.238282e-11,6.147999e-12,9.961027e-12,3.035175e-11,3.153882e-10,3.031236e-14,4.888586e-13,3.554045e-12,1.007157e-11,7.237515e-15,2.361346e-18,9.15012e-12,a3c_laocc


### Split the patients in train, validate and test sets ###
Although each view might have a little different patient pupulation distribution, because not all views are in each study. However, we want the same MRNS in each data set and for all views so that we can directly compare the performance of the algorithm for the same patients. We can expand the data frame above to add the splits.

In [4]:
def patientsplit(patient_list):

    train_test_split = 0.87
    train_eval_split = 0.90

    # Take a test set from all patients
    patient_list_train = np.random.choice(patient_list,
                                          size = int(np.floor(train_test_split*len(patient_list))),
                                          replace = False)
    patient_list_test = list(set(patient_list).difference(patient_list_train))
    train_test_intersection = set(patient_list_train).intersection(set(patient_list_test)) # This should be empty
    print('Intersection of patient_list_train and patient_list_test:', train_test_intersection)

    # Further separate some patients for evaluation from the training list
    patient_list_eval = np.random.choice(patient_list_train,
                                         size = int(np.ceil((1-train_eval_split)*len(patient_list_train))),
                                         replace = False)

    patient_list_train = set(patient_list_train).difference(patient_list_eval)
    train_eval_intersection = set(patient_list_train).intersection(set(patient_list_eval))
    print('Intersection of patient_list_train and patient_list_eval:', train_eval_intersection)

    # Show the numbers
    print('total patients:', len(patient_list))
    print()
    print('patients in set:', np.sum([len(patient_list_train),
                                     len(patient_list_eval),
                                     len(patient_list_test)]))
    print()
    print('patients in train:', len(patient_list_train))
    print('patients in eval:', len(patient_list_eval))
    print('patients in test:', len(patient_list_test))

    return patient_list_train, patient_list_eval, patient_list_test

In [5]:
# Get a patient list
patient_list = list(files_cfr.sample(frac=1).mrn.unique())
patient_list_train, patient_list_eval, patient_list_test = patientsplit(patient_list)

patient_split = {'train': patient_list_train,
                 'eval': patient_list_eval,
                 'test': patient_list_test}

print('Patient IDs in train:', len(patient_split['train']))
print('Patient IDs in eval:', len(patient_split['eval']))
print('Patient IDs in test:', len(patient_split['test']))

print()

print('Intersection train-test:', set(patient_split['train']).intersection(set(patient_split['test'])))
print('Intersection train-eval:', set(patient_split['train']).intersection(set(patient_split['eval'])))
print('Intersection eval-test:', set(patient_split['eval']).intersection(set(patient_split['test'])))

Intersection of patient_list_train and patient_list_test: set()
Intersection of patient_list_train and patient_list_eval: set()
total patients: 1577

patients in set: 1577

patients in train: 1233
patients in eval: 138
patients in test: 206
Patient IDs in train: 1233
Patient IDs in eval: 138
Patient IDs in test: 206

Intersection train-test: set()
Intersection train-eval: set()
Intersection eval-test: set()


In [6]:
# Add mode column to files_cfr
split_list = []
for dset in patient_split.keys():
    dset_mrn_list = list(patient_split[dset])
    split_list.append(pd.DataFrame({'mrn': dset_mrn_list,
                                    'mode': [dset]*len(dset_mrn_list)}))

split_df = pd.concat(split_list, ignore_index = True)

files_cfr_dset = files_cfr.merge(right = split_df, on = 'mrn', how = 'left').\
                    sample(frac = 1).\
                    reset_index(drop = True)

In [12]:
files_cfr_dset.head(5)

Unnamed: 0,study,mrn,echo_study_date,mrnstudyid,days_post_cfr,cfr_study_date,petmrn_identifier,post-2018,myocardial_perfusion,global_cfr_calc,unaffected_cfr,rest_mbf_unaff,stress_mbf_unaff,num_unaffected_segs,rest_global_mbf,stress_global_mbf,filename,dir,datetime,fileid,institution,model,manufacturer,index,frame_time,...,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view,mode
0,4907bd6a793c5dfb_4903a44ab0957c74bdc28dddda6a,24425639,2009-12-01,2176,0,2009-12-01,24425639_2009-12-01,0,abnormal,0.867978,0.9496,0.625,0.5935,2,0.712,0.618,4907bd6a793c5dfb_4903a44ab0957c74bdc28dddda6a_...,/mnt/obi0/phi/echo/npyFiles/BWH/4907/4907bd6a7...,2009-12-01 09:24:20,4907bd6a793c5dfb_4903a44ab0957c74bdc28dddda6a_...,BWH,SEQUOIA,ACUSON,0.0,50.0,...,5.414357e-06,1.473872e-10,1.898307e-07,8.002351e-08,2.771213e-07,3.452406e-08,6.587521e-10,1.146736e-08,3.621772e-11,1.756975e-08,0.9997528,9.051162e-11,9.608114e-12,1.609156e-05,4.862192e-09,8.306589e-11,1.033188e-07,1.904285e-11,1.966672e-08,3.865196e-13,2.80494e-08,0.0002246652,6.678317e-14,other,train
1,4a1fef8550ab5fac_4903a58793b1d4ea10e563d3c312,19925890,2012-06-01,1690,-6,2012-06-07,19925890_2012-06-07,0,probably normal,1.975533,1.952924,1.139364,2.225091,11,1.267,2.503,4a1fef8550ab5fac_4903a58793b1d4ea10e563d3c312_...,/mnt/obi0/phi/echo/npyFiles/BWH/4a1f/4a1fef855...,2012-06-01 08:57:02,4a1fef8550ab5fac_4903a58793b1d4ea10e563d3c312_...,BWH,iE33,Philips Medical Systems,0.0,33.333,...,2.66575e-06,1.692631e-06,0.003556277,1.184485e-07,4.992927e-06,0.0005516922,0.06170321,1.3431e-05,0.002961617,4.383549e-07,2.368193e-07,5.644827e-07,0.0001115734,1.650258e-05,4.697899e-07,0.0001005547,4.296219e-05,0.9225407,0.007673036,0.000590088,9.560555e-05,3.424057e-06,1.924276e-05,psax_az,train
2,4906a55b3fa77797_4903a582ec73c1cabe94c3bc18fd,25447152,2017-02-10,2335,-221,2017-09-19,25447152_2017-09-19,0,normal,1.785173,1.783524,1.193265,2.128217,17,1.187,2.119,4906a55b3fa77797_4903a582ec73c1cabe94c3bc18fd_...,/mnt/obi0/phi/echo/npyFiles/BWH/4906/4906a55b3...,2017-02-10 08:55:57,4906a55b3fa77797_4903a582ec73c1cabe94c3bc18fd_...,BWH,iE33,Philips Medical Systems,0.0,33.333,...,3.064899e-14,2.958344e-14,2.712769e-16,1.458904e-12,1.754679e-16,1.942319e-16,2.052172e-15,3.228106e-12,2.579245e-14,4.323973e-14,2.39512e-20,3.361422e-15,1.865457e-12,1.135847e-18,2.768832e-14,5.079228e-13,1.396718e-15,1.796831e-09,2.614285e-08,1.738536e-15,1.404608e-11,2.602839e-10,1.0,suprasternal,train
3,4b735f4c9d24823e_4903a582ec793e5ee8274c880255,8692840,2017-08-29,761,-7,2017-09-05,8692840_2017-09-05,0,abnormal,1.479348,1.516408,1.028884,1.560209,3,0.92,1.361,4b735f4c9d24823e_4903a582ec793e5ee8274c880255_...,/mnt/obi0/phi/echo/npyFiles/BWH/4b73/4b735f4c9...,2017-08-29 13:57:30,4b735f4c9d24823e_4903a582ec793e5ee8274c880255_...,BWH,iE33,Philips Medical Systems,0.0,33.333,...,3.437058e-07,2.204923e-07,1.561161e-06,1.266473e-06,2.231622e-10,1.863277e-07,2.431857e-09,3.974089e-09,0.0006134118,2.213535e-08,0.001242059,4.340756e-05,4.089084e-05,1.394139e-06,3.244499e-08,2.802697e-10,4.473797e-06,0.9955176,9.934554e-09,0.002520887,2.260318e-07,3.567147e-07,9.462863e-07,psax_az,test
4,4907bff5d7e77ff9_4903a584a09f1f388fcd23f1fbcc,24606139,2011-10-03,2208,104,2011-06-21,24606139_2011-06-21,0,abnormal,2.259209,2.292405,0.702222,1.609778,9,0.733,1.656,4907bff5d7e77ff9_4903a584a09f1f388fcd23f1fbcc_...,/mnt/obi0/phi/echo/npyFiles/BWH/4907/4907bff5d...,2011-10-03 14:13:48,4907bff5d7e77ff9_4903a584a09f1f388fcd23f1fbcc_...,BWH,iE33,Philips Medical Systems,0.0,58.235,...,1.927104e-13,3.440073e-15,2.156906e-11,4.248504e-16,4.654161e-15,8.76494e-14,4.7149860000000006e-17,6.605088e-15,6.175958e-14,2.038013e-15,1.0,9.887183e-14,2.673864e-14,5.768422e-13,3.815655e-13,1.201616e-12,1.15451e-16,1.448722e-15,1.409059e-15,2.092804e-11,6.073014e-13,4.959488e-16,4.718697e-15,other,eval


In [13]:
files_cfr_dset.columns

Index(['study', 'mrn', 'echo_study_date', 'mrnstudyid', 'days_post_cfr', 'cfr_study_date', 'petmrn_identifier', 'post-2018', 'myocardial_perfusion', 'global_cfr_calc', 'unaffected_cfr', 'rest_mbf_unaff', 'stress_mbf_unaff', 'num_unaffected_segs', 'rest_global_mbf', 'stress_global_mbf', 'filename', 'dir', 'datetime', 'fileid', 'institution', 'model', 'manufacturer', 'index', 'frame_time', 'number_of_frames', 'heart_rate', 'deltaX', 'deltaY', 'a2c', 'a2c_laocc', 'a2c_lvocc_s', 'a3c', 'a3c_laocc', 'a3c_lvocc_s', 'a4c', 'a4c_far', 'a4c_laocc', 'a4c_lvocc_s', 'a4c_rv', 'a4c_rv_laocc', 'a5c', 'apex', 'other', 'plax_far', 'plax_lac', 'plax_laz', 'plax_laz_ao', 'plax_plax', 'psax_avz', 'psax_az', 'psax_mv', 'psax_pap', 'rvinf', 'subcostal', 'suprasternal', 'max_view', 'mode'], dtype='object')

In [14]:
# Prepare the final set that we will use for writing TFR files. We don't want any rows with NAs in some columns.
no_na_columns = ['study', 'mrn', 'echo_study_date', 'mrnstudyid', 
                 'unaffected_cfr', 'stress_mbf_unaff', 'rest_mbf_unaff']
files_tfr = files_cfr_dset.dropna(subset=no_na_columns)

print('Dropped {} rows.'.format(files_cfr_dset.shape[0]-files_tfr.shape[0]))
# Add some other information that we need and shuffle the whole thing
files_tfr = files_tfr.assign(rate = np.round(1/files_tfr.frame_time*1e3, decimals = 1))
files_tfr = files_tfr.sample(frac = 1)

Dropped 0 rows.


In [15]:
dset_filename = 'tfr_files_dset_BWH_'+cfr_meta_date+'.parquet'
files_tfr.to_parquet(os.path.join(meta_dir, dset_filename))
print(files_tfr.shape)
files_tfr.head()

(103956, 59)


Unnamed: 0,study,mrn,echo_study_date,mrnstudyid,days_post_cfr,cfr_study_date,petmrn_identifier,post-2018,myocardial_perfusion,global_cfr_calc,unaffected_cfr,rest_mbf_unaff,stress_mbf_unaff,num_unaffected_segs,rest_global_mbf,stress_global_mbf,filename,dir,datetime,fileid,institution,model,manufacturer,index,frame_time,...,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,max_view,mode,rate
85982,4a1041a96a6027ca_4903a444c23a2b4bd8607d7ff1b0,16552929,2007-10-11,1384,0,2007-10-11,16552929_2007-10-11,0,abnormal,1.210859,1.260635,0.8886,1.1202,5,0.792,0.959,4a1041a96a6027ca_4903a444c23a2b4bd8607d7ff1b0_...,/mnt/obi0/phi/echo/npyFiles/BWH/4a10/4a1041a96...,2007-10-11 11:01:03,4a1041a96a6027ca_4903a444c23a2b4bd8607d7ff1b0_...,BWH,Vivid7,GE Vingmed Ultrasound,0.0,33.816424,...,4.238678e-15,4.212624e-14,7.953578e-14,2.595388e-13,8.58405e-15,2.497634e-15,1.822248e-14,1.47141e-14,1.322987e-14,0.9999996,1.750281e-14,1.98628e-12,3.047733e-16,1.795013e-11,3.883832e-12,3.750108e-15,2.713339e-15,3.174328e-11,8.481864e-10,3.131155e-17,2.4378850000000002e-17,3.73075e-07,other,train,29.6
10203,4900490cbeca1305_4903a44ab12fcfca7ba9692a3793,23748668,2009-04-13,2108,-2,2009-04-15,23748668_2009-04-15,0,abnormal,0.795437,0.820887,1.781,1.462,1,1.271,1.011,4900490cbeca1305_4903a44ab12fcfca7ba9692a3793_...,/mnt/obi0/phi/echo/npyFiles/BWH/4900/4900490cb...,2009-04-13 11:41:22,4900490cbeca1305_4903a44ab12fcfca7ba9692a3793_...,BWH,iE33,Philips Medical Systems,0.0,33.333,...,0.001962777,1.365048e-07,5.152581e-05,0.6387029,8.481436e-06,7.072853e-06,0.0001808832,4.897693e-05,0.05059526,1.209158e-09,0.002534719,0.0002194041,1.266521e-07,1.356073e-07,4.683977e-05,1.239621e-06,0.1331697,0.1366137,0.001824745,0.03310743,3.279151e-06,0.0001353323,a4c_laocc,train,30.0
18488,4903a6794462007f_4903a585855e2e2fd9de59986b19,20223210,2010-10-05,1724,0,2010-10-05,20223210_2010-10-05,0,normal,2.026385,1.992942,0.783471,1.561412,17,0.758,1.536,4903a6794462007f_4903a585855e2e2fd9de59986b19_...,/mnt/obi0/phi/echo/npyFiles/BWH/4903/4903a6794...,2010-10-05 12:59:16,4903a6794462007f_4903a585855e2e2fd9de59986b19_...,BWH,Vivid7,GE Vingmed Ultrasound,0.0,40.062891,...,3.219439e-08,7.092292e-10,1.669775e-10,1.142723e-08,1.304425e-08,0.0003148783,6.199999e-08,2.276956e-07,2.722717e-10,0.0005536863,2.989144e-10,0.0001082421,7.560487e-09,2.151631e-08,3.502969e-08,0.0001423083,1.819318e-07,2.980794e-09,0.4379345,9.425228e-07,0.5609441,4.704922e-08,subcostal,train,25.0
7715,48b79e4032a19858_4903a58c1bc9938dc984abcc765e,32227324,2019-06-19,2577,40,2019-05-10,32227324_2019-05-10,1,normal,2.16,2.16,0.58,1.25,17,0.58,1.25,48b79e4032a19858_4903a58c1bc9938dc984abcc765e_...,/mnt/obi0/phi/echo/npyFiles/BWH/48b7/48b79e403...,2019-06-19 14:13:37,48b79e4032a19858_4903a58c1bc9938dc984abcc765e_...,BWH,Vivid E95,GE Vingmed Ultrasound,0.0,20.08658,...,6.115003e-15,5.918337e-18,5.928353e-16,4.2342599999999997e-19,1.201615e-14,4.14546e-12,1.9958870000000002e-17,4.459599e-17,2.140246e-16,2.25824e-17,2.208828e-10,3.38804e-10,5.190876e-18,5.778398000000001e-17,1.0,1.883388e-14,1.2906820000000001e-18,1.465339e-17,1.613272e-14,5.832214e-16,7.528517e-18,7.440888999999999e-19,plax_plax,test,49.8
52907,49028d199c52e980_4903a444c23bd0a6594aad09cc1c,21782644,2007-11-08,1855,219,2007-04-03,21782644_2007-04-03,0,abnormal,1.079336,1.700328,0.406,0.690333,3,0.542,0.585,49028d199c52e980_4903a444c23bd0a6594aad09cc1c_...,/mnt/obi0/phi/echo/npyFiles/BWH/4902/49028d199...,2007-11-08 13:10:47,49028d199c52e980_4903a444c23bd0a6594aad09cc1c_...,BWH,iE33,Philips Medical Systems,0.0,56.3,...,1.88078e-12,9.137463e-12,2.779099e-12,7.004369e-13,3.549967e-13,4.201546e-13,1.276405e-10,1.152038e-08,1.102726e-12,1.0,9.329537e-14,3.88205e-11,6.552194e-13,7.972331e-12,8.098727e-12,5.289369e-15,1.142572e-08,3.891063e-08,2.799873e-10,4.661629e-13,1.777731e-12,7.362033e-11,other,train,17.8


In [16]:
minrate = 20
minframes = 40
minduration = minframes/minrate
print('Minmum rate:      {}'.format(minrate))
print('Minimum frames:   {}'.format(minframes))
print('Minimum duration: {}'.format(minduration))
#minduration = 1.5
#minframes = int(np.ceil(minrate*minduration))

maxrows = files_tfr.shape[0]
rows_disqualified = files_tfr[(files_tfr.rate < minrate) | (files_tfr.number_of_frames<minframes)].shape[0]
rows_qualified = files_tfr[(files_tfr.rate >= minrate) & (files_tfr.number_of_frames>=minframes)].shape[0]

print('Disqualified Rows {}, fraction:{:.1f}'.format(rows_disqualified, 
                                                     np.round(rows_disqualified/maxrows*100), decimals=1))

print('Qualified Rows {}, fraction:{:.1f}'.format(rows_qualified, 
                                                     np.round(rows_qualified/maxrows*100), decimals=1))

Minmum rate:      20
Minimum frames:   40
Minimum duration: 2.0
Disqualified Rows 28337, fraction:27.0
Qualified Rows 75618, fraction:73.0
