In [1]:
import os
import pandas as pd
import pickle
import glob
from scipy.stats import spearmanr

# Custom imports
from werdich_cfr.models.Modeltrainer_Inc2 import VideoTrainer
from werdich_cfr.tfutils.tfutils import use_gpu_devices

In [2]:
# Directories and files
# Root of the CFR data tree; every other path in this notebook is built from it.
cfr_dir = os.path.normpath('/mnt/obi0/andreas/data/cfr')
# Directory and file name of the best-model table that is read in a later cell.
pred_dir = os.path.join(cfr_dir, 'predictions')
best_models_filename = 'cfr_correlations_bestmodels_30FPS.parquet'
# Date tag embedded in the metadata / TFRecord directory and file names.
meta_date = '200617'
# NOTE(review): meta_dir is not referenced by any cell visible in this notebook.
meta_dir = os.path.join(cfr_dir, 'metadata_'+meta_date)
# View tag used when matching TFRecord file names (see get_file_list).
view = 'a4c'
# NOTE(review): fc_nodes, dset_list and tracer_list are not referenced by any
# cell visible in this notebook — confirm whether they are still needed.
fc_nodes = 1
dset_list = ['cfr', 'mbf_ammonia', 'mbf_rubidium']
tracer_list = ['ammonia', 'rubidium']

# Project helper that selects the GPUs named in the string and prints the
# available / training device lists (see the cell output).
physical_devices, device_list = use_gpu_devices(gpu_device_string='0,1,2,3')

AVAILABLE GPUs:
PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')
PhysicalDevice(name='/physical_device:GPU:2', device_type='GPU')
PhysicalDevice(name='/physical_device:GPU:3', device_type='GPU')
TRAIN DEVICE LIST:
/GPU:0
/GPU:1
/GPU:2
/GPU:3


In [3]:
def get_file_list(tfr_data_dir, meta_date, dset, view, mode):
    """Return the sorted TFRecord files of one dataset split and print them.

    Files are matched with the glob pattern
    ``<tfr_data_dir>/<dset>_<view>_<mode>_<meta_date>_*.tfrecords``.

    Args:
        tfr_data_dir: Directory that is searched.
        meta_date: Date tag that is part of the file name.
        dset: Dataset name that is part of the file name.
        view: View tag that is part of the file name.
        mode: Split name that is part of the file name; it is also printed
            as a heading before the file names.

    Returns:
        The matching paths as a sorted list (empty when nothing matches).
    """
    name_pattern = '_'.join([dset, view, mode, meta_date, '*.tfrecords'])
    matches = glob.glob(os.path.join(tfr_data_dir, name_pattern))
    matches.sort()

    # Echo the split name followed by one path per line.
    print(mode)
    print('\n'.join(matches))
    return matches

In [4]:
# Load the table of best checkpoints and discard the 'view' and 'gpu' columns.
best_models = (
    pd.read_parquet(os.path.join(pred_dir, best_models_filename))
    .drop(columns=['view', 'gpu'])
)

# Show the checkpoint path of the first row, followed by a blank line.
print(best_models['checkpoint_file'].iloc[0])
print()

# Alphabetically sorted unique model names; a later cell selects one by position.
model_name_list = sorted(best_models['model_name'].unique())
print('\n'.join(model_name_list))
best_models

/mnt/obi0/andreas/data/cfr/log/mbf_rubidium_a4c_dgx-1_fc1_stress_global_mbf/mbf_rubidium_a4c_dgx-1_fc1_stress_global_mbf_chkpt_300.h5

cfr_a4c_dgx-1_fc1_global_cfr_calc
cfr_a4c_dgx-1_fc1_rest_global_mbf
cfr_a4c_dgx-1_fc1_stress_global_mbf
mbf_ammonia_a4c_dgx-1_fc1_rest_global_mbf
mbf_ammonia_a4c_dgx-1_fc1_stress_global_mbf
mbf_rubidium_a4c_dgx-1_fc1_rest_global_mbf
mbf_rubidium_a4c_dgx-1_fc1_stress_global_mbf


Unnamed: 0,model_name,model_output,epoch,checkpoint_file,spear_cor,spear_p,pear_cor,pear_p,n_samples,dset
6,mbf_rubidium_a4c_dgx-1_fc1_stress_global_mbf,stress_global_mbf,300,/mnt/obi0/andreas/data/cfr/log/mbf_rubidium_a4...,0.589223,2.572355e-43,0.518113,3.305941e-32,449,mbf_rubidium
5,mbf_rubidium_a4c_dgx-1_fc1_rest_global_mbf,rest_global_mbf,300,/mnt/obi0/andreas/data/cfr/log/mbf_rubidium_a4...,0.498421,1.432274e-29,0.490814,1.3478420000000001e-28,449,mbf_rubidium
4,mbf_ammonia_a4c_dgx-1_fc1_stress_global_mbf,stress_global_mbf,100,/mnt/obi0/andreas/data/cfr/log/mbf_ammonia_a4c...,0.417114,1.689962e-28,0.448963,2.8945170000000003e-33,644,mbf_ammonia
3,mbf_ammonia_a4c_dgx-1_fc1_rest_global_mbf,rest_global_mbf,200,/mnt/obi0/andreas/data/cfr/log/mbf_ammonia_a4c...,0.347719,9.703219e-20,0.350881,4.2895299999999996e-20,644,mbf_ammonia
2,cfr_a4c_dgx-1_fc1_stress_global_mbf,stress_global_mbf,100,/mnt/obi0/andreas/data/cfr/log/cfr_a4c_dgx-1_f...,0.547178,1.5549299999999999e-86,0.539896,7.314193e-84,1095,cfr
1,cfr_a4c_dgx-1_fc1_rest_global_mbf,rest_global_mbf,300,/mnt/obi0/andreas/data/cfr/log/cfr_a4c_dgx-1_f...,0.42834,4.368486999999999e-50,0.472019,7.6326390000000004e-62,1095,cfr
0,cfr_a4c_dgx-1_fc1_global_cfr_calc,global_cfr_calc,150,/mnt/obi0/andreas/data/cfr/log/cfr_a4c_dgx-1_f...,0.291996,5.776872e-23,0.319546,2.047023e-27,1095,cfr


In [5]:
# Choose the model to evaluate by its position in the sorted name list.
model_name = model_name_list[6]

log_dir = os.path.join(cfr_dir, 'log', model_name)

# Look up the checkpoint and dataset recorded for this model (first match).
checkpoint_file = best_models.loc[best_models['model_name'] == model_name, 'checkpoint_file'].iloc[0]
dset = best_models.loc[best_models['model_name'] == model_name, 'dset'].iloc[0]
tfr_data_dir = os.path.join(cfr_dir, f'tfr_{meta_date}', dset)

# NOTE(review): pickle.load executes arbitrary code from the file; only load
# pickles that were produced by this project.
model_dict_file = os.path.join(log_dir, f'{model_name}_model_dict.pkl')
with open(model_dict_file, 'rb') as fl:
    model_dict = pickle.load(fl)

features_dict_file = os.path.join(tfr_data_dir, f'global_pet_echo_dataset_{meta_date}.pkl')
with open(features_dict_file, 'rb') as fl:
    feature_dict = pickle.load(fl)

print(f'Model name: {model_name}')
print(f'dset      : {dset}')
print(f'Checkpoint: {os.path.basename(checkpoint_file)}')

Model name: mbf_rubidium_a4c_dgx-1_fc1_stress_global_mbf
dset      : mbf_rubidium
Checkpoint: mbf_rubidium_a4c_dgx-1_fc1_stress_global_mbf_chkpt_300.h5


In [6]:
# Test data: list the TFRecord files of the test split for the selected dataset.
test_file_list = get_file_list(tfr_data_dir=tfr_data_dir,
                               meta_date=meta_date,
                               dset=dset,
                               view=view,
                               mode='test')

# Fail early with a descriptive message; otherwise pd.concat below raises an
# opaque "No objects to concatenate" error.
if not test_file_list:
    raise FileNotFoundError(
        f'No test TFRecord files found in {tfr_data_dir} '
        f'(dset={dset}, view={view}, meta_date={meta_date})')

# Each TFRecord file has a parquet file with the same base name. Swap only the
# extension: str.replace would also rewrite '.tfrecords' if it occurred
# elsewhere in the path.
parquet_file_list = [os.path.splitext(path)[0] + '.parquet' for path in test_file_list]

# Stack the per-file tables in file order with a fresh 0..n-1 index.
df_test = pd.concat([pd.read_parquet(path) for path in parquet_file_list],
                    ignore_index=True)

test
/mnt/obi0/andreas/data/cfr/tfr_200617/mbf_rubidium/mbf_rubidium_a4c_test_200617_0.tfrecords
/mnt/obi0/andreas/data/cfr/tfr_200617/mbf_rubidium/mbf_rubidium_a4c_test_200617_1.tfrecords
/mnt/obi0/andreas/data/cfr/tfr_200617/mbf_rubidium/mbf_rubidium_a4c_test_200617_2.tfrecords
/mnt/obi0/andreas/data/cfr/tfr_200617/mbf_rubidium/mbf_rubidium_a4c_test_200617_3.tfrecords
/mnt/obi0/andreas/data/cfr/tfr_200617/mbf_rubidium/mbf_rubidium_a4c_test_200617_4.tfrecords
/mnt/obi0/andreas/data/cfr/tfr_200617/mbf_rubidium/mbf_rubidium_a4c_test_200617_5.tfrecords
/mnt/obi0/andreas/data/cfr/tfr_200617/mbf_rubidium/mbf_rubidium_a4c_test_200617_6.tfrecords
/mnt/obi0/andreas/data/cfr/tfr_200617/mbf_rubidium/mbf_rubidium_a4c_test_200617_7.tfrecords


In [7]:
# Run predictions from the checkpoint file
# VideoTrainer is a project class; it is constructed here from the pickled
# model and feature dictionaries and without a training configuration
# (train_dict=None).
VT = VideoTrainer(log_dir=log_dir, 
                  model_dict=model_dict, 
                  train_dict=None, 
                  feature_dict=feature_dict)

# Predict on the test TFRecords in batches of 8.
# NOTE(review): presumably the model weights are restored from checkpoint_file
# inside predict_on_test — confirm in Modeltrainer_Inc2.
pred = VT.predict_on_test(test_tfr_file_list=test_file_list,
                          checkpoint_file=checkpoint_file,
                          batch_size=8)

Extracting true labels from testset.
Samples: 449, steps: 57


In [8]:
# Preview the predictions: per the rendered output, the first column is named
# after the model output and the second after the checkpoint.
pred.head()

Unnamed: 0,stress_global_mbf,mbf_rubidium_a4c_dgx-1_fc1_stress_global_mbf_chkpt_300
0,2.195,1.607079
1,2.864,1.644604
2,1.483,1.660823
3,1.792,1.642384
4,0.826,1.38721


In [9]:
# First column of `pred`: label taken from the TFRecords; second: model output.
response_col = pred.columns[0]
predict_col = pred.columns[1]

# pd.concat(axis=1) aligns on index labels with an outer join, so a length
# mismatch would silently produce NaN-padded rows instead of an error.
assert len(pred) == len(df_test), (
    f'Row count mismatch: {len(pred)} predictions vs {len(df_test)} test rows')

# Rename the TFRecord label so it can sit next to the parquet label, and give
# the predictions a fresh 0..n-1 index so the join below is purely positional.
pred1 = (
    pred
    .rename(columns={response_col: response_col+'_tfr'})
    .reset_index(drop=True)
)
df_test_pred = pd.concat([df_test, pred1], axis=1).reset_index(drop=True)

# Visual alignment check: the parquet label and the '_tfr' label should agree
# on every sampled row.
df_test_pred.sample(10)[[response_col, response_col+'_tfr', predict_col]]

Unnamed: 0,stress_global_mbf,stress_global_mbf_tfr,mbf_rubidium_a4c_dgx-1_fc1_stress_global_mbf_chkpt_300
253,1.098,1.098,0.992944
385,2.43,2.43,1.786815
111,1.504,1.504,1.810531
73,1.77,1.77,1.10183
135,2.77,2.77,2.280268
302,1.575,1.575,2.370618
235,1.925,1.925,1.633217
378,1.657,1.657,1.1417
165,2.315,2.315,1.795912
272,1.931,1.931,1.839215


In [10]:
# Calculate correlation between the label and the prediction, and print it
# next to the value stored in the best-models table for comparison.
s = spearmanr(df_test_pred[response_col], df_test_pred[predict_col])
print(f'Correlation :        {s[0]}')
print(f'Correlation from df: {best_models[best_models.model_name==model_name].spear_cor.values[0]}')

# Save the test set
test_dir = os.path.join(cfr_dir, 'predictions_echodata', 'testset')
# to_parquet does not create missing directories; make sure the target exists
# so the notebook also runs on a fresh data tree.
os.makedirs(test_dir, exist_ok=True)
test_file = os.path.join(test_dir, model_name+'_test.parquet')
print(test_file)
df_test_pred.to_parquet(test_file)

Correlation :        0.5892193665892739
Correlation from df: 0.5892225017933277
/mnt/obi0/andreas/data/cfr/predictions_echodata/testset/mbf_rubidium_a4c_dgx-1_fc1_stress_global_mbf_test.parquet


In [11]:
# Preview the merged table (test-set metadata plus the two prediction columns).
# NOTE(review): the rendered output includes patient identifiers (the mrn
# column and study dates) — clear this output before sharing the notebook.
df_test_pred.head()

Unnamed: 0,rest_global_mbf,stress_global_mbf,global_cfr_calc,post-2018,tracer_obi,mrn,study,pet_date,echo_date,petmrn_identifier,days_post_pet,pet_measurement,difference(days),filename,dir,datetime,file_base,identifier,frame_time,number_of_frames,heart_rate,deltaX,deltaY,a2c,a2c_laocc,a2c_lvocc_s,a3c,a3c_laocc,a3c_lvocc_s,a4c,a4c_far,a4c_laocc,a4c_lvocc_s,a4c_rv,a4c_rv_laocc,a5c,apex,other,plax_far,plax_lac,plax_laz,plax_laz_ao,plax_plax,psax_avz,psax_az,psax_mv,psax_pap,rvinf,subcostal,suprasternal,year_month,study_full_time,institution,model,manufacturer,max_view,sum_views,dset_mode,rate,dur,im_array_shape,stress_global_mbf_tfr,mbf_rubidium_a4c_dgx-1_fc1_stress_global_mbf_chkpt_300
0,1.342,2.195,1.635618,0,rubidium,21331939,490289f86cc1378f_4903a585842b97304764982f250d,2010-05-13,2010-05-13,21331939_2010-05-13,0,1.0,0.0,490289f86cc1378f_4903a585842b97304764982f250d_...,/mnt/obi0/phi/echo/npyFiles/BWH/4902/490289f86...,2010-05-13 10:36:43,490289f86cc1378f_4903a585842b97304764982f250d_...,490289f86cc1378f_4903a585842b97304764982f250d_...,33.333334,60.0,60.0,0.038363,0.038363,1.559618e-09,6.400482e-08,1.876453e-08,7.622704e-06,6.137491e-05,8.983325e-08,0.910787,1.93407e-08,1.561255e-06,2.48581e-05,0.005255863,1.610388e-06,0.002492928,0.003189819,1.091652e-05,2.773943e-06,2.310039e-06,2.444413e-06,5.966709e-09,8.343986e-07,0.00588777,2.736725e-08,2.366156e-06,0.07226657,2.419601e-08,3.65246e-08,1.001161e-06,2010.0,20100513103643,BWH,Vivid7,GE Vingmed Ultrasound,a4c,1.0,test,30.0,2.0,"[125, 183, 40]",2.195,1.607079
1,1.966,2.864,1.456765,0,rubidium,8253478,4b735bf068fe5ddc_4903a44517b6077dd755b2b320cc,2006-12-12,2006-11-21,8253478_2006-12-12,-21,1.0,21.0,4b735bf068fe5ddc_4903a44517b6077dd755b2b320cc_...,/mnt/obi0/phi/echo/npyFiles/BWH/4b73/4b735bf06...,2006-11-21 15:31:30,4b735bf068fe5ddc_4903a44517b6077dd755b2b320cc_...,4b735bf068fe5ddc_4903a44517b6077dd755b2b320cc_...,33.0,90.0,82.0,0.071429,0.057143,2.664972e-09,3.012356e-05,2.764545e-08,1.479852e-06,2.175705e-09,5.661547e-09,0.999764,2.4237e-09,0.000199868,9.407246e-07,7.043044e-10,3.428551e-08,3.843846e-08,2.947771e-07,8.182737e-10,2.181522e-09,1.210226e-08,8.902253e-11,1.216287e-11,8.415716e-10,2.51234e-07,2.028115e-09,7.922345e-07,3.37412e-07,3.682174e-07,3.351339e-12,1.860874e-06,2006.0,20061121153130,BWH,,Philips_Medical_Systems,a4c,1.0,test,30.3,2.97,"[184, 257, 40]",2.864,1.644604
2,0.665,1.483,2.230075,0,rubidium,4321857,4b7f0ffa098a4545_4903a44b32e462237f65164f577f,2008-08-18,2008-08-18,4321857_2008-08-18,0,1.0,0.0,4b7f0ffa098a4545_4903a44b32e462237f65164f577f_...,/mnt/obi0/phi/echo/npyFiles/BWH/4b7f/4b7f0ffa0...,2008-08-18 15:27:15,4b7f0ffa098a4545_4903a44b32e462237f65164f577f_...,4b7f0ffa098a4545_4903a44b32e462237f65164f577f_...,19.250587,146.0,66.0,0.043165,0.043165,0.0002859573,1.393786e-07,2.930263e-07,0.0001369609,4.227226e-06,5.074497e-07,0.998467,4.188292e-07,1.163288e-05,8.392039e-06,2.192693e-05,1.053669e-08,0.0009802699,3.656742e-05,3.149152e-06,8.755391e-08,1.338205e-07,3.707338e-07,5.850531e-08,5.185903e-08,1.079682e-06,6.401889e-09,1.116743e-05,3.063248e-08,2.866044e-05,2.482404e-11,2.818732e-07,2008.0,20080818152715,BWH,Vivid i,GEMS Ultrasound,a4c,1.0,test,51.9,2.810586,"[137, 206, 40]",1.483,1.660823
3,1.909,1.792,0.938711,0,rubidium,6649008,4b7d72b675fda876_4903a44ab12a96407f77af05c0ab,2009-01-14,2009-01-04,6649008_2009-01-14,-10,1.0,10.0,4b7d72b675fda876_4903a44ab12a96407f77af05c0ab_...,/mnt/obi0/phi/echo/npyFiles/BWH/4b7d/4b7d72b67...,2009-01-04 10:25:17,4b7d72b675fda876_4903a44ab12a96407f77af05c0ab_...,4b7d72b675fda876_4903a44ab12a96407f77af05c0ab_...,33.333,91.0,93.0,0.035325,0.035325,2.129095e-06,1.217314e-07,4.230819e-11,3.271909e-09,4.014568e-08,4.848537e-12,0.999992,2.512625e-09,2.123504e-12,1.822786e-11,8.573318e-10,4.317697e-11,1.163014e-09,1.606825e-07,2.271981e-06,2.43974e-10,2.988325e-10,4.036726e-09,1.673901e-11,1.756412e-09,4.190206e-08,5.676226e-09,3.191272e-13,1.646743e-08,3.57563e-06,1.699927e-08,2.808266e-11,2009.0,20090104102517,BWH,iE33,Philips Medical Systems,a4c,1.0,test,30.0,3.033303,"[159, 212, 40]",1.792,1.642384
4,0.733,0.826,1.126876,0,rubidium,23244098,49004c5f71c67723_4903a585842c23b4aae71d17d5da,2009-03-31,2010-02-09,23244098_2009-03-31,315,1.0,315.0,49004c5f71c67723_4903a585842c23b4aae71d17d5da_...,/mnt/obi0/phi/echo/npyFiles/BWH/4900/49004c5f7...,2010-02-09 20:09:28,49004c5f71c67723_4903a585842c23b4aae71d17d5da_...,49004c5f71c67723_4903a585842c23b4aae71d17d5da_...,33.333,44.0,52.0,0.039451,0.039451,7.114963e-11,1.101786e-09,6.846071e-12,4.912556e-12,5.758315e-14,2.792355e-12,1.0,1.288405e-10,8.373915e-12,1.737186e-12,3.022311e-11,5.659432e-12,1.793367e-07,7.024913e-11,1.477121e-10,8.220571e-15,1.145449e-12,1.164976e-11,7.55107e-13,1.576331e-13,7.704648e-12,2.39772e-13,1.091949e-12,5.695646e-11,4.368047e-14,2.403327e-14,2.792633e-15,2010.0,20100209200928,BWH,iE33,Philips Medical Systems,a4c,1.0,test,30.0,1.466652,"[178, 237, 40]",0.826,1.38721


In [12]:
# Display the best-models table once more before it is written out below.
best_models

Unnamed: 0,model_name,model_output,epoch,checkpoint_file,spear_cor,spear_p,pear_cor,pear_p,n_samples,dset
6,mbf_rubidium_a4c_dgx-1_fc1_stress_global_mbf,stress_global_mbf,300,/mnt/obi0/andreas/data/cfr/log/mbf_rubidium_a4...,0.589223,2.572355e-43,0.518113,3.305941e-32,449,mbf_rubidium
5,mbf_rubidium_a4c_dgx-1_fc1_rest_global_mbf,rest_global_mbf,300,/mnt/obi0/andreas/data/cfr/log/mbf_rubidium_a4...,0.498421,1.432274e-29,0.490814,1.3478420000000001e-28,449,mbf_rubidium
4,mbf_ammonia_a4c_dgx-1_fc1_stress_global_mbf,stress_global_mbf,100,/mnt/obi0/andreas/data/cfr/log/mbf_ammonia_a4c...,0.417114,1.689962e-28,0.448963,2.8945170000000003e-33,644,mbf_ammonia
3,mbf_ammonia_a4c_dgx-1_fc1_rest_global_mbf,rest_global_mbf,200,/mnt/obi0/andreas/data/cfr/log/mbf_ammonia_a4c...,0.347719,9.703219e-20,0.350881,4.2895299999999996e-20,644,mbf_ammonia
2,cfr_a4c_dgx-1_fc1_stress_global_mbf,stress_global_mbf,100,/mnt/obi0/andreas/data/cfr/log/cfr_a4c_dgx-1_f...,0.547178,1.5549299999999999e-86,0.539896,7.314193e-84,1095,cfr
1,cfr_a4c_dgx-1_fc1_rest_global_mbf,rest_global_mbf,300,/mnt/obi0/andreas/data/cfr/log/cfr_a4c_dgx-1_f...,0.42834,4.368486999999999e-50,0.472019,7.6326390000000004e-62,1095,cfr
0,cfr_a4c_dgx-1_fc1_global_cfr_calc,global_cfr_calc,150,/mnt/obi0/andreas/data/cfr/log/cfr_a4c_dgx-1_f...,0.291996,5.776872e-23,0.319546,2.047023e-27,1095,cfr


In [13]:
# Save the best-models table next to the test-set predictions (test_dir is
# defined in the cell that saves the test set). No columns are removed here;
# the 'view' and 'gpu' columns were already dropped when the table was loaded.
best_models.to_parquet(os.path.join(test_dir, best_models_filename))