### Verify predictions using script for pipeline ###

In [18]:
import os
import glob
import pickle
import pandas as pd

### Prepare a test set using some BWH videos ###

In [2]:
az_project_dir = os.path.normpath('/mnt/obi0/sgoto/AZ_Project')
az_npy_dir = os.path.join(az_project_dir, 'npyFiles')
azmeta_file_name = 'metadata.tsv'
azmeta_file = os.path.join(az_project_dir, azmeta_file_name) 

In [3]:
# Load the meta data
azmeta = pd.read_csv(azmeta_file, sep='\t')
azmeta = azmeta.dropna(subset=['frametime']).reset_index(drop=True)
azmeta_file_list = sorted(list(azmeta.filename.unique()))
print(f'Meta file contains {len(azmeta_file_list)} file with complete meta data.')
display(azmeta.head())
# Load .npy file list
az_file_list = glob.glob(os.path.join(az_npy_dir, '*.npy.lz4'))
# Compare with meta data
az_file_name_list = [os.path.basename(file).rsplit('.')[0] for file in az_file_list]
set(az_file_name_list).symmetric_difference(azmeta_file_list)

Meta file contains 108 file with complete meta data.


Unnamed: 0.1,Unnamed: 0,filename,frametime,deltaX,deltaY
0,0,KX00002F,17.363,-0.033333,0.033333
1,3,KX000037,18.69,-0.031667,0.031667
2,6,KX00001D,16.646,-0.031667,0.031667
3,9,KX000005,16.646,-0.031667,0.031667
4,12,KX0000C5,16.49,-0.028333,0.028333


set()

In [36]:
# For development, use BWH videos with known predictions
data_root = os.path.normpath('/mnt/obi0/andreas/data')
dev_dir = os.path.join(data_root, 'cfr_AZ') 

predictions_dir = os.path.join(data_root, 'cfr', 'predictions_echodata/FirstEchoEvents2repeat/cfr_models_30fps')
predictions_file_name = 'cfr_a4c_dgx-1_fc1_global_cfr_calc.parquet'
predictions_file = os.path.join(predictions_dir, predictions_file_name)
pred_df = pd.read_parquet(predictions_file)
pred_df = pred_df.sort_values('filename')
display(pred_df.head())

Unnamed: 0,filename,model_output,dataset,predictions,model_name,checkpoint,dir,study,mrn,datetime,...,rvinf,subcostal,suprasternal,year_month,study_full_time,institution,model,manufacturer,max_view,sum_views
0,48b4c0c643454479_4903a58051d59c1632a2417b35c5_...,global_cfr_calc,cfr,1.596508,cfr_a4c_dgx-1_fc1_global_cfr_calc,cfr_a4c_dgx-1_fc1_global_cfr_calc_chkpt_150.h5,/mnt/obi0/phi/echo/npyFiles/BWH/48b4/48b4c0c64...,48b4c0c643454479_4903a58051d59c1632a2417b35c5,31689912,2015-11-19 11:12:41,...,4.799932e-09,8.77287e-12,3.521227e-07,2015.0,20151119111241,BWH,Vivid E9,GE Vingmed Ultrasound,a4c,1.0
3,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,global_cfr_calc,cfr,2.038057,cfr_a4c_dgx-1_fc1_global_cfr_calc,cfr_a4c_dgx-1_fc1_global_cfr_calc_chkpt_150.h5,/mnt/obi0/phi/echo/npyFiles/BWH/48b4/48b4c0c64...,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e,31680713,2015-11-18 09:27:43,...,1.773409e-11,4.054678e-14,3.027763e-13,2015.0,20151118092743,BWH,iE33,Philips Medical Systems,a4c,1.0
2,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,global_cfr_calc,cfr,1.508264,cfr_a4c_dgx-1_fc1_global_cfr_calc,cfr_a4c_dgx-1_fc1_global_cfr_calc_chkpt_150.h5,/mnt/obi0/phi/echo/npyFiles/BWH/48b4/48b4c0c64...,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e,31680713,2015-11-18 09:27:43,...,8.558158e-07,9.62203e-11,8.219134e-10,2015.0,20151118092743,BWH,iE33,Philips Medical Systems,a4c,1.0
4,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,global_cfr_calc,cfr,1.867593,cfr_a4c_dgx-1_fc1_global_cfr_calc,cfr_a4c_dgx-1_fc1_global_cfr_calc_chkpt_150.h5,/mnt/obi0/phi/echo/npyFiles/BWH/48b4/48b4c0c64...,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e,31680713,2015-11-18 09:27:43,...,5.78447e-10,5.43536e-12,1.863309e-12,2015.0,20151118092743,BWH,iE33,Philips Medical Systems,a4c,1.0
5,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,global_cfr_calc,cfr,2.144364,cfr_a4c_dgx-1_fc1_global_cfr_calc,cfr_a4c_dgx-1_fc1_global_cfr_calc_chkpt_150.h5,/mnt/obi0/phi/echo/npyFiles/BWH/48b4/48b4c0c64...,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e,31680713,2015-11-18 09:27:43,...,1.089719e-10,8.384961e-14,3.23797e-12,2015.0,20151118092743,BWH,iE33,Philips Medical Systems,a4c,1.0


In [58]:
# Lets use the first 24 video files
n=24
video_name_list = list(pred_df.filename.unique())[:n]
pred_cols = ['filename', 'frame_time', 'deltaX', 'deltaY', 'dir', 'predictions']
video_meta_df = pred_df[pred_df.filename.isin(video_name_list)][pred_cols]
video_meta_df = video_meta_df.rename(columns={'predictions': 'prev_preds'}).reset_index(drop=True)

# We also want to add some videos that did not qualify
disqualified_file_name = 'BWH_2015-06-01_2015-11-30_FirstEcho_a4c_disqualified.parquet'
disqualified_file = os.path.join(data_root, 'cfr', 'predictions_echodata/FirstEchoEvents2repeat', 
                                 disqualified_file_name)
disqualified_df = pd.read_parquet(disqualified_file)

# Make a list of disqualified videos which we can append to the predictions
# They will drop out, but we want to test this
dis_n = 5
dis_cols = ['filename', 'frame_time', 'deltaX', 'deltaY', 'dir']
disqualified_videos = disqualified_df.iloc[:dis_n]
disqualified_videos = disqualified_videos[dis_cols]

# Concat with the videos that worked
video_meta_df = pd.concat([video_meta_df, disqualified_videos], axis = 0, ignore_index=True)
display(video_meta_df.tail(6))

Unnamed: 0,filename,frame_time,deltaX,deltaY,dir,prev_preds
23,48b4c0c8d243aaca_4903a58051d59c1632a30312c769_...,33.333,0.037483,0.037483,/mnt/obi0/phi/echo/npyFiles/BWH/48b4/48b4c0c8d...,1.761056
24,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,33.333,0.041843,0.041843,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b093000...,
25,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,33.333,0.041843,0.041843,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b093000...,
26,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,33.333,0.041843,0.041843,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b093000...,
27,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,33.333,0.041843,0.041843,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b093000...,
28,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,33.333,0.041843,0.041843,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b093000...,


In [59]:
# Lets save this short list of video files so that we can develop the script for the forward pass
video_meta_file = os.path.join(dev_dir, 'bwh_metadata.parquet')
video_meta_df.to_parquet(video_meta_file)
print(video_meta_file)

/mnt/obi0/andreas/data/cfr_AZ/bwh_metadata.parquet


In [61]:
# After the preprocessing step, we can download the disqualified video names.
# These should be the same
disqualified_filename = os.path.basename(video_meta_file).split('.')[0] + '_disqualified.parquet'
echo_df_disqualified = pd.read_parquet(os.path.join(cfr_project_dir, disqualified_filename))
display(echo_df_disqualified)

Unnamed: 0,filename,frame_time,deltaX,deltaY,dir,prev_preds,err
0,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,33.333,0.041843,0.041843,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b093000...,,video_len
1,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,33.333,0.041843,0.041843,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b093000...,,video_len
2,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,33.333,0.041843,0.041843,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b093000...,,video_len
3,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,33.333,0.041843,0.041843,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b093000...,,video_len
4,48b093000d7f_4903a580509f95b92b2105b5e972_Imag...,33.333,0.041843,0.041843,/mnt/obi0/phi/echo/npyFiles/BWH/48b0/48b093000...,,video_len


In [62]:
print(predictions_file_name)

cfr_a4c_dgx-1_fc1_global_cfr_calc.parquet


In [74]:
display(video_meta_df[['filename', 'prev_preds']].dropna().sort_values('filename'))

Unnamed: 0,filename,prev_preds
0,48b4c0c643454479_4903a58051d59c1632a2417b35c5_...,1.596508
1,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,2.038057
2,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,1.508264
3,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,1.867593
4,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,2.144364
5,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,2.103778
6,48b4c0c64fd048e6_4903a58051d59c1632a302331c54_...,2.442384
7,48b4c0c64fd048e6_4903a58051d59c1632a302331c54_...,2.347572
8,48b4c0c64fd048e6_4903a58051d59c1632a302331c54_...,2.201815
9,48b4c0c7a013c343_4903a58051d59f9b0d8cb70b61df_...,1.626981


In [69]:
# Check the model outputs
model_output_file = 'bwh_metadata_cfr_global_cfr_calc.parquet'
model_output_dir = os.path.join(dev_dir, 'predictions')
predict_df = pd.read_parquet(os.path.join(model_output_dir, model_output_file))
display(predict_df[['filename', 'predictions']].sort_values('filename'))

Unnamed: 0,filename,predictions
0,48b4c0c643454479_4903a58051d59c1632a2417b35c5_...,1.596508
1,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,2.038056
2,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,1.508264
3,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,1.867593
4,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,2.144364
5,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,2.103778
6,48b4c0c64fd048e6_4903a58051d59c1632a302331c54_...,2.442384
7,48b4c0c64fd048e6_4903a58051d59c1632a302331c54_...,2.347572
8,48b4c0c64fd048e6_4903a58051d59c1632a302331c54_...,2.201815
9,48b4c0c7a013c343_4903a58051d59f9b0d8cb70b61df_...,1.626981


### Rename the model parameter dictionaries ###

In [5]:
# Model parameters: Load them from unique files instead of the log dirs
cfr_project_dir = os.path.normpath('/mnt/obi0/andreas/data/cfr_AZ')
model_dir = os.path.join(cfr_project_dir, 'models')
checkpoint_list = 'cfr_correlations_bestmodels_30FPS.parquet'
checkpoint_df = pd.read_parquet(os.path.join(cfr_project_dir, 'models', checkpoint_list))
model_list = sorted(list(checkpoint_df.model_name.unique()))
display(checkpoint_df)

Unnamed: 0,model_name,model_output,epoch,checkpoint_file,spear_cor,spear_p,pear_cor,pear_p,n_samples,dset,view,gpu
6,mbf_rubidium_a4c_dgx-1_fc1_stress_global_mbf,stress_global_mbf,300,/mnt/obi0/andreas/data/cfr/log/mbf_rubidium_a4...,0.589223,2.572355e-43,0.518113,3.305941e-32,449,mbf_rubidium,rubidium,a4c
5,mbf_rubidium_a4c_dgx-1_fc1_rest_global_mbf,rest_global_mbf,300,/mnt/obi0/andreas/data/cfr/log/mbf_rubidium_a4...,0.498421,1.432274e-29,0.490814,1.3478420000000001e-28,449,mbf_rubidium,rubidium,a4c
4,mbf_ammonia_a4c_dgx-1_fc1_stress_global_mbf,stress_global_mbf,100,/mnt/obi0/andreas/data/cfr/log/mbf_ammonia_a4c...,0.417114,1.689962e-28,0.448963,2.8945170000000003e-33,644,mbf_ammonia,ammonia,a4c
3,mbf_ammonia_a4c_dgx-1_fc1_rest_global_mbf,rest_global_mbf,200,/mnt/obi0/andreas/data/cfr/log/mbf_ammonia_a4c...,0.347719,9.703219e-20,0.350881,4.2895299999999996e-20,644,mbf_ammonia,ammonia,a4c
2,cfr_a4c_dgx-1_fc1_stress_global_mbf,stress_global_mbf,100,/mnt/obi0/andreas/data/cfr/log/cfr_a4c_dgx-1_f...,0.547178,1.5549299999999999e-86,0.539896,7.314193e-84,1095,cfr,a4c,dgx-1
1,cfr_a4c_dgx-1_fc1_rest_global_mbf,rest_global_mbf,300,/mnt/obi0/andreas/data/cfr/log/cfr_a4c_dgx-1_f...,0.42834,4.368486999999999e-50,0.472019,7.6326390000000004e-62,1095,cfr,a4c,dgx-1
0,cfr_a4c_dgx-1_fc1_global_cfr_calc,global_cfr_calc,150,/mnt/obi0/andreas/data/cfr/log/cfr_a4c_dgx-1_f...,0.291996,5.776872e-23,0.319546,2.047023e-27,1095,cfr,a4c,dgx-1


In [6]:
model_name = model_list[4]
print(model_name)

mbf_ammonia_a4c_dgx-1_fc1_stress_global_mbf


In [7]:
model_s = checkpoint_df[checkpoint_df.model_name == model_name].iloc[0]
dset = model_s.dset
model_name = model_s.model_name
print(model_name)

mbf_ammonia_a4c_dgx-1_fc1_stress_global_mbf


In [29]:
# FEATURE DICT: Depends on dset only
meta_date = '200617'
cfr_data_root = os.path.normpath('/mnt/obi0/andreas/data/cfr')
dset_list = sorted(list(checkpoint_df.dset.unique()))
print(dset_list)
dset = dset_list[0]
for dset in dset_list:
    tfr_dir = os.path.join(cfr_data_root, 'tfr_'+meta_date, dset)
    feature_dict_file = glob.glob(os.path.join(tfr_dir, '*.pkl'))[0]
    feature_dict_dset_file_name = 'feature_dict_'+'tfr_'+meta_date+'_'+dset+'.pkl'
    feature_dict_dset_file = os.path.join(model_dir, feature_dict_dset_file_name)

    print(f'dset: {dset}')
    print(f'ORIGINAL feature_dict file: {feature_dict_file}')
    print(f'NEW feature_dict file:      {feature_dict_dset_file_name}')

    with open(feature_dict_file, 'rb') as fl:
        feature_dict = pickle.load(fl)
    print(feature_dict)
    
    print()

    # Save the feature_dict with the new name in the model_dir
    #with open(feature_dict_dset_file, 'wb') as fl:
    #    pickle.dump(feature_dict, fl, protocol=pickle.HIGHEST_PROTOCOL)

# OK, these are obviously all the same, so we need only one
feature_dict_dset_file_name = 'feature_dict_'+'tfr_'+meta_date+'.pkl'
feature_dict_dset_file = os.path.join(model_dir, feature_dict_dset_file_name)
with open(feature_dict_dset_file, 'wb') as fl:
    pickle.dump(feature_dict, fl, protocol=pickle.HIGHEST_PROTOCOL)

['cfr', 'mbf_ammonia', 'mbf_rubidium']
dset: cfr
ORIGINAL feature_dict file: /mnt/obi0/andreas/data/cfr/tfr_200617/cfr/global_pet_echo_dataset_200617.pkl
NEW feature_dict file:      feature_dict_tfr_200617_cfr.pkl
{'array': ['image', 'shape'], 'float': ['rest_global_mbf', 'stress_global_mbf', 'global_cfr_calc'], 'int': ['record'], 'features': ['image', 'shape', 'rest_global_mbf', 'stress_global_mbf', 'global_cfr_calc', 'record']}

dset: mbf_ammonia
ORIGINAL feature_dict file: /mnt/obi0/andreas/data/cfr/tfr_200617/mbf_ammonia/global_pet_echo_dataset_200617.pkl
NEW feature_dict file:      feature_dict_tfr_200617_mbf_ammonia.pkl
{'array': ['image', 'shape'], 'float': ['rest_global_mbf', 'stress_global_mbf', 'global_cfr_calc'], 'int': ['record'], 'features': ['image', 'shape', 'rest_global_mbf', 'stress_global_mbf', 'global_cfr_calc', 'record']}

dset: mbf_rubidium
ORIGINAL feature_dict file: /mnt/obi0/andreas/data/cfr/tfr_200617/mbf_rubidium/global_pet_echo_dataset_200617.pkl
NEW feature_

In [34]:
# Model dict depends on 

for model_name in model_list:

    log_dir = os.path.join(cfr_data_root, 'log', model_name)
    model_dict_file_name = model_name+'_model_dict.pkl'

    model_dict_file = os.path.join(log_dir, model_dict_file_name)
    model_dict_file_model = os.path.join(model_dir, model_dict_file_name)

    with open(model_dict_file, 'rb') as fl:
        model_dict = pickle.load(fl)

    # Save this to the model directory
    with open(model_dict_file_model, 'wb') as fl:
        pickle.dump(model_dict, fl, protocol=pickle.HIGHEST_PROTOCOL)

    print(model_name)
    print(model_dict_file)
    print(model_dict_file_model)
    print(model_dict)
    print()


cfr_a4c_dgx-1_fc1_global_cfr_calc
/mnt/obi0/andreas/data/cfr/log/cfr_a4c_dgx-1_fc1_global_cfr_calc/cfr_a4c_dgx-1_fc1_global_cfr_calc_model_dict.pkl
/mnt/obi0/andreas/data/cfr_AZ/models/cfr_a4c_dgx-1_fc1_global_cfr_calc_model_dict.pkl
{'name': 'cfr_a4c_dgx-1_fc1_global_cfr_calc', 'im_size': (299, 299, 1), 'im_scale_factor': 1.177, 'max_frame_time_ms': 33.34, 'n_frames': 40, 'filters': 64, 'fc_nodes': 1, 'model_output': 'global_cfr_calc', 'kernel_init': <tensorflow.python.ops.init_ops_v2.GlorotNormal object at 0x7f870f69f610>, 'bias_init': <tensorflow.python.ops.init_ops_v2.Zeros object at 0x7f870f69f990>}

cfr_a4c_dgx-1_fc1_rest_global_mbf
/mnt/obi0/andreas/data/cfr/log/cfr_a4c_dgx-1_fc1_rest_global_mbf/cfr_a4c_dgx-1_fc1_rest_global_mbf_model_dict.pkl
/mnt/obi0/andreas/data/cfr_AZ/models/cfr_a4c_dgx-1_fc1_rest_global_mbf_model_dict.pkl
{'name': 'cfr_a4c_dgx-1_fc1_rest_global_mbf', 'im_size': (299, 299, 1), 'im_scale_factor': 1.177, 'max_frame_time_ms': 33.34, 'n_frames': 40, 'filters': 