### Verify predictions using the pipeline script ###

In [22]:
import os
import glob
import pandas as pd

In [16]:
# Root of the AZ project on the shared mount; the paths below are built from it.
az_project_dir = os.path.normpath('/mnt/obi0/sgoto/AZ_Project')

# Metadata table (read further down as tab-separated text).
azmeta_file_name = 'metadata.tsv'
azmeta_file = os.path.join(az_project_dir, azmeta_file_name)

# Directory that is searched for '*.npy.lz4' files.
az_npy_dir = os.path.join(az_project_dir, 'npyFiles')

In [32]:
# Load the metadata table and keep only the rows that have a frame time.
azmeta = pd.read_csv(azmeta_file, sep='\t')
azmeta = azmeta.dropna(subset=['frametime']).reset_index(drop=True)
# Sorted, de-duplicated file names that survived the frametime filter.
azmeta_file_list = sorted(azmeta.filename.unique())
print(f'Meta file contains {len(azmeta_file_list)} file with complete meta data.')
display(azmeta.head())

# List the compressed .npy files found on disk.
npy_suffix = '.npy.lz4'
az_file_list = glob.glob(os.path.join(az_npy_dir, '*' + npy_suffix))

# Derive each file's name by removing exactly the known suffix. Splitting on
# '.' and taking element 0 would cut at the FIRST dot, so a base name that
# itself contains a dot would be truncated and reported as a false mismatch.
# The glob pattern above guarantees every path ends with npy_suffix.
az_file_name_list = [os.path.basename(file)[:-len(npy_suffix)] for file in az_file_list]

# Names present in only one of the two sources; an empty set means the files
# on disk and the filtered metadata agree.
set(az_file_name_list).symmetric_difference(azmeta_file_list)

Meta file contains 108 file with complete meta data.


Unnamed: 0.1,Unnamed: 0,filename,frametime,deltaX,deltaY
0,0,KX00002F,17.363,-0.033333,0.033333
1,3,KX000037,18.69,-0.031667,0.031667
2,6,KX00001D,16.646,-0.031667,0.031667
3,9,KX000005,16.646,-0.031667,0.031667
4,12,KX0000C5,16.49,-0.028333,0.028333


set()

In [49]:
# For development, use BWH videos with known predictions.
data_root = os.path.normpath('/mnt/obi0/andreas/data')
dev_dir = os.path.join(data_root, 'cfr_AZ')

# Location of the parquet file with the stored predictions.
predictions_file_name = 'cfr_a4c_dgx-1_fc1_global_cfr_calc.parquet'
predictions_dir = os.path.join(data_root, 'cfr', 'predictions_echodata/FirstEchoEvents2repeat/cfr_models_30fps')
predictions_file = os.path.join(predictions_dir, predictions_file_name)

# Read the predictions and order the rows by file name.
pred_df = pd.read_parquet(predictions_file).sort_values('filename')

# NOTE(review): the displayed head includes 'mrn' and 'datetime' columns;
# confirm these are de-identified before sharing the rendered notebook.
display(pred_df.head())

Unnamed: 0,filename,model_output,dataset,predictions,model_name,checkpoint,dir,study,mrn,datetime,...,rvinf,subcostal,suprasternal,year_month,study_full_time,institution,model,manufacturer,max_view,sum_views
0,48b4c0c643454479_4903a58051d59c1632a2417b35c5_...,global_cfr_calc,cfr,1.596508,cfr_a4c_dgx-1_fc1_global_cfr_calc,cfr_a4c_dgx-1_fc1_global_cfr_calc_chkpt_150.h5,/mnt/obi0/phi/echo/npyFiles/BWH/48b4/48b4c0c64...,48b4c0c643454479_4903a58051d59c1632a2417b35c5,31689912,2015-11-19 11:12:41,...,4.799932e-09,8.77287e-12,3.521227e-07,2015.0,20151119111241,BWH,Vivid E9,GE Vingmed Ultrasound,a4c,1.0
3,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,global_cfr_calc,cfr,2.038057,cfr_a4c_dgx-1_fc1_global_cfr_calc,cfr_a4c_dgx-1_fc1_global_cfr_calc_chkpt_150.h5,/mnt/obi0/phi/echo/npyFiles/BWH/48b4/48b4c0c64...,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e,31680713,2015-11-18 09:27:43,...,1.773409e-11,4.054678e-14,3.027763e-13,2015.0,20151118092743,BWH,iE33,Philips Medical Systems,a4c,1.0
2,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,global_cfr_calc,cfr,1.508264,cfr_a4c_dgx-1_fc1_global_cfr_calc,cfr_a4c_dgx-1_fc1_global_cfr_calc_chkpt_150.h5,/mnt/obi0/phi/echo/npyFiles/BWH/48b4/48b4c0c64...,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e,31680713,2015-11-18 09:27:43,...,8.558158e-07,9.62203e-11,8.219134e-10,2015.0,20151118092743,BWH,iE33,Philips Medical Systems,a4c,1.0
4,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,global_cfr_calc,cfr,1.867593,cfr_a4c_dgx-1_fc1_global_cfr_calc,cfr_a4c_dgx-1_fc1_global_cfr_calc_chkpt_150.h5,/mnt/obi0/phi/echo/npyFiles/BWH/48b4/48b4c0c64...,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e,31680713,2015-11-18 09:27:43,...,5.78447e-10,5.43536e-12,1.863309e-12,2015.0,20151118092743,BWH,iE33,Philips Medical Systems,a4c,1.0
5,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,global_cfr_calc,cfr,2.144364,cfr_a4c_dgx-1_fc1_global_cfr_calc,cfr_a4c_dgx-1_fc1_global_cfr_calc_chkpt_150.h5,/mnt/obi0/phi/echo/npyFiles/BWH/48b4/48b4c0c64...,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e,31680713,2015-11-18 09:27:43,...,1.089719e-10,8.384961e-14,3.23797e-12,2015.0,20151118092743,BWH,iE33,Philips Medical Systems,a4c,1.0


In [51]:
# Let's use the first n distinct file names, in the current row order of pred_df.
n = 24
video_name_list = list(pred_df.filename.unique()[:n])

# Columns carried over for each selected video.
# NOTE(review): the AZ metadata loaded earlier calls this column 'frametime'
# (no underscore) while this table uses 'frame_time' -- confirm the forward-pass
# script handles the naming it is given.
pred_cols = ['filename', 'frame_time', 'deltaX', 'deltaY', 'dir', 'predictions']

# Select the rows of the chosen videos, keep only pred_cols, and store the
# existing predictions under 'prev_preds'.
video_meta_df = (
    pred_df.loc[pred_df.filename.isin(video_name_list), pred_cols]
    .rename(columns={'predictions': 'prev_preds'})
    .reset_index(drop=True)
)
display(video_meta_df.head(2))

# Save this short list of video files so that the script for the forward pass
# can be developed against it.
video_meta_file = os.path.join(dev_dir, 'bwh_metadata.parquet')
video_meta_df.to_parquet(video_meta_file)
print(video_meta_file)

Unnamed: 0,filename,frame_time,deltaX,deltaY,dir,prev_preds
0,48b4c0c643454479_4903a58051d59c1632a2417b35c5_...,17.940552,0.044297,0.044297,/mnt/obi0/phi/echo/npyFiles/BWH/48b4/48b4c0c64...,1.596508
1,48b4c0c64adf5e8d_4903a58051d59c17833b0801157e_...,33.333,0.04404,0.04404,/mnt/obi0/phi/echo/npyFiles/BWH/48b4/48b4c0c64...,2.038057


/mnt/obi0/andreas/data/cfr_AZ/bwh_metadata.parquet
