In [1]:
import os
import glob
import numpy as np
import pandas as pd
from scipy.stats import spearmanr

# Widen pandas' display limits so large frames print without being truncated.
for _option, _value in (('display.max_rows', 500),
                        ('display.max_columns', 50),
                        ('display.width', 1000)):
    pd.set_option(_option, _value)

from bokeh.io import output_notebook, reset_output, show, output_file, save
from bokeh.plotting import figure
from bokeh.layouts import column, row, gridplot
from bokeh.models import ColumnDataSource, HoverTool, Legend

from bokeh.palettes import Category10

In [33]:
# Load the test-set predictions of three training runs and tag each frame with
# a run name ('model') and the number of training epochs ('n_epoch').
# NOTE(review): in the combined frame 'model' sits between 'institution' and
# 'manufacturer' rather than at the end, which suggests the parquet files
# already carry a 'model' column that is overwritten here -- confirm that
# losing the original values is intended.
log_dir = os.path.normpath('/mnt/obi0/andreas/data/cfr/log')
aug_run_dir = os.path.join(log_dir, 'meta200304_restmbf_aug_0313gpu2')

# Run without augmentation, 150 epochs.
df1_file = os.path.join(log_dir, 'meta200304_restmbf_0311gpu2', 'cfr_resized75_a4c_test_200304.parquet')
df1 = pd.read_parquet(df1_file).assign(model='restmbf150', n_epoch=150)

# Augmented run, 500 epochs.
df2_file = os.path.join(aug_run_dir, 'meta200304_restmbf_aug_0313gpu2.parquet')
df2 = pd.read_parquet(df2_file).assign(model='restmbf_aug500', n_epoch=500)

# Augmented run, checkpoint at 300 epochs.
df3_file = os.path.join(aug_run_dir, 'meta200304_restmbf_aug_0313gpu2_ep300_.parquet')
df3 = pd.read_parquet(df3_file).assign(model='restmbf_aug300', n_epoch=300)

# ignore_index=True already yields a fresh 0..n-1 index for the stacked frame.
test_df = pd.concat([df1, df2, df3], ignore_index=True)

In [34]:
# Show the column names of the combined predictions frame.
test_df.columns

Index(['study', 'mrn', 'echo_study_date', 'mrnstudyid', 'days_post_cfr', 'cfr_study_date', 'petmrn_identifier', 'post-2018', 'myocardial_perfusion', 'global_cfr_calc', 'unaffected_cfr', 'rest_mbf_unaff', 'stress_mbf_unaff', 'num_unaffected_segs', 'rest_global_mbf', 'stress_global_mbf', 'filename', 'dir', 'datetime', 'fileid', 'institution', 'model', 'manufacturer', 'index', 'frame_time', 'number_of_frames', 'heart_rate', 'deltaX', 'deltaY', 'a2c', 'a2c_laocc', 'a2c_lvocc_s', 'a3c', 'a3c_laocc', 'a3c_lvocc_s', 'a4c', 'a4c_far', 'a4c_laocc', 'a4c_lvocc_s', 'a4c_rv', 'a4c_rv_laocc', 'a5c', 'apex', 'other', 'plax_far', 'plax_lac', 'plax_laz', 'plax_laz_ao', 'plax_plax', 'psax_avz', 'psax_az', 'psax_mv', 'psax_pap', 'rvinf', 'subcostal', 'suprasternal', 'max_view', 'mode', 'rate', 'im_array_shape', 'label', 'pred', 'n_epoch'], dtype='object')

In [35]:
# Preview the rest_mbf_unaff column next to label and pred for the first five rows.
# NOTE(review): in the rows shown, label equals rest_mbf_unaff -- presumably
# label is a copy of that target column; confirm against the training code.
test_df[['rest_mbf_unaff', 'label', 'pred']].head()

Unnamed: 0,rest_mbf_unaff,label,pred
0,1.08,1.08,0.802714
1,0.534,0.534,0.732137
2,1.21325,1.21325,0.935583
3,1.1392,1.1392,0.827884
4,0.797625,0.797625,1.134737


In [36]:
# Per-model summary of the test set: number of distinct patients (mrn),
# studies, video files and distinct label values.
label_column = 'label'

stat_rows = []
for m in test_df.model.unique():
    # Filter once per model instead of re-evaluating the boolean mask for
    # every statistic.
    model_df = test_df[test_df.model == m]
    stat_rows.append({
        # NOTE(review): 'view' is filled with the model name, so it always
        # duplicates the 'model' column. Kept for output compatibility;
        # confirm whether an echo view (e.g. max_view) was intended here.
        'view': m,
        'model': m,
        # nunique(dropna=False) counts NaN as a value, like len(unique()).
        'mrns': model_df.mrn.nunique(dropna=False),
        'studies': model_df.study.nunique(dropna=False),
        'videos': model_df.filename.nunique(dropna=False),
        'unique_score_values': model_df[label_column].nunique(dropna=False),
    })

# Build the summary frame in one step rather than concatenating one
# single-row DataFrame per model.
df_stat = pd.DataFrame(
    stat_rows,
    columns=['view', 'model', 'mrns', 'studies', 'videos', 'unique_score_values'])
print(df_stat)

             view           model  mrns  studies  videos  unique_score_values
0      restmbf150      restmbf150   156      211     585                  167
1  restmbf_aug500  restmbf_aug500   156      211     585                  167
2  restmbf_aug300  restmbf_aug300   156      211     585                  167


In [37]:
def style(p):
    """Apply the notebook's shared typography to a figure and return it.

    Centers the title and sets the title, axis-label and tick-label text to
    11pt, with bold axis labels. The figure is modified in place and the same
    object is returned so the call can be chained.
    """
    font_size = '11pt'

    # Title
    p.title.align = 'center'
    p.title.text_font_size = font_size

    # Axis titles and tick labels get identical treatment on both axes.
    for axis in (p.xaxis, p.yaxis):
        axis.axis_label_text_font_size = font_size
        axis.axis_label_text_font_style = 'bold'
        axis.major_label_text_font_size = font_size

    return p

In [38]:
# Cast ground truth and prediction to float32, then list the resulting
# column dtypes as the cell output.
float32_columns = dict.fromkeys(('label', 'pred'), 'float32')
test_df = test_df.astype(float32_columns)
test_df.dtypes

study                           object
mrn                              int64
echo_study_date         datetime64[ns]
mrnstudyid                       int64
days_post_cfr                    int64
cfr_study_date          datetime64[ns]
petmrn_identifier               object
post-2018                        int64
myocardial_perfusion            object
global_cfr_calc                float64
unaffected_cfr                 float64
rest_mbf_unaff                 float64
stress_mbf_unaff               float64
num_unaffected_segs              int64
rest_global_mbf                float64
stress_global_mbf              float64
filename                        object
dir                             object
datetime                datetime64[ns]
fileid                          object
institution                     object
model                           object
manufacturer                    object
index                          float64
frame_time                     float64
number_of_frames         

In [39]:
def make_plot(df, visible_models=None):
    """Scatter predicted against true values, one cross-glyph series per model.

    Args:
        df: DataFrame with a 'model' column identifying the series and
            'label' (true value) and 'pred' (prediction) columns to plot.
        visible_models: Model name, or iterable of model names, drawn when
            the figure first opens. All other series start hidden and can be
            toggled through the legend. Defaults to the first model in ``df``.

    Returns:
        The styled Bokeh figure.
    """
    p = figure(title='Model predictions (rest_mbf_unaff)',
               x_axis_label='True',
               y_axis_label='Predicted',
               x_range=(0.5, 1.5),
               y_range=(0.5, 1.5))

    models = list(df.model.unique())

    # The previous implementation compared every model against the fixed
    # string '0311gpu2', which matches none of the model names, so every
    # series started hidden and the figure opened empty.
    if visible_models is None:
        shown = set(models[:1])
    elif isinstance(visible_models, str):
        shown = {visible_models}
    else:
        shown = set(visible_models)

    palette = Category10[10]
    for m, model in enumerate(models):
        # Hand Bokeh only the two plotted columns: a ColumnDataSource holds
        # every column it is given, and that data ends up in the saved HTML,
        # which would otherwise include identifiers such as mrn and dates.
        source = ColumnDataSource(df.loc[df.model == model, ['label', 'pred']])
        renderer = p.cross(source=source,
                           x='label',
                           y='pred',
                           size=5,
                           # Start at the third palette colour as before, but
                           # wrap around instead of raising IndexError when
                           # there are more than eight models.
                           line_color=palette[(m + 2) % len(palette)],
                           legend_label=model,
                           name=model)
        renderer.visible = model in shown

    p.legend.location = 'top_right'
    p.legend.title = 'model: click to hide'
    p.legend.click_policy = 'hide'

    return style(p)

In [40]:
# Calculate correlation coefficients
model_stats_df = pd.DataFrame()
for model in test_df.model.unique():
    x = test_df[test_df.model==model].label
    y = test_df[test_df.model==model].pred
    spear = spearmanr(x, y)
    model_dict = {'model': [model],
                  'n': [len(x)],
                  'spear_c': [spear.correlation],
                  'spear_p': [spear.pvalue]}
    
    model_stats_df = pd.concat([model_stats_df, pd.DataFrame(model_dict)], ignore_index=True)

In [41]:
# Print the per-model Spearman statistics table as plain text.
print(model_stats_df)

            model    n   spear_c       spear_p
0      restmbf150  585  0.434786  2.244055e-28
1  restmbf_aug500  585  0.332009  1.608445e-16
2  restmbf_aug300  585  0.323464  1.027706e-15


In [42]:
# Build the prediction scatter figure for all models in test_df.
pred_plot = make_plot(test_df)
# Clear any previously configured Bokeh output target (e.g. an output file),
# then direct output to the notebook and render the figure inline.
reset_output()
output_notebook()
show(pred_plot)

In [18]:
# Switch Bokeh's output target to an HTML file in log_dir and write the
# figure there.
# NOTE(review): this cell's recorded execution count is lower than that of the
# preceding cells, so the saved file may predate the current pred_plot;
# re-run top to bottom before relying on it.
# NOTE(review): the page title '200208a4' does not obviously relate to this
# plot -- confirm it is intended and not left over from another notebook.
reset_output()
output_file(os.path.join(log_dir, 'restmbf_scatter.html'), title = '200208a4')
save(pred_plot)

'/mnt/obi0/andreas/data/cfr/log/restmbf_scatter.html'