In [1]:
import os
import sys
import glob

import tensorflow as tf
import numpy as np
import pandas as pd
import keras
from keras.backend.tensorflow_backend import set_session

import matplotlib.pyplot as plt

import nibabel as nib

Using TensorFlow backend.


In [2]:
def plot(img):
    plt.figure()
    plt.imshow(img[:, 40, :], cmap='gray')
    
def load_nii(path):
    nii = nib.load(path)
    vol = nii.get_data().astype(np.float32)
    return np.squeeze(vol)

In [3]:
run_dirs = glob.glob('../runs/*/predict')
run_dirs = sorted(run_dirs)

print('found {} runs'.format(len(run_dirs)), '\n')

for i, run in enumerate(run_dirs):
    print(i, run, '\n')

found 4 runs 

0 ../runs/gan_20190416_0011_gpu=1_bs=4_enc=[16_32_32_32]_dec=[32_32_32_32_16_8]_cbn=16_lr=0.0001_b1=0.0_b2=0.9_ep=0.1_pl=25.0_vr=1.0_ti=True_is=5_cs=5_rf=10_rs=25_glw=[100_10_100_1]_clw=[1_1_10]_tag=brains_regular_gen/predict 

1 ../runs/gan_20190418_1124_gpu=4_bs=4_enc=[16_32_64_128]_dec=[128_64_32_32_16_8]_cbn=16_lr=0.0001_b1=0.0_b2=0.9_ep=0.1_pl=25.0_vr=1.0_ti=True_is=5_cs=5_rf=10_rs=25_glw=[100.0_10.0_5000.0_1.0]_clw=[1_1_10]_tag=brains_deep_high_prec_loss/predict 

2 ../runs/gan_20190418_1255_gpu=2_bs=4_enc=[16_32_32_32]_dec=[32_32_32_32_16_8]_cbn=16_lr=0.0001_b1=0.0_b2=0.9_ep=0.1_pl=25.0_vr=1.0_ti=True_is=5_cs=5_rf=10_rs=25_glw=[100.0_10.0_5000.0_1.0]_clw=[1_1_10]_tag=brains_reg_gen_high_prec/predict 

3 ../runs/gan_20190419_0002_gpu=0_bs=4_enc=[16_32_32_32]_dec=[32_32_32_32_16_8]_cbn=16_lr=0.0001_b1=0.0_b2=0.9_ep=0.1_pl=25.0_vr=1.0_ti=False_is=5_cs=5_rf=10_rs=25_glw=[100.0_10.0_0.0_1.0]_clw=[1_1_10]_tag=brains_reg_gen_no_ti/predict 



In [4]:
# set run dir
run_dir = run_dirs[3]

pre_dirs = glob.glob(os.path.join(glob.escape(run_dir), '*', '*'))
pre_dirs = sorted(pre_dirs)

print('found {} pre_dirs'.format(len(pre_dirs)))

for i, pre_dir in enumerate(pre_dirs):
    #print(i, os.path.basename(pre_dir))
    print(i, pre_dir.split('/')[-2] + '/' + pre_dir.split('/')[-1])

found 4 pre_dirs
0 test/gen_050_04
1 test/gen_150_04
2 test/gen_250_04
3 test/gen_400_04


In [5]:
# generator config, run_dir, etc...
pre_dir = pre_dirs[2]

csv_path = os.path.join(pre_dir, 'meta.csv')
csv = pd.read_csv(csv_path)

print('found meta.csv: {}'.format(csv.shape))

found meta.csv: (271, 38)


In [6]:
# classifier
clf_dir = '../../../../backup/agemorph/models/clf/clf_20190330_1549_gpu=4_bs=8_lr=0.001_b1=0.9_b2=0.999_ep=0.1_bn=True_ds=1_lw=[1.0]/'
model_name = 'clf_100'
model_file = os.path.join(clf_dir, model_name+'.h5')

if os.path.isfile(model_file):
    print('found model file')

found model file


In [7]:
# gpu config
gpu_id = 5

if gpu_id is not None:                                                     
    gpu = '/gpu:' + str(gpu_id)                                            
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)                       
    config = tf.ConfigProto()                                              
    config.gpu_options.allow_growth = True                                 
    config.allow_soft_placement = True                                     
    set_session(tf.Session(config=config))                                 
else:                                                                      
    gpu = '/cpu:0'

In [8]:
print('starting predict')

with tf.device(gpu):
    
    # load classifier model
    classifier = keras.models.load_model(model_file)
    
    for i, (_, row) in enumerate(csv.iterrows()):
        
        if i % 10 == 0:
            print('step', i)
        
        xr = load_nii(row['img_path_0'])[None, ..., None]
        yr = load_nii(row['img_path_1'])[None, ..., None]
        yf = load_nii(row['img_path_yf'])[None, ..., None]
        
        batch = np.concatenate([xr, yr, yf], axis=0)
        
        pred = classifier.predict([batch])
        
        csv.loc[csv.img_id_0 == row['img_id_0'], 'pAD_xr'] = pred[0, 1]
        csv.loc[csv.img_id_0 == row['img_id_0'], 'pAD_yr'] = pred[1, 1]
        csv.loc[csv.img_id_0 == row['img_id_0'], 'pAD_yf'] = pred[2, 1]
        
print('finished predict')

csv_out_path = os.path.join(pre_dir, model_name+'.csv')

print('writing csv to {}'.format(csv_out_path))

csv.to_csv(csv_out_path, index=False)

starting predict
step 0
step 10
step 20
step 30
step 40
step 50
step 60
step 70
step 80
step 90
step 100
step 110
step 120
step 130
step 140
step 150
step 160
step 170
step 180
step 190
step 200
step 210
step 220
step 230
step 240
step 250
step 260
step 270
finished predict
writing csv to ../runs/gan_20190419_0002_gpu=0_bs=4_enc=[16_32_32_32]_dec=[32_32_32_32_16_8]_cbn=16_lr=0.0001_b1=0.0_b2=0.9_ep=0.1_pl=25.0_vr=1.0_ti=False_is=5_cs=5_rf=10_rs=25_glw=[100.0_10.0_0.0_1.0]_clw=[1_1_10]_tag=brains_reg_gen_no_ti/predict/test/gen_250_04/clf_100.csv


In [9]:
clf_csvs = glob.glob(os.path.join(glob.escape(pre_dir), '[!meta]*.csv'))
clf_csvs = sorted(clf_csvs)

for i, clf_csv in enumerate(clf_csvs):
    print(i, os.path.basename(clf_csv))

0 clf_100.csv


In [10]:
csv_path = clf_csvs[0]
csv = pd.read_csv(csv_path)
print(csv.shape)

(271, 41)


In [11]:
scsv = csv[csv.pat_dx_1 == 2]
print('s: xr -> yr:', (scsv.pAD_yr - scsv.pAD_xr).mean())
print('s: xr -> yf:', (scsv.pAD_yf - scsv.pAD_xr).mean())

pcsv = csv[csv.pat_dx_1 == 3]
print('p: xr -> yr:', (pcsv.pAD_yr - pcsv.pAD_xr).mean())
print('p: xr -> yf:', (pcsv.pAD_yf - pcsv.pAD_xr).mean())

s: xr -> yr: 0.12592504433178195
s: xr -> yf: 0.11167344633953892
p: xr -> yr: 0.4026707866918259
p: xr -> yf: 0.2197435671624414


In [12]:
print(csv[['pAD_xr', 'pAD_yr', 'pAD_yf', 'pat_dx_1']].mean())
csv[['pAD_xr', 'pAD_yr', 'pAD_yf', 'pat_dx_1']].head(10)*100

pAD_xr      0.190828
pAD_yr      0.416831
pAD_yf      0.302534
pat_dx_1    2.361624
dtype: float64


Unnamed: 0,pAD_xr,pAD_yr,pAD_yf,pat_dx_1
0,5.985183e-07,1e-05,0.000618,200.0
1,99.93588,99.885106,97.904825,200.0
2,0.008202813,0.010353,8.282738,200.0
3,0.003657352,0.363139,24.198538,200.0
4,3.502458e-07,0.012852,0.016321,200.0
5,2.743703e-08,0.042301,0.012934,300.0
6,1.518643,0.018634,7.457645,300.0
7,0.1455752,44.121897,80.367893,300.0
8,93.6878,99.799275,41.987798,300.0
9,1.230723e-05,0.000216,0.00579,200.0


In [51]:
def get_metric(csv_in, col, t, metric):

    true_pos = ((csv_in[col] > t) & (csv_in.pat_dx_1 == 3)).sum()
    true_neg = ((csv_in[col] <= t) & (csv_in.pat_dx_1 == 2)).sum()
    false_pos = ((csv_in[col] > t) & (csv_in.pat_dx_1 == 2)).sum()
    false_neg = ((csv_in[col] <= t) & (csv_in.pat_dx_1 == 3)).sum()
    
    true = true_pos + true_neg
    false = false_pos + false_neg

    if metric == 'f1':
        prec = true_pos / (true_pos + false_pos)
        reca = true_pos / (true_pos + false_neg)

        f1_score = 2 * (prec * reca) / (prec + reca)
        m = f1_score
        
    elif metric == 'acc':
        acc_p = true_pos / (true_pos + false_neg)
        acc_s = true_neg / (true_neg + false_pos)

        balanced_acc = (acc_p + acc_s) / 2
        m = balanced_acc
    
    return (round(m, 3), true, false)

In [52]:
def get_best_threshold(csv_in, col, metric):
    l = []
    
    for t in np.arange(0, 1, 0.01):
        m = get_metric(csv_in, col, t, metric)
        l.append((*m, t))
        
    return max(l)[3]

In [53]:
def get_split_metric(csv_in, col, cv_split, metric):
    
    csv_split = csv_in[csv_in.cv_split == cv_split]
    csv_other = csv_in[csv_in.cv_split != cv_split]
    
    t = get_best_threshold(csv_other, col, metric)
    
    m = get_metric(csv_split, col, t, metric)
    print('split {}: {}'.format(cv_split, m))
    
    return m

In [54]:
def get_cv_metric(csv_in, col, metric):
    l = [get_split_metric(csv_in, col, s, metric)[0] for s in range(5)]
    return np.array(l).mean()

In [55]:
# create 5 folds
csv = csv.sample(frac=1).reset_index(drop=True)

pMCI = csv[csv.pat_dx_1 == 3].img_id_0.values
sMCI = csv[csv.pat_dx_1 == 2].img_id_0.values

p_bins = np.linspace(0, len(pMCI), 6).astype(int)[1:-1]
s_bins = np.linspace(0, len(sMCI), 6).astype(int)[1:-1]

p_splits = np.split(pMCI, p_bins)
s_splits = np.split(sMCI, s_bins)

In [57]:
# write folds to csv
for i, (p_split, s_split) in enumerate(zip(p_splits, s_splits)):
    csv.loc[csv.img_id_0.isin(p_split), 'cv_split'] = i
    csv.loc[csv.img_id_0.isin(s_split), 'cv_split'] = i

In [58]:
get_cv_metric(csv, 'pAD_xr', 'f1')

split 0: (0.682, 39, 14)
split 1: (0.596, 36, 19)
split 2: (0.595, 38, 15)
split 3: (0.558, 36, 19)
split 4: (0.45, 33, 22)


0.5762

In [59]:
get_cv_metric(csv, 'pAD_yr', 'f1')

split 0: (0.654, 35, 18)
split 1: (0.622, 38, 17)
split 2: (0.667, 39, 14)
split 3: (0.655, 36, 19)
split 4: (0.683, 42, 13)


0.6561999999999999

In [60]:
get_cv_metric(csv, 'pAD_yf', 'f1')

split 0: (0.667, 37, 16)
split 1: (0.636, 39, 16)
split 2: (0.531, 30, 23)
split 3: (0.591, 37, 18)
split 4: (0.55, 37, 18)


0.595

In [61]:
get_cv_metric(csv, 'pAD_xr', 'acc')

split 0: (0.748, 39, 14)
split 1: (0.664, 36, 19)
split 2: (0.687, 38, 15)
split 3: (0.643, 36, 19)
split 4: (0.568, 33, 22)


0.662

In [62]:
get_cv_metric(csv, 'pAD_yr', 'acc')

split 0: (0.712, 35, 18)
split 1: (0.693, 38, 17)
split 2: (0.736, 39, 14)
split 3: (0.707, 36, 19)
split 4: (0.75, 42, 13)


0.7196

In [63]:
get_cv_metric(csv, 'pAD_yf', 'acc')

split 0: (0.73, 37, 16)
split 1: (0.696, 39, 16)
split 2: (0.592, 30, 23)
split 3: (0.668, 37, 18)
split 4: (0.646, 37, 18)


0.6664