In [1]:
import keras.backend as K
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score

import cnn_builder as cbuild
import cnn_methods as cfunc
import config
import csv
import helper_fxns as hf
import importlib
import matplotlib.pyplot as plt
import numpy as np
import operator
import os
import pandas as pd
import random
%matplotlib inline

Using TensorFlow backend.


In [36]:
# Pick up on-disk edits to the project modules without restarting the kernel,
# then rebuild the shared configuration object from the freshly reloaded module.
for project_module in (config, cfunc, hf, cbuild):
    importlib.reload(project_module)
C = config.Config()

In [None]:
def run_all():
    """Rerun the whole preprocessing pipeline, then the overnight CNN run.

    Meant for overnight runs. In order, this:

    1. loads all VOIs (``drm.load_all_vois``) and writes the table returned by
       ``drm.load_ints`` to ``C.int_df_path``;
    2. creates one folder per class under ``C.orig_dir``, ``C.aug_dir`` and
       ``C.crops_dir``;
    3. calls ``vm.extract_vois`` and writes the resulting dict to
       ``C.small_voi_path`` as ``key, value`` CSV rows;
    4. resizes every file found in each class's crops folder and saves the
       result under the same file name in ``C.orig_dir``;
    5. calls ``vm.parallel_augment`` and then ``vm.save_all_vois`` per class;
    6. calls ``cbuild.overnight_run``.

    Elapsed times for the resize and augmentation stages are printed.
    """
    # `time` is not imported at notebook level; without this local import the
    # timing calls below raise NameError.
    import time

    import dr_methods as drm
    import voi_methods as vm

    C = config.Config()
    drm.load_all_vois(C)

    intensity_df = drm.load_ints(C)
    intensity_df.to_csv(C.int_df_path, index=False)

    # Make sure every per-class folder exists. The directory prefixes are
    # concatenated with the class name as-is (no separator is inserted).
    for cls in C.classes_to_include:
        for base_dir in (C.orig_dir, C.aug_dir, C.crops_dir):
            os.makedirs(base_dir + cls, exist_ok=True)

    voi_df_art = pd.read_csv(C.art_voi_path)
    voi_df_ven = pd.read_csv(C.ven_voi_path)
    voi_df_eq = pd.read_csv(C.eq_voi_path)
    # Deliberately re-read the table written above so extract_vois receives
    # the CSV round-tripped version.
    intensity_df = pd.read_csv(C.int_df_path)

    small_vois = {}
    small_vois = vm.extract_vois(small_vois, C, voi_df_art, voi_df_ven, voi_df_eq, intensity_df)

    with open(C.small_voi_path, 'w', newline='') as csv_file:
        csv.writer(csv_file).writerows(small_vois.items())

    # scaled imgs
    t = time.time()
    for cls in C.classes_to_include:
        crop_dir = C.crops_dir + cls
        scaled_dir = C.orig_dir + cls
        for fn in os.listdir(crop_dir):
            img = np.load(os.path.join(crop_dir, fn))
            # fn[:-4] drops the last four characters of the file name
            # (presumably the '.npy' extension) to get the small_vois key.
            unaug_img = vm.resize_img(img, C.dims, small_vois[fn[:-4]])
            np.save(os.path.join(scaled_dir, fn), unaug_img)
    print(time.time()-t)

    # augmented imgs
    t = time.time()
    for cls in C.classes_to_include:
        vm.parallel_augment(cls, small_vois, C)
        print(cls, time.time()-t)

    for cls in C.classes_to_include:
        vm.save_all_vois(cls, C)

    cbuild.overnight_run(C)

In [37]:
# Build the network from the project config, passing the optimizer by name.
# The trailing comment keeps an explicit Adam(lr=.005) instance as an alternative argument.
model = cbuild.build_cnn(C, 'adam')#Adam(lr=.005)
# Print the layer-by-layer summary of the freshly built model.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_13 (InputLayer)            (None, 36, 36, 12, 1) 0                                            
____________________________________________________________________________________________________
input_14 (InputLayer)            (None, 36, 36, 12, 1) 0                                            
____________________________________________________________________________________________________
input_15 (InputLayer)            (None, 36, 36, 12, 1) 0                                            
____________________________________________________________________________________________________
conv3d_16 (Conv3D)               (None, 32, 32, 10, 64 1792        input_13[0][0]                   
___________________________________________________________________________________________

model = cbuild.run_cnn(model, C)

In [38]:
# Load the un-augmented samples for every class and print the per-class counts.
voi_df = pd.read_csv(C.art_voi_path)
nb_classes = len(C.classes_to_include)
orig_data_dict, num_samples = cfunc.collect_unaug_data(C, voi_df)
print(num_samples)

# Per-class mean (over axis 0) of the second array stored for each class.
avg_X2 = {
    cls: np.mean(orig_data_dict[cls][1], axis=0)
    for cls in C.classes_to_include
}

{'colorectal': 72, 'cholangio': 56, 'fnh': 59, 'cyst': 75, 'hcc': 86, 'hemangioma': 56}


In [34]:
# Read the saved small-VOI table: a two-column CSV of key, stringified list.
with open(C.small_voi_path, 'r') as voi_file:
    small_vois = dict(csv.reader(voi_file))

# Turn each "[a, b, ...]" string into a list of ints. Entries whose key prefix
# (the text before the first underscore) is '11907521' are left as raw strings.
for voi_key, raw_bounds in small_vois.items():
    if voi_key[:voi_key.find('_')] == '11907521':
        continue
    small_vois[voi_key] = [int(tok) for tok in raw_bounds[1:-1].split(', ')]

In [35]:
# Echo the parsed VOI table; the whole dict is rendered as this cell's output.
small_vois

{'11907521_0': '[38, 62, 43, 58, 11, 21]',
 '11957947_0': [53, 111, 48, 101, 19, 40],
 '12023378_0': [43, 57, 41, 60, 13, 20],
 '12042703_0': [39, 61, 37, 64, 14, 18],
 '12042703_1': [39, 62, 39, 62, 15, 18],
 '12042703_2': [41, 59, 40, 60, 14, 19],
 '12042703_3': [40, 60, 41, 60, 14, 19],
 '12082222_0': [46, 55, 45, 55, 14, 18],
 '12082222_1': [48, 53, 47, 54, 14, 19],
 '12090000_0': [36, 65, 38, 63, 11, 22],
 '12111877_0': [43, 57, 42, 58, 14, 19],
 '12170979_0': [37, 63, 40, 61, 14, 18],
 '12177834_0': [38, 62, 39, 61, 12, 26],
 '12206539_0': [74, 156, 98, 206, 10, 22],
 '12206657_0': [43, 58, 44, 56, 13, 19],
 '12207268_0': [44, 56, 41, 59, 12, 20],
 '12207268_1': [47, 53, 46, 54, 14, 18],
 '12207268_2': [46, 54, 46, 54, 14, 19],
 '12229245_0': [46, 55, 43, 57, 15, 18],
 '12239783_0': [38, 63, 41, 60, 12, 21],
 '12249331_0': [35, 66, 36, 64, 12, 26],
 '12271995_0': [43, 57, 44, 57, 14, 19],
 '12289361_0': [42, 58, 44, 57, 13, 20],
 '12302576_0': [44, 57, 44, 57, 14, 18],
 '12302576

In [39]:
# Randomly split every class into training and test subsets and assemble the
# model inputs. X* / X2* collect the first and second arrays stored per class
# in orig_data_dict, Y* the one-hot labels and Z* the ids (third array).
train_ids, test_ids = {}, {}  # per-class filenames: training originals / test set
train_samples = {}

X_test, X2_test, Y_test, Z_test = [], [], [], []
X_train_orig, X2_train_orig, Y_train_orig, Z_train_orig = [], [], [], []

for cls in orig_data_dict:
    cls_num = C.classes_to_include.index(cls)
    cls_X = orig_data_dict[cls][0]
    cls_X2 = orig_data_dict[cls][1]
    cls_ids = orig_data_dict[cls][2]

    n_total = num_samples[cls]
    n_train = round(n_total*C.train_frac)
    train_samples[cls] = n_train

    # One shuffled index order per class; the head is training, the tail is test.
    order = np.random.permutation(list(range(n_total)))
    train_idx, test_idx = order[:n_train], order[n_train:]

    # One-hot row for this class, positioned by its index in C.classes_to_include.
    one_hot = [0] * cls_num + [1] + [0] * (nb_classes - cls_num - 1)

    train_ids[cls] = list(cls_ids[train_idx])
    test_ids[cls] = list(cls_ids[test_idx])

    X_test.extend(cls_X[test_idx])
    X2_test.extend(cls_X2[test_idx])
    Y_test.extend([one_hot] * (n_total - n_train))
    Z_test.extend(test_ids[cls])

    X_train_orig.extend(cls_X[train_idx])
    X2_train_orig.extend(cls_X2[train_idx])
    Y_train_orig.extend([one_hot] * n_train)
    Z_train_orig.extend(train_ids[cls])

    print("%s has %d samples for training (%d after augmentation) and %d for testing" %
          (cls, n_train, n_train * C.aug_factor, n_total - n_train))

# Labels are already built as one-hot rows above, so no to_categorical call is needed.
Y_test, Y_train_orig = np.array(Y_test), np.array(Y_train_orig)
Z_test, Z_train_orig = np.array(Z_test), np.array(Z_train_orig)

# Pair the two input arrays, then let the project helper split them into the
# per-phase inputs.
X_test = cfunc.separate_phases([np.array(X_test), np.array(X2_test)])
X_train_orig = cfunc.separate_phases([np.array(X_train_orig), np.array(X2_train_orig)])

colorectal has 58 samples for training (2900 after augmentation) and 14 for testing
cholangio has 45 samples for training (2250 after augmentation) and 11 for testing
fnh has 47 samples for training (2350 after augmentation) and 12 for testing
cyst has 60 samples for training (3000 after augmentation) and 15 for testing
hcc has 69 samples for training (3450 after augmentation) and 17 for testing
hemangioma has 45 samples for training (2250 after augmentation) and 11 for testing


train_generator = train_generator_func()
model_pretrain.fit_generator(train_generator, steps_per_epoch=120, epochs=50)#, callbacks=[early_stopping])

In [41]:
# Train from the project's batch generator. Early stopping is currently
# disabled; the callback definition is kept below for reference.
#early_stopping = EarlyStopping(monitor='acc', min_delta=0.01, patience=4)
train_generator = cbuild.train_generator_func(
    C, train_ids, voi_df, avg_X2,
    n=5, n_art=4,
)
hist = model.fit_generator(
    train_generator,
    steps_per_epoch=200,
    epochs=15,
)  # , callbacks=[early_stopping]

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [None]:
# Plot the per-epoch training loss recorded in `hist`.
# plt.show() does not draw data (its only parameter is `block`), so the curve
# has to be drawn with plt.plot() before the figure is shown.
plt.plot(hist.history['loss'])
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()

hist = model.fit(X_train2, Y_train2, batch_size=32, epochs=200, validation_data=(X_test, Y_test), callbacks=[early_stopping])

In [92]:
# Reload the three per-phase VOI tables from the paths in the config.
voi_df_art, voi_df_ven, voi_df_eq = [
    pd.read_csv(voi_path)
    for voi_path in (C.art_voi_path, C.ven_voi_path, C.eq_voi_path)
]

# Reload the small-VOI table and parse every "[a, b, ...]" string into a list of ints.
with open(C.small_voi_path, 'r') as voi_file:
    small_vois = {
        voi_key: [int(tok) for tok in raw_bounds[1:-1].split(', ')]
        for voi_key, raw_bounds in dict(csv.reader(voi_file)).items()
    }

In [None]:
# Run cbuild.overnight_run with the current config object (its behaviour is
# defined in cnn_builder, not in this notebook).
cbuild.overnight_run(C)

In [None]:
# Override the image output directory on the live config object with a
# run-specific, hard-coded absolute Windows path.
C.output_img_dir = "Z:\\OUTPUT\\12-08-2d-bn"
# Echo the configured class list as the cell output.
C.classes_to_include

In [42]:
#plot_with_bbox(fn_list[2], cls_mapping[wrong_guesses[2]])
# Evaluate the model on the held-out test set.
Y_pred = model.predict(X_test)

# Collapse one-hot / score rows to class indices. np.argmax(axis=1) picks the
# first maximal column in each row, replacing the per-row Python
# max(enumerate(...)) loop with a single vectorised call.
y_true = np.argmax(Y_test, axis=1)
y_pred = np.argmax(Y_pred, axis=1)

cm = confusion_matrix(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average="weighted")

#cfunc.save_output(Z_test, y_pred, y_true, voi_df_art, small_vois, C.classes_to_include, C)

In [None]:
# Evaluate the model on the un-augmented training originals.
# Note: this overwrites Y_pred, y_true, y_pred, cm and f1 from any earlier evaluation cell.
Y_pred = model.predict(X_train_orig)

# Collapse one-hot / score rows to class indices. np.argmax(axis=1) picks the
# first maximal column in each row, replacing the per-row Python
# max(enumerate(...)) loop with a single vectorised call.
y_true = np.argmax(Y_train_orig, axis=1)
y_pred = np.argmax(Y_pred, axis=1)

cm = confusion_matrix(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average="weighted")

#cfunc.save_output(Z_train_orig, y_pred, y_true, voi_df_art, small_vois, C.classes_to_include, C, save_dir="Z:\\OUTPUT\\12-08-3d\\training")

In [83]:
# Display the most recently computed confusion matrix (whichever evaluation cell ran last).
cm

array([[13,  0,  2, 41,  2, 11],
       [ 0,  0,  0, 24, 21,  0],
       [ 0,  0, 10, 47,  1,  0],
       [ 0,  0,  0, 58,  2,  0],
       [ 0,  0,  0, 37,  8,  0],
       [ 0,  0,  0, 11,  0, 36]], dtype=int64)

In [44]:
# Pass the label vectors through cfunc.condense_cm; it returns three values and
# the third is discarded here.
y_true_simp, y_pred_simp, _ = cfunc.condense_cm(y_true, y_pred, C.classes_to_include)

In [45]:
# Accuracy on the condensed labels returned by cfunc.condense_cm.
accuracy_score(y_true_simp, y_pred_simp)

0.76249999999999996

In [43]:
# Accuracy on the original (uncondensed) class indices.
accuracy_score(y_true, y_pred)

0.72499999999999998

2d: 77%/84%, 74%/80%
3d: 78%/89%
2d bn: 77%/82%
3d bn: 84%/87%, 74%/83%
3d bn no artif: 84%/87%, 74%/83%
3d bn/elu/dilation/3conv/64-128-100-100: 80%/86%, 82%/88%, 84%/91%

fn_list = Z_test[~np.equal(y_pred, y_true)]
wrong_guesses = np.array(y_pred)[~np.equal(y_pred, y_true)]

activ = model2.predict(X_train)
#activ = model2.predict(np.expand_dims(X_train[10],axis=0))

import operator
#print("Ground truth:", [C.classes_to_include[max(enumerate(x), key=operator.itemgetter(1))[0]] for x in Y_val[::30]])
Y_ = model.predict(X_test)
print("Predictions:", [C.classes_to_include[max(enumerate(x), key=operator.itemgetter(1))[0]] + " (%.5f%% probability)" % (max(x)*100) for x in Y_])