In [3]:
import csv
import importlib
import operator
import os
import random
import time

import keras.backend as K
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping
from keras.models import Model
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score

import cnn_builder as cbuild
import cnn_methods as cfunc
import config
import helper_fxns as hf
%matplotlib inline

In [4]:
# Re-import the project modules (in the same order as before) so edits made on
# disk are picked up without restarting the kernel, then build a fresh Config
# from the reloaded `config` module.
for _module in (config, cfunc, hf, cbuild):
    importlib.reload(_module)
C = config.Config()

In [5]:
# Build the network from the current config. 'adam' is passed as the second
# argument (presumably the optimizer; a hand-configured Adam(lr=0.01) was used
# here before and is left commented out), then print the layer summary.
model = cbuild.build_cnn(C, 'adam')#Adam(lr=0.01))
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 36, 36, 12, 1) 0                                            
____________________________________________________________________________________________________
input_2 (InputLayer)             (None, 36, 36, 12, 1) 0                                            
____________________________________________________________________________________________________
input_3 (InputLayer)             (None, 36, 36, 12, 1) 0                                            
____________________________________________________________________________________________________
conv3d_1 (Conv3D)                (None, 34, 34, 11, 64 1216        input_1[0][0]                    
___________________________________________________________________________________________

In [190]:
# Hand the model and config to the project's run_cnn helper and rebind `model`
# to whatever it returns. The captured output below shows it printing the
# per-class split and then running epochs until interrupted.
model = cbuild.run_cnn(model, C)

{'fnh': 67, 'cyst': 75, 'hcc': 89, 'hemangioma': 56, 'colorectal': 49, 'cholangio': 56}
fnh has 54 samples for training (2700 after augmentation) and 13 for testing
cyst has 60 samples for training (3000 after augmentation) and 15 for testing
hcc has 71 samples for training (3550 after augmentation) and 18 for testing
hemangioma has 45 samples for training (2250 after augmentation) and 11 for testing
colorectal has 39 samples for training (1950 after augmentation) and 10 for testing
cholangio has 45 samples for training (2250 after augmentation) and 11 for testing
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50

KeyboardInterrupt: 

In [6]:
# Load the un-augmented data and split every class into train / test subsets.
# Produces:
#   train_ids / test_ids     dict: class -> list of original filenames
#   X_*, X2_*                the two model inputs (orig_data_dict[cls][0] / [1])
#   Y_*                      one-hot labels, indexed by position in C.classes_to_include
#   Z_*                      filenames aligned row-for-row with X_* / Y_*
nb_classes = len(C.classes_to_include)
voi_df = pd.read_csv(C.art_voi_path)
intensity_df = pd.read_csv(C.int_df_path)
#intensity_df.loc[intensity_df["art_int"] == 0, "art_int"] = np.mean(intensity_df[intensity_df["art_int"] > 0]["art_int"])
#intensity_df.loc[intensity_df["ven_int"] == 0, "ven_int"] = np.mean(intensity_df[intensity_df["ven_int"] > 0]["ven_int"])
#intensity_df.loc[intensity_df["eq_int"] == 0, "eq_int"] = np.mean(intensity_df[intensity_df["eq_int"] > 0]["eq_int"])

orig_data_dict, num_samples = cfunc.collect_unaug_data(C.classes_to_include, C, voi_df, intensity_df)
print(num_samples)

train_ids = {} #filenames of training set originals
test_ids = {} #filenames of test set
train_samples = {} #number of training originals per class

X_test, X2_test, Y_test, Z_test = [], [], [], []
X_train_orig, X2_train_orig, Y_train_orig, Z_train_orig = [], [], [], []

for cls in orig_data_dict:
    # The label index is the class's position in the configured class list,
    # not its position in orig_data_dict (whose key order may differ).
    cls_num = C.classes_to_include.index(cls)
    one_hot = [0] * cls_num + [1] + [0] * (nb_classes - cls_num - 1)

    n_train = round(num_samples[cls] * C.train_frac)
    n_test = num_samples[cls] - n_train
    train_samples[cls] = n_train

    # NOTE: the shuffle is unseeded, so the split differs on every run.
    order = np.random.permutation(num_samples[cls])
    train_idx, test_idx = order[:n_train], order[n_train:]

    imgs = orig_data_dict[cls][0]
    extra_feats = orig_data_dict[cls][1]
    filenames = orig_data_dict[cls][2]

    train_ids[cls] = list(filenames[train_idx])
    test_ids[cls] = list(filenames[test_idx])

    X_test.extend(imgs[test_idx])
    X2_test.extend(extra_feats[test_idx])
    Y_test.extend([one_hot] * n_test)
    Z_test.extend(test_ids[cls])

    X_train_orig.extend(imgs[train_idx])
    X2_train_orig.extend(extra_feats[train_idx])
    Y_train_orig.extend([one_hot] * n_train)
    Z_train_orig.extend(train_ids[cls])

    print("%s has %d samples for training (%d after augmentation) and %d for testing" %
          (cls, n_train, n_train * C.aug_factor, n_test))

#Y_test = np_utils.to_categorical(Y_test, nb_classes)
#Y_train_orig = np_utils.to_categorical(Y_train_orig, nb_classes)
X_test = [np.array(X_test), np.array(X2_test)]
X_train_orig = [np.array(X_train_orig), np.array(X2_train_orig)]

Y_test = np.array(Y_test)
Y_train_orig = np.array(Y_train_orig)

Z_test = np.array(Z_test)
Z_train_orig = np.array(Z_train_orig)


# Per-class mean (over axis 0) of the second model input, orig_data_dict[cls][1].
# train_generator_func reads this dict for artificial samples.
avg_X2 = {
    cls: np.mean(orig_data_dict[cls][1], axis=0)
    for cls in C.classes_to_include
}


def train_generator_func(C, train_ids, intensity_df, voi_df, n=12, n_art=0):
    """Yield an endless stream of training batches.

    Every batch contains, for each class in ``C.classes_to_include`` (in that
    order), ``n`` randomly chosen augmented images followed by ``n_art``
    randomly chosen artificial images, i.e. ``(n + n_art) * num_classes`` rows.

    Parameters
    ----------
    C : config object
        Must provide ``classes_to_include``, ``dims``, ``nb_channels``,
        ``aug_dir`` and ``artif_dir``. The directory attributes are
        concatenated directly with the class name.
    train_ids : dict
        Class name -> collection of original training filenames
        (``"<name>.npy"``). Only augmented files whose name, with the last
        ``_<suffix>`` removed, appears here are sampled.
    intensity_df : pandas.DataFrame
        Needs an ``"AccNum"`` column; the rows matching the image's prefix
        (text before the first ``_``) are passed to ``cfunc.rescale_int``.
    voi_df : pandas.DataFrame
        Needs ``"Filename"``, ``"lesion_num"``, ``"real_dx"``, ``"real_dy"``
        and ``"real_dz"`` columns.
    n : int
        Number of augmented samples per class per batch.
    n_art : int
        Number of artificial samples per class per batch.

    Yields
    ------
    tuple
        ``(cfunc.separate_phases([x1, x2]), y)``, where ``x1`` holds the
        images, ``x2`` the two size features per sample and ``y`` the one-hot
        labels.

    Raises
    ------
    ValueError
        If a class has no eligible augmented / artificial file to sample from,
        or if an augmented image does not match exactly one row of ``voi_df``.

    Notes
    -----
    When ``n_art > 0`` the size features of artificial samples are read from
    the module-level ``avg_X2`` dict, which must exist before the first batch
    is requested.
    """
    classes_to_include = C.classes_to_include
    num_classes = len(classes_to_include)
    batch_size = (n + n_art) * num_classes

    # List each directory once and keep only the files that may be sampled.
    # Choosing uniformly from the filtered list is equivalent to the previous
    # "choose from everything and reject non-training files" loop, but cannot
    # spin forever when nothing matches.
    aug_fns = {}
    art_fns = {}
    for cls in classes_to_include:
        if n > 0:
            wanted = set(train_ids[cls])
            aug_fns[cls] = [fn for fn in os.listdir(C.aug_dir + cls)
                            if fn[:fn.rfind('_')] + ".npy" in wanted]
            if not aug_fns[cls]:
                raise ValueError("No augmented training images found for class %r" % (cls,))
        if n_art > 0:
            art_fns[cls] = os.listdir(C.artif_dir + cls)
            if not art_fns[cls]:
                raise ValueError("No artificial images found for class %r" % (cls,))

    while True:
        x1 = np.empty((batch_size, C.dims[0], C.dims[1], C.dims[2], C.nb_channels))
        x2 = np.empty((batch_size, 2))
        y = np.zeros((batch_size, num_classes))

        row_idx = 0
        for cls_idx, cls in enumerate(classes_to_include):
            # Exactly n real samples per class. The old `train_cnt % n == 0`
            # stop test miscounted as soon as artificial rows were interleaved.
            for _ in range(n):
                img_fn = random.choice(aug_fns[cls])
                # Augmented files are named "<prefix>_<lesion_num>_<suffix>.npy".
                prefix = img_fn[:img_fn.find('_')]
                lesion_num = int(img_fn[img_fn.find('_') + 1:img_fn.rfind('_')])

                x1[row_idx] = np.load(os.path.join(C.aug_dir + cls, img_fn))
                x1[row_idx] = cfunc.rescale_int(x1[row_idx],
                                                intensity_df[intensity_df["AccNum"] == prefix])

                voi = voi_df[(voi_df["Filename"] == prefix + ".npy") &
                             (voi_df["lesion_num"] == lesion_num)]
                if len(voi) != 1:
                    raise ValueError("Expected exactly one VOI row for %r, found %d"
                                     % (img_fn, len(voi)))
                dx, dy, dz = (float(voi[col].iloc[0])
                              for col in ("real_dx", "real_dy", "real_dz"))
                # Two size features: cube root of the real volume (scaled by
                # 1/50) and the in-plane extent relative to the z extent.
                x2[row_idx] = [(dx * dy * dz) ** (1/3) / 50, max(dx, dy) / dz]

                y[row_idx, cls_idx] = 1
                row_idx += 1

            for _ in range(n_art):
                img_fn = random.choice(art_fns[cls])
                x1[row_idx] = np.load(os.path.join(C.artif_dir + cls, img_fn))
                x2[row_idx] = avg_X2[cls]
                y[row_idx, cls_idx] = 1
                row_idx += 1

        yield cfunc.separate_phases([x1, x2]), y

{'hcc': 52, 'fnh': 67, 'cholangio': 56, 'hemangioma': 56, 'cyst': 74, 'colorectal': 49}
hcc has 42 samples for training (2100 after augmentation) and 10 for testing
fnh has 54 samples for training (2700 after augmentation) and 13 for testing
cholangio has 45 samples for training (2250 after augmentation) and 11 for testing
hemangioma has 45 samples for training (2250 after augmentation) and 11 for testing
cyst has 59 samples for training (2950 after augmentation) and 15 for testing
colorectal has 39 samples for training (1950 after augmentation) and 10 for testing


# Pre-training run on generator batches. train_generator_func has four required
# arguments, so calling it bare raised a TypeError; n / n_art use their defaults.
# NOTE(review): `model_pretrain` is not defined in any visible cell — confirm
# where it is built before running this.
train_generator = train_generator_func(C, train_ids, intensity_df, voi_df)
model_pretrain.fit_generator(train_generator, steps_per_epoch=120, epochs=50)#, callbacks=[early_stopping])

In [18]:
# Train on generator batches: 12 augmented samples per class per batch, no
# artificial samples, 100 batches per epoch for up to 50 epochs. The early
# stopping callback is disabled for this run (left commented out).
#early_stopping = EarlyStopping(monitor='acc', min_delta=0.01, patience=4)
train_generator = train_generator_func(C, train_ids, intensity_df, voi_df, n=12, n_art=0)
model.fit_generator(train_generator, steps_per_epoch=100, epochs=50)#, callbacks=[early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50

KeyboardInterrupt: 

# In-memory training with the test split used as validation data.
# NOTE(review): X_train2 / Y_train2 are not defined in any visible cell, and
# early_stopping is only created in a later cell — confirm both before re-running.
hist = model.fit(X_train2, Y_train2, batch_size=32, epochs=200, validation_data=(X_test, Y_test), callbacks=[early_stopping])

In [8]:
# One VOI table per file configured on C (art / ven / eq paths).
voi_df_art, voi_df_ven, voi_df_eq = (
    pd.read_csv(path)
    for path in (C.art_voi_path, C.ven_voi_path, C.eq_voi_path)
)

# The small-VOI CSV has two columns per row: a key and a bracketed,
# comma-separated list of integers (e.g. "[1, 2, 3]"). Strip the brackets and
# parse the numbers into a list of ints.
with open(C.small_voi_path, 'r') as csv_file:
    small_vois = {
        key: [int(token) for token in raw[1:-1].split(', ')]
        for key, raw in csv.reader(csv_file)
    }

In [9]:
# Display the configured class list; a class's position in it is the label
# index used for the one-hot targets.
C.classes_to_include

['hcc', 'cholangio', 'colorectal', 'cyst', 'hemangioma', 'fnh']

In [10]:
# Convert both input sets with cfunc.separate_phases before calling predict.
# NOTE(review): each name is rebound to the helper's return value, so running
# this cell a second time feeds the already-converted value back in — re-run
# the split cell first, or confirm that a second pass is harmless.
X_test = cfunc.separate_phases(X_test)
X_train_orig = cfunc.separate_phases(X_train_orig)

In [11]:
# Override the output image directory on the config for this session.
# NOTE(review): hard-coded absolute Windows path — adjust per machine or move
# into the config module.
C.output_img_dir = "Z:\\OUTPUT\\12-06"

In [19]:
# Evaluate on the held-out test set: predict, convert one-hot rows to class
# indices, compute the confusion matrix and weighted F1, then pass the
# predictions to cfunc.save_output.
#plot_with_bbox(fn_list[2], cls_mapping[wrong_guesses[2]])
Y_pred = model.predict(X_test)
# np.argmax returns the first index of the row maximum, the same choice the
# previous max(enumerate(...)) expression made.
y_true = np.argmax(Y_test, axis=1)
y_pred = np.argmax(Y_pred, axis=1)

cm = confusion_matrix(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average="weighted")

cfunc.save_output(Z_test, y_pred, y_true, voi_df_art, small_vois, C.classes_to_include, C)

In [73]:
# Same evaluation on the un-augmented training originals.
# NOTE: this overwrites Y_pred, y_true, y_pred, cm and f1 from the test-set
# cell, so cells that read them reflect whichever evaluation ran last.
Y_pred = model.predict(X_train_orig)
# np.argmax returns the first index of the row maximum, the same choice the
# previous max(enumerate(...)) expression made.
y_true = np.argmax(Y_train_orig, axis=1)
y_pred = np.argmax(Y_pred, axis=1)

cm = confusion_matrix(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average="weighted")

cfunc.save_output(Z_train_orig, y_pred, y_true, voi_df_art, small_vois, C.classes_to_include, C)

In [23]:
# Show the confusion matrix from whichever evaluation cell (test or training
# set) was executed last.
cm

array([[ 5,  1,  0,  0,  0,  4],
       [ 0,  7,  3,  0,  1,  0],
       [ 0,  1,  7,  0,  2,  0],
       [ 0,  0,  0, 14,  1,  0],
       [ 0,  0,  0,  0, 11,  0],
       [ 2,  1,  0,  0,  0, 10]], dtype=int64)

In [21]:
# Derive simplified label / prediction vectors with cfunc.condense_cm; its
# third return value is not used.
y_true_simp, y_pred_simp, _ = cfunc.condense_cm(y_true, y_pred, C.classes_to_include)

In [22]:
# Accuracy on the simplified (condensed) labels.
accuracy_score(y_true_simp, y_pred_simp)

0.84285714285714286

In [20]:
# Accuracy on the original class labels.
accuracy_score(y_true, y_pred)

0.77142857142857146

4 cls: 94%/94%, 86%/88%, 86%/90%
5 cls: 80%/89%, 72%/86%, 84%/86%

6 cls no artificial, separate phases: 75%/84%
6 cls with artificial, separate phases:

In [365]:
# Filenames and predicted class indices of the misclassified samples, both
# selected with the same "prediction differs from truth" mask.
fn_list, wrong_guesses = (
    values[np.not_equal(y_pred, y_true)]
    for values in (Z_test, np.array(y_pred))
)

In [409]:
# Display the filenames whose predicted class differs from the true class.
Z_test[~np.equal(y_pred, y_true)]

array(['E100815762_0.npy', '12239513_0.npy', '12042703_2.npy'],
      dtype='<U16')

In [21]:
# Inspect every row of the art VOI table recorded for one specific file.
voi_df_art[voi_df_art["Filename"] == "12673726.npy"]

Unnamed: 0,Filename,x1,x2,y1,y2,z1,z2,cls,flipz,dx,dy,dz,id
119,12673726.npy,105,134,76,101,40,53,hemangioma,True,29,25,13,126737261057640
120,12673726.npy,127,139,103,117,44,51,hemangioma,True,12,14,7,1267372612710344


In [22]:
# Look up one lesion in the ven VOI table by its id (compared as a string).
voi_df_ven[voi_df_ven["id"] == "1267372612710344"]

Unnamed: 0,id,x1,x2,y1,y2,z1,z2,dx,dy,dz
74,1267372612710344,127,139,103,117,44,52,12,14,8


# Run model2 on X_train and keep the output as `activ`.
# NOTE(review): neither `model2` nor `X_train` is defined in any visible cell —
# confirm where they come from before re-running.
activ = model2.predict(X_train)
#activ = model2.predict(np.expand_dims(X_train[10],axis=0))

In [316]:
# Print, for every test sample, the predicted class name and the largest model
# output expressed as a percentage. The per-cell `import operator` is gone:
# np.argmax replaces the hand-rolled argmax, and the module is already imported
# in the notebook's import cell.
#print("Ground truth:", [C.classes_to_include[max(enumerate(x), key=operator.itemgetter(1))[0]] for x in Y_val[::30]])
Y_ = model.predict(X_test)
print("Predictions:", [
    C.classes_to_include[np.argmax(probs)] + " (%.5f%% confidence)" % (probs.max()*100)
    for probs in Y_
])

Predictions: ['hcc (95.75861% confidence)', 'hcc (99.98515% confidence)', 'hcc (56.77554% confidence)', 'hcc (97.68384% confidence)', 'hcc (96.73136% confidence)', 'hcc (99.99979% confidence)', 'hcc (99.99988% confidence)', 'hcc (99.71866% confidence)', 'cyst (63.61439% confidence)', 'cyst (51.41591% confidence)', 'hcc (99.65365% confidence)', 'hcc (52.72004% confidence)', 'hcc (57.44457% confidence)', 'cyst (99.82330% confidence)', 'cyst (83.69393% confidence)']


In [151]:
# Early-stopping callback monitoring 'acc' with min_delta=0.01 and patience=4.
# Other cells reference it only through a `callbacks=[early_stopping]` argument.
early_stopping = EarlyStopping(monitor='acc', min_delta=0.01, patience=4)


# Print the layer summary with a wider line so the columns are not truncated.
model.summary(line_length=120)

________________________________________________________________________________________________________________________
Layer (type)                           Output Shape               Param #       Connected to                            
input_31 (InputLayer)                  (None, 36, 36, 12, 1)      0                                                     
________________________________________________________________________________________________________________________
input_32 (InputLayer)                  (None, 36, 36, 12, 1)      0                                                     
________________________________________________________________________________________________________________________
input_33 (InputLayer)                  (None, 36, 36, 12, 1)      0                                                     
________________________________________________________________________________________________________________________
conv3d_36 (Conv3D)              

In [86]:
# Wall-clock duration, in seconds, of one predict call. perf_counter is a
# monotonic, high-resolution clock intended for measuring intervals; `time`
# is imported in the notebook's import cell.
# NOTE(review): `X_val` is not defined in any visible cell — confirm its source.
t_start = time.perf_counter()
Y_ = model.predict(X_val)
print(time.perf_counter() - t_start)

0.20054101943969727


In [136]:
import matplotlib.pyplot as plt
%matplotlib inline