In [19]:
import os
import sys
import numpy as np
import cv2
from keras import backend as KBackend
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator

import matplotlib.pyplot as plt
import seaborn as sns
%pylab inline

# Report which Keras backend and image data format are active in this kernel.
print('Keras backend:', KBackend.backend())
print('Keras image format:', KBackend.image_data_format(), '\n')

Populating the interactive namespace from numpy and matplotlib
Keras backend: tensorflow
Keras image format: channels_last 



In [2]:
# Load the saved network from its HDF5 file and print its layer summary.
model = load_model('log_cnn/rgb-32C3-32C3-64C5-F32-F321_E69_L0.64_A0.66.hdf5')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_268 (Conv2D)          (None, 158, 158, 32)      896       
_________________________________________________________________
activation_376 (Activation)  (None, 158, 158, 32)      0         
_________________________________________________________________
max_pooling2d_143 (MaxPoolin (None, 79, 79, 32)        0         
_________________________________________________________________
conv2d_269 (Conv2D)          (None, 77, 77, 32)        9248      
_________________________________________________________________
activation_377 (Activation)  (None, 77, 77, 32)        0         
_________________________________________________________________
max_pooling2d_144 (MaxPoolin (None, 38, 38, 32)        0         
_________________________________________________________________
conv2d_270 (Conv2D)          (None, 36, 36, 64)        18496     
__________

In [126]:
# Root folders of the test data for the two classes.
test_cancer_dir = 'tmp_test_cancer'
test_fibro_dir = 'tmp_test_fibro'

# Patient ids are the sorted entry names found directly under each root.
cancer_ids = sorted(os.listdir(test_cancer_dir))
fibro_ids = sorted(os.listdir(test_fibro_dir))

# Total number of entries across all patient folders, per class.
test_cancer_imnum = sum(
    len(os.listdir(os.path.join(test_cancer_dir, str(patient))))
    for patient in cancer_ids
)
test_fibro_imnum = sum(
    len(os.listdir(os.path.join(test_fibro_dir, str(patient))))
    for patient in fibro_ids
)

print('test can total:', test_cancer_imnum, 'for', len(cancer_ids), 'patients')
print('test fib total:', test_fibro_imnum, 'for', len(fibro_ids), 'patients')

test can total: 1771 for 68 patients
test fib total: 349 for 33 patients


In [122]:
# Generator built with default arguments; gen_predictions uses it to read test images.
# NOTE(review): no rescaling/normalisation is configured here — confirm this matches
# the preprocessing that was applied when the model was trained.
test_data_generator = ImageDataGenerator()

In [127]:
def gen_predictions(cat_dir, p_id, catres_dir):
    """Predict every image of one patient and store the scores in a text file.

    The images are read from ``<cat_dir>/<p_id>`` through the module-level
    ``test_data_generator`` and scored by the module-level ``model``.  The
    first output value of each prediction is written to
    ``<catres_dir>/<p_id>.lst`` as ``"<value>, "`` entries on a single line,
    which is the layout the parsing code elsewhere in this notebook expects.

    Parameters
    ----------
    cat_dir : str
        Directory that contains one sub-directory per patient.
    p_id : str
        Name of the patient sub-directory inside ``cat_dir``.
    catres_dir : str
        Directory that receives the ``.lst`` file; created if it is missing.
    """
    patient_dir = os.path.join(cat_dir, p_id)

    # Restricting `classes` to this patient makes the generator read only its
    # folder; the batch size equals the folder's entry count so a single step
    # covers the whole patient, and shuffle=False keeps the file order.
    pgen = test_data_generator.flow_from_directory(cat_dir,
                                                   target_size=(160, 160),
                                                   classes=[p_id],
                                                   batch_size=len(os.listdir(patient_dir)),
                                                   shuffle=False)

    pred = model.predict_generator(generator=pgen, steps=1)

    # Create the result directory on demand so a fresh checkout does not fail,
    # and use a context manager so the file is closed even if a write raises.
    os.makedirs(catres_dir, exist_ok=True)
    with open(os.path.join(catres_dir, p_id + '.lst'), 'w') as f:
        for row in pred:
            f.write('{}, '.format(row[0]))

In [88]:
# Write one prediction file per patient: all cancer patients first, then fibro.
for source_dir, patient_ids, result_root in (
        (test_cancer_dir, cancer_ids, 'stat_test_cancer'),
        (test_fibro_dir, fibro_ids, 'stat_test_fibro')):
    values_dir = os.path.join(result_root, 'values')
    for pid in patient_ids:
        gen_predictions(source_dir, pid, values_dir)

Found 51 images belonging to 1 classes.
Found 72 images belonging to 1 classes.
Found 47 images belonging to 1 classes.
Found 37 images belonging to 1 classes.
Found 42 images belonging to 1 classes.
Found 46 images belonging to 1 classes.
Found 51 images belonging to 1 classes.
Found 34 images belonging to 1 classes.
Found 38 images belonging to 1 classes.
Found 47 images belonging to 1 classes.
Found 52 images belonging to 1 classes.
Found 50 images belonging to 1 classes.
Found 52 images belonging to 1 classes.
Found 51 images belonging to 1 classes.
Found 50 images belonging to 1 classes.
Found 44 images belonging to 1 classes.
Found 50 images belonging to 1 classes.
Found 61 images belonging to 1 classes.
Found 61 images belonging to 1 classes.
Found 36 images belonging to 1 classes.
Found 47 images belonging to 1 classes.
Found 48 images belonging to 1 classes.
Found 53 images belonging to 1 classes.
Found 56 images belonging to 1 classes.
Found 55 images belonging to 1 classes.


In [124]:
# Each file is read as whitespace-separated tokens; the text before the first
# comma of every token is parsed as a float.
with open('statistics/cancer_train_pred_distribution.lst') as handle:
    can_values = np.array([float(token.partition(',')[0])
                           for token in handle.read().split()])

with open('statistics/fibro_train_pred_distribution.lst') as handle:
    fib_values = np.array([float(token.partition(',')[0])
                           for token in handle.read().split()])

print(can_values.shape, fib_values.shape)

(1632,) (1392,)


In [33]:
# Save the distribution of the training predictions of each class as a PNG.
# The 8x6 size is given to the figures that are actually drawn and saved,
# instead of to a separate blank figure that was never used nor closed (and
# whose creation relied on the names injected by %pylab).
fig1 = plt.figure(figsize=(8, 6))
sns_plot1 = sns.distplot(can_values, rug=True, kde=True)
plt.savefig('stat_test_cancer/train_pred_distribution.png')
plt.close(fig1)  # closing keeps the inline backend from rendering it again

fig2 = plt.figure(figsize=(8, 6))
sns_plot2 = sns.distplot(fib_values, rug=True, kde=True)
plt.savefig('stat_test_fibro/train_pred_distribution.png')
plt.close(fig2)

<matplotlib.figure.Figure at 0x7f50201e9860>

In [89]:
# For every patient of both classes: read the stored prediction values, draw
# their distribution and save it next to the values, closing each figure so
# they do not pile up in memory.
for values_template, plot_template, patient_ids in (
        ('stat_test_cancer/values/{}.lst', 'stat_test_cancer/plots/{}.png', cancer_ids),
        ('stat_test_fibro/values/{}.lst', 'stat_test_fibro/plots/{}.png', fibro_ids)):
    for pid in patient_ids:
        with open(values_template.format(pid)) as source:
            tokens = source.read().split()
        pval = np.array([float(token.split(',')[0]) for token in tokens])

        fig = plt.figure()
        sns_plot = sns.distplot(pval, rug=True, kde=True)
        sns_plot.figure.savefig(plot_template.format(pid))
        plt.close(fig)

In [135]:
from scipy.stats import ks_2samp

def kolmogorov_pred(val_path, canvals, fibvals, p_tol=0.01):
    """Classify one prediction file with two-sample Kolmogorov-Smirnov tests.

    The values stored in ``val_path`` are compared against both reference
    samples with ``ks_2samp``.  The class whose reference sample yields the
    larger p-value wins; when the two p-values differ by less than ``p_tol``
    the class with the smaller KS statistic wins instead.

    Parameters
    ----------
    val_path : str
        Text file of whitespace-separated tokens; the text before the first
        comma of each token is parsed as a float.
    canvals, fibvals : array-like
        Reference samples for class 0 (cancer) and class 1 (fibro).
    p_tol : float, optional
        Absolute p-value difference below which the p-values are treated as
        indistinguishable.  Defaults to 0.01, the previously hard-coded value.

    Returns
    -------
    tuple
        ``(s_can, s_fib, answer)``: the two ``ks_2samp`` results and the
        predicted class, 0 = cancer or 1 = fibro.
    """
    with open(val_path) as o:
        pvals = np.array([float(x.split(',')[0]) for x in o.read().split()])

    s_can = ks_2samp(canvals, pvals)
    s_fib = ks_2samp(fibvals, pvals)

    # Index access works for both plain tuples and named result objects.
    d_can, p_can = s_can[0], s_can[1]
    d_fib, p_fib = s_fib[0], s_fib[1]

    if abs(p_can - p_fib) < p_tol:
        # p-values too close to call: prefer the smaller distance statistic.
        answer = 0 if d_can < d_fib else 1
    else:
        answer = 0 if p_can > p_fib else 1

    return s_can, s_fib, answer

def mean_pred(val_path, canvals, fibvals):
    """Majority vote over the values stored in ``val_path``.

    Every value that is <= 0.5 counts as a vote for class 0 (cancer) and every
    value that is > 0.5 as a vote for class 1 (fibro).  Returns 0 only when
    the cancer votes strictly outnumber the fibro votes, otherwise 1 (ties
    included).  ``canvals`` and ``fibvals`` are accepted for signature parity
    with ``kolmogorov_pred`` but are not used.
    """
    with open(val_path) as source:
        tokens = source.read().split()
    scores = np.array([float(token.split(',')[0]) for token in tokens])

    cancer_votes = np.count_nonzero(scores <= 0.5)
    fibro_votes = np.count_nonzero(scores > 0.5)

    return 0 if cancer_votes > fibro_votes else 1

In [136]:
def _write_category_report(patient_ids, values_dir, csv_path, label, true_class):
    """Classify every patient of one class and write the per-patient report.

    For each id in ``patient_ids`` the file ``<values_dir>/<id>.lst`` is
    classified with ``kolmogorov_pred`` and ``mean_pred``.  One CSV row per
    patient is written to ``csv_path``, followed by a final summary line with
    the number of KS answers equal to ``true_class``.

    Returns ``(ks_answers, mean_answers)``, two lists of 0/1 predictions in
    the order of ``patient_ids``.
    """
    ks_answers, mean_answers = [], []
    # The context manager closes the report even if a classification fails.
    with open(csv_path, 'w') as report:
        # Seven columns, matching the seven fields of every data row.
        report.write('pid, dcan, pcan, dfib, pfib, ks_prediction, mean_prediction\n')
        for pid in patient_ids:
            val_path = os.path.join(values_dir, '{}.lst'.format(pid))
            s_can, s_fib, ks_answer = kolmogorov_pred(val_path, can_values, fib_values)
            mean_answer = mean_pred(val_path, can_values, fib_values)
            report.write('{}, {:.6f}, {:.6f}, {:.6f}, {:.6f}, {}, {}\n'.format(
                pid, s_can[0], s_can[1], s_fib[0], s_fib[1], ks_answer, mean_answer))
            ks_answers.append(ks_answer)
            mean_answers.append(mean_answer)

        nom = sum(1 for answer in ks_answers if answer == true_class)
        den = len(ks_answers)
        # Guard against an empty patient list instead of dividing by zero.
        percent = float(nom) / float(den) * 100. if den else 0.
        report.write('ks {} correct: {}/{} ({:.1f}%)'.format(label, nom, den, percent))
    return ks_answers, mean_answers


# Predictions are encoded as 0 = cancer, 1 = fibro.
ac, bc = _write_category_report(cancer_ids, os.path.join('stat_test_cancer', 'values'),
                                'ks_cancer.csv', 'can', 0)
af, bf = _write_category_report(fibro_ids, os.path.join('stat_test_fibro', 'values'),
                                'ks_fibro.csv', 'fib', 1)

# Cancer is class 0, so correct cancer answers are the zeros; fibro is class 1,
# so correct fibro answers are the ones.
print('ks can correct:', len(ac) - sum(ac), '/', len(ac))
print('ks fib correct:', sum(af), '/', len(af))

print('m can correct:', len(bc) - sum(bc), '/', len(bc))
print('m fib correct:', sum(bf), '/', len(bf))

ks can correct: 48 / 68
ks fib correct: 21 / 33
m can correct: 40 / 68
m fib correct: 23 / 33


In [84]:
# Two-sample KS test between the two training prediction arrays
# (can_values vs fib_values); the result is shown as the cell output.
ks_2samp(can_values, fib_values)

Ks_2sampResult(statistic=0.25798681541582147, pvalue=3.0021127124198237e-44)

In [96]:
# Sanity check: sizes and minimum values of the two training prediction arrays.
print(can_values.shape, fib_values.shape)
print(np.min(can_values), np.min(fib_values))

(1632,) (1392,)
0.357087939978 0.352834403515


In [140]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Augmentation preview: random horizontal/vertical flips and rotations of up
# to 90 degrees, using fill_mode='nearest' for pixels created by the rotation.
datagen = ImageDataGenerator(horizontal_flip=True,
                             vertical_flip=True,
                             rotation_range=90,
                             fill_mode='nearest')

img = load_img('dataset/cancer/404_2_1.png')  # this is a PIL image
# 3-D Numpy array; the axis order follows the Keras image data format
# (this notebook reports channels_last, i.e. height x width x channels).
x = img_to_array(img)
x = x.reshape((1,) + x.shape)  # prepend a batch axis of size 1

# The augmented images are written into this directory, so make sure it exists.
preview_dir = 'figures'
os.makedirs(preview_dir, exist_ok=True)

# Every batch drawn from .flow() is one randomly transformed copy of the
# image, saved into `figures/` with the prefix `aug`.  The generator loops
# indefinitely, so stop after a fixed number of images (21, as before).
preview_count = 21
aug_flow = datagen.flow(x, batch_size=1,
                        save_to_dir=preview_dir, save_prefix='aug', save_format='png')
for drawn, _batch in enumerate(aug_flow, start=1):
    if drawn >= preview_count:
        break