In [1]:
from __future__ import division
import sys
sys.path.append('../src')
import params
import numpy as np
import os
from tqdm import tqdm

# Location of the saved network weights that this notebook evaluates.
model_folder = '../models/'
model_filename = '1472001110_stacked/1472001110_stacked_epoch224.npz'
model_path = os.path.join(model_folder, model_filename)

# Replace the module-level `params.params` with a configuration built from
# these files (listed in load order), so the `from params import params`
# that follows sees the stacked-model settings.
params.params = params.Params([
    '../config/default.ini',
    '../config/stacked.ini',
    '../config/titania_stacked.ini',
])
from params import params as P
import theano
import theano.tensor as T
import lasagne
import resnet

import patch_sampling
from cparallel import ContinuousParallelBatchIterator


Loaded configurations from (in order) ['../src/../config/default.ini', '../src/../config/notebook.ini']
Loaded configurations from (in order) ['../config/default.ini', '../config/stacked.ini', '../config/titania_stacked.ini']
Couldn't import dot_parser, loading of dot files will not be possible.


Using gpu device 3: GeForce GTX TITAN X (CNMeM is disabled, cuDNN 5005)


OpenCV 2 NOT AVAILABLE, using skimage/scipy.ndimage instead


In [2]:
# Symbolic 4-D tensor for the network input.
# NOTE(review): `input_var` is assigned again in the network-definition cell
# before its first use, so this assignment looks redundant -- confirm before
# removing it.
input_var = T.tensor4('inputs')

# Build the train/validation patch samplers (only the validation one is used
# further down in this notebook).
train_generator, validation_generator = patch_sampling.prepare_custom_sampler(
    mini_subset=False,
    override_cache_size=1,
)

Loading train samplers
Loading validation samplers
Loading samplers took 41.1890621185 seconds.


In [3]:
# DEFINE AND LOAD NETWORK

# Symbolic placeholders: a 4-D input tensor and an integer vector of targets.
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')

# Build the ResNet_FullPre_Wide network, keep it only up to its
# third-from-last layer, and use that layer as the input of the stacked network.
# NOTE(review): presumably this cuts off the base network's classification
# head -- confirm against resnet.py.
all_layers = lasagne.layers.get_all_layers(
    resnet.ResNet_FullPre_Wide(input_var, 4, 2)
)
net = resnet.ResNet_Stacked(all_layers[-3])

# The .npz archive holds the parameters under the keys arr_0, arr_1, ...;
# read them back in that order.
with np.load(model_path) as archive:
    param_values = [
        archive['arr_%d' % idx]
        for idx in range(len(archive.files))
    ]

lasagne.layers.set_all_param_values(net, param_values)

In [4]:
# Build the prediction function for the loaded network. Further down it is
# called as predict_fn(inputs, targets) and its result is unpacked into
# (loss, acc, pred_binary, pred).
predict_fn = resnet.define_predict(net, input_var, target_var)

In [5]:
# Queue 1000 identical work items, each carrying the value `batch_size`.
# NOTE(review): presumably each item asks the validation sampler for
# `batch_size` patches -- confirm against patch_sampling / cparallel.
batch_size = 50
X = [batch_size] * 1000

batch_gen = ContinuousParallelBatchIterator(
    validation_generator,
    ordered=False,
    batch_size=1,
    multiprocess=False,
    n_producers=2,
)
batch_gen.append(X)

In [None]:
# Accumulators filled by the prediction loop: what went into the network ...
all_inputs, all_targets, all_filenames = [], [], []

# ... and what came out of it.
all_loss, all_pred, all_binary_pred = [], [], []


In [None]:
# Run the network on 40 batches from the iterator and collect, per sample,
# the inputs, targets and filenames together with the network's outputs.
for i, batch in enumerate(tqdm(batch_gen(40))):
    inputs, targets, filenames = batch
    loss, acc, pred_binary, pred = predict_fn(inputs, targets)

    # What was fed to the network.
    all_inputs.extend(inputs)
    all_targets.extend(targets)
    all_filenames.extend(filenames)

    # What the network returned.
    all_loss.extend(loss)
    all_pred.extend(pred)
    all_binary_pred.extend(pred_binary)
    

  2%|▎         | 1/40 [00:14<09:42, 14.92s/it]

In [None]:
misclassified = np.array(all_binary_pred) != np.array(all_targets)
print "Amount of wrong labels", np.sum(misclassified), "out of", len(all_inputs)

wrong_images = np.array(all_inputs)[misclassified]
wrong_labels = np.array(all_binary_pred)[misclassified]
actual_labels = np.array(all_targets)[misclassified]
losses = np.array(all_loss)[misclassified]

wrong_images = util.unzero_center(wrong_images, P.MEAN_PIXEL)
wrong_images = wrong_images.transpose(0,2,3,1)

#Order by loss
sorted_order = np.argsort(losses)[::-1]

wrong_images = wrong_images[sorted_order]
wrong_labels = wrong_labels[sorted_order]
actual_labels = actual_labels[sorted_order]
losses = losses[sorted_order]

In [None]:
%matplotlib inline


import matplotlib.pyplot as plt
import util
from dataset import label_name


# Show the misclassified patches in 4x4 grids, in the order of `wrong_images`.
for plot_number in range(0, len(wrong_images), 16):

    if plot_number//16 >= 2: #Lets not plot too many
        break

    f, axarr = plt.subplots(4,4,figsize=(16,16))

    # `axarr.flat` walks the grid row by row, so slot k lands on
    # axarr[k // 4, k % 4].
    for slot, ax in enumerate(axarr.flat):
        idx = plot_number + slot

        if idx < len(wrong_images):
            ax.imshow(wrong_images[idx])
            ax.set_title(label_name(wrong_labels[idx])+", is actually " + label_name(actual_labels[idx]) + " " + str(losses[idx]))

        # Turn the frame off for every subplot, including the unused ones of
        # a partially filled last grid (these used to show empty axes).
        ax.axis('off')

    plt.subplots_adjust(wspace = -0.2, hspace=0.12)
    plt.show()

In [None]:
import sklearn.metrics

label_names = ['Benign', 'DCIS', 'IDC']

# Rows of `cm` are the true classes, columns the predicted classes.
cm = sklearn.metrics.confusion_matrix(all_targets, all_binary_pred)

# Normalise each row to fractions of that true class. A class with no
# ground-truth samples has a zero row total; divide such rows by 1 instead so
# they stay all-zero rather than turning into NaN (with a runtime warning).
row_totals = cm.sum(axis=1)[:, np.newaxis]
safe_totals = np.where(row_totals == 0, 1, row_totals)
cm_normalized = cm.astype('float') / safe_totals

plt.figure(figsize=(8,8))
plt.imshow(cm_normalized, interpolation='nearest')
plt.xticks(np.arange(len(label_names)), label_names)
plt.yticks(np.arange(len(label_names)), label_names)
plt.xlabel('Predicted label')
plt.ylabel('True label')
# Add the colorbar before tight_layout so the layout accounts for it.
plt.colorbar()
plt.tight_layout()
plt.show()

cm_normalized


In [None]:
# From https://gist.github.com/zachguo/10296432
def print_cm(cm, labels, hide_zeroes=False, hide_diagonal=False, hide_threshold=None):
    """Pretty-print a confusion matrix as a fixed-width text table.

    Parameters:
        cm: 2-D matrix indexed as ``cm[i, j]`` with at least
            ``len(labels)`` rows and columns.
        labels: sequence of label strings, used for both the header row and
            the first column.
        hide_zeroes: when true, blank out cells whose value equals 0.
        hide_diagonal: when true, blank out the cells on the diagonal.
        hide_threshold: when not None, blank out every cell whose value is
            not strictly greater than this threshold. A threshold of 0 is
            honoured (it used to be silently ignored because it is falsy).

    Returns:
        None; the table is written to standard output.
    """
    columnwidth = max([len(x) for x in labels]+[7]) # 7 is value length
    empty_cell = " " * columnwidth
    label_fmt = "%{0}s".format(columnwidth)
    value_fmt = "%{0}.3f".format(columnwidth)

    # Each line is built as a list of cells and printed with one
    # single-argument print(...), which produces the same output whether
    # print is a statement (Python 2) or a function (Python 3).

    # Print header
    header = ["    " + empty_cell] + [label_fmt % label for label in labels]
    print(" ".join(header))

    # Print rows
    for i, label1 in enumerate(labels):
        row = ["    " + label_fmt % label1]
        for j in range(len(labels)):
            value = cm[i, j]
            cell = value_fmt % value
            if hide_zeroes and float(value) == 0:
                cell = empty_cell
            if hide_diagonal and i == j:
                cell = empty_cell
            # `is not None` so that an explicit threshold of 0 still applies.
            if hide_threshold is not None and not value > hide_threshold:
                cell = empty_cell
            row.append(cell)
        print(" ".join(row))
    
# Print the row-normalised confusion matrix as a text table.
print_cm(cm=cm_normalized, labels=label_names)