In [None]:
%matplotlib inline

import sys
sys.path.insert(0, '/home/paul/.conda/envs/tensorflow/lib/python3.6/site-packages')
#sys.path.insert(0, '/usr/local/lib/python3.5/dist-packages')

import glob
import os
import itertools
import re
from collections import Counter
import csv

import numpy
import tensorflow
import keras
import sklearn.metrics

import matplotlib.pyplot
import pandas
import seaborn

import deepometry.model

In [None]:
def _shape(pathname):
    return numpy.load(pathname).shape


def load(pathnames, labels, patient_to_exclude):
    """Load arrays and integer class labels for ``pathnames``.

    Every path is read with ``numpy.load``. The class of a sample is the name
    of the directory that directly contains its file, encoded as the position
    of that name in ``sorted(labels)``.

    Paths that are not existing regular files are skipped, so the returned
    arrays only contain rows that were actually loaded (previously such rows
    were left as uninitialised ``numpy.empty`` memory).

    Args:
        pathnames: Sequence of file paths to load.
        labels: Iterable of class names; must include the parent directory
            name of every loaded file.
        patient_to_exclude: Unused. Kept so existing callers keep working
            (the exclusion filter it used to drive is disabled).

    Returns:
        Tuple ``(x, y)`` of ``numpy.uint8`` arrays: ``x`` stacks the loaded
        arrays along a new first axis, ``y`` holds one class index per row.

    Raises:
        ValueError: If none of ``pathnames`` is an existing file.
        KeyError: If a file's parent directory name is not in ``labels``.
    """
    existing = [pathname for pathname in pathnames if os.path.isfile(pathname)]

    if not existing:
        raise ValueError(
            "load() found no existing files among {} pathname(s)".format(len(pathnames))
        )

    label_to_index = {label: index for index, label in enumerate(sorted(labels))}

    # Read the first array once: it provides the per-sample shape and row 0.
    first = numpy.load(existing[0])

    x = numpy.empty((len(existing),) + first.shape, dtype=numpy.uint8)
    y = numpy.empty((len(existing),), dtype=numpy.uint8)

    for index, pathname in enumerate(existing):
        label = os.path.basename(os.path.dirname(pathname))

        x[index] = first if index == 0 else numpy.load(pathname)
        y[index] = label_to_index[label]

    return x, y

In [None]:
def sample(directories):
    """Collect the file paths found two levels below each of ``directories``.

    For every directory, each child entry is treated as a class subdirectory
    and its contents are shuffled with ``numpy.random.permutation``. The
    shuffled lists are truncated to ``nsamples`` and concatenated.

    ``nsamples`` is the size of the *largest* subdirectory, so the truncation
    never removes anything: every path is returned, shuffled within its
    subdirectory.
    NOTE(review): if class balancing was intended this would need ``min``
    rather than ``max`` -- confirm before changing.

    Args:
        directories: Iterable of directory paths.

    Returns:
        Flat list of paths (``numpy.str_`` instances, as produced by
        ``numpy.random.permutation``). Empty when nothing is found.
    """
    per_class_samples = []

    for directory in directories:
        subdirectories = sorted(glob.glob(os.path.join(directory, "*")))

        subdirectory_pathnames = [
            glob.glob(os.path.join(subdirectory, "*")) for subdirectory in subdirectories
        ]

        # default=0: a directory without any subdirectory contributes nothing
        # instead of making max() raise on an empty sequence.
        nsamples = max((len(found) for found in subdirectory_pathnames), default=0)
        #nsamples = 200000

        per_class_samples.extend(
            list(numpy.random.permutation(found)[:nsamples])
            for found in subdirectory_pathnames
        )

    # Linear-time flattening (sum(list_of_lists, []) copies repeatedly).
    return list(itertools.chain.from_iterable(per_class_samples))

In [None]:
def get_class_weights(y):
    """Map each class in ``y`` to ``largest class count / its own count``.

    The most frequent class therefore gets weight ``1.0`` and rarer classes
    get proportionally larger weights.
    """
    occurrences = Counter(y)
    largest = max(occurrences.values())

    weights = {}
    for cls, count in occurrences.items():
        weights[cls] = float(largest / count)
    return weights

In [None]:
def collect_pathnames(directories, labels):
    """Collect the ``.npy`` file paths found two levels below ``directories``.

    For every directory, each child entry is treated as a class subdirectory
    and its ``*.npy`` files are shuffled with ``numpy.random.permutation``.
    The shuffled lists are truncated to ``nsamples`` and concatenated.

    ``nsamples`` is the size of the *largest* subdirectory, so the truncation
    never removes anything: every ``.npy`` path is returned, shuffled within
    its subdirectory.
    NOTE(review): if class balancing was intended this would need ``min``
    rather than ``max`` -- confirm before changing.

    Args:
        directories: Iterable of directory paths.
        labels: Unused. Kept so existing callers keep working.

    Returns:
        Flat list of paths (``numpy.str_`` instances, as produced by
        ``numpy.random.permutation``). Empty when nothing is found.
    """
    per_class_samples = []

    for directory in directories:
        subdirectories = sorted(glob.glob(os.path.join(directory, "*")))

        # One list of .npy files per subdirectory (one subdirectory per label).
        subdirectory_pathnames = [
            glob.glob("{}/*.npy".format(subdirectory)) for subdirectory in subdirectories
        ]

        # default=0: a directory without any subdirectory contributes nothing
        # instead of making max() raise on an empty sequence.
        nsamples = max((len(found) for found in subdirectory_pathnames), default=0)

        per_class_samples.extend(
            list(numpy.random.permutation(found)[:nsamples])
            for found in subdirectory_pathnames
        )

    # Linear-time flattening (sum(list_of_lists, []) copies repeatedly).
    return list(itertools.chain.from_iterable(per_class_samples))

In [None]:
def load_include(pathnames, labels, patient_to_include):
    """Load the samples whose path contains ``patient_to_include``.

    Paths are selected by substring match, then read with ``numpy.load``. The
    class of a sample is the name of the directory that directly contains its
    file, encoded as the position of that name in ``sorted(labels)``.

    Selected paths that are not existing regular files are skipped, so the
    returned arrays only contain rows that were actually loaded (previously
    such rows were left as uninitialised ``numpy.empty`` memory).

    Args:
        pathnames: Sequence of candidate file paths.
        labels: Iterable of class names; must include the parent directory
            name of every loaded file.
        patient_to_include: Substring a path must contain to be loaded.

    Returns:
        Tuple ``(x, y)`` of ``numpy.uint8`` arrays: ``x`` stacks the loaded
        arrays along a new first axis, ``y`` holds one class index per row.

    Raises:
        ValueError: If no existing file matches ``patient_to_include``.
        KeyError: If a file's parent directory name is not in ``labels``.
    """
    print('All cells in treated patients: ',len(pathnames))
    selected = [pathname for pathname in pathnames if patient_to_include in pathname]
    print('Cells in this patient: ',len(selected))

    existing = [pathname for pathname in selected if os.path.isfile(pathname)]

    if not existing:
        raise ValueError(
            "no existing files found for patient {!r}".format(patient_to_include)
        )

    label_to_index = {label: index for index, label in enumerate(sorted(labels))}

    # Read the first array once: it provides the per-sample shape and row 0.
    first = numpy.load(existing[0])

    x = numpy.empty((len(existing),) + first.shape, dtype=numpy.uint8)
    y = numpy.empty((len(existing),), dtype=numpy.uint8)

    for index, pathname in enumerate(existing):
        label = os.path.basename(os.path.dirname(pathname))

        x[index] = first if index == 0 else numpy.load(pathname)
        y[index] = label_to_index[label]

    return x, y

In [None]:
# Class names. The loaders encode each class as its position in sorted(labels),
# and each name must match the directory that contains that class's files.
labels = ["Leukemic", "Normal", "Others"]

In [None]:
# Root directories handed to sample(); each is globbed for subdirectories
# whose contents become the candidate sample paths.
directories = ["/parsed_data/"]

In [None]:
# Gather the sample paths under `directories`. sample() shuffles them with
# numpy.random.permutation and no seed is set in this notebook, so the order
# is not reproducible between runs.
samples = sample(directories)
len(samples)

In [None]:
# Identifiers of the patient samples to evaluate. A path is selected for a
# patient when it contains the identifier as a substring.
patients_to_test = ['157pres','157day8','157day15','171pres','171day11','172pres','172day29','175pres','175day8','177pres', '177day8']
#patients_to_test = ['177day8']
#selected_to_train = [x for x in samples if numpy.all([not z in x for z in patients_to_test])]


In [None]:
# Build a TensorFlow session that grows its GPU memory allocation on demand
# (allow_growth) and only sees the GPU with device index "0"
# (visible_device_list).
# NOTE(review): `session` is not referenced again in the visible cells --
# confirm that Keras/deepometry is expected to pick up this configuration.
configuration = tensorflow.ConfigProto()
configuration.gpu_options.allow_growth = True
configuration.gpu_options.visible_device_list = "0"
session = tensorflow.Session(config = configuration)

In [None]:
def load_include_all(pathnames, labels, patients_to_include):
    """Load the samples whose path contains any of ``patients_to_include``.

    Paths are selected by substring match and each matching path is loaded
    exactly once, even when it matches several identifiers (previously it was
    duplicated once per matching identifier). The class of a sample is the
    name of the directory that directly contains its file, encoded as the
    position of that name in ``sorted(labels)``.

    Selected paths that are not existing regular files are skipped, so the
    returned arrays only contain rows that were actually loaded (previously
    such rows were left as uninitialised ``numpy.empty`` memory).

    Args:
        pathnames: Sequence of candidate file paths.
        labels: Iterable of class names; must include the parent directory
            name of every loaded file.
        patients_to_include: Iterable of substrings; a path is loaded when it
            contains at least one of them.

    Returns:
        Tuple ``(x, y)`` of ``numpy.uint8`` arrays: ``x`` stacks the loaded
        arrays along a new first axis, ``y`` holds one class index per row.

    Raises:
        ValueError: If no existing file matches any identifier.
        KeyError: If a file's parent directory name is not in ``labels``.
    """
    # Materialise once: the identifiers are scanned again for every path.
    patients = tuple(patients_to_include)

    print('All cells in treated patients: ',len(pathnames))
    selected = [
        pathname for pathname in pathnames
        if any(patient in pathname for patient in patients)
    ]
    print('Cells in all selected patient: ',len(selected))

    existing = [pathname for pathname in selected if os.path.isfile(pathname)]

    if not existing:
        raise ValueError(
            "no existing files found for patients {!r}".format(list(patients))
        )

    label_to_index = {label: index for index, label in enumerate(sorted(labels))}

    # Read the first array once: it provides the per-sample shape and row 0.
    first = numpy.load(existing[0])

    x = numpy.empty((len(existing),) + first.shape, dtype=numpy.uint8)
    y = numpy.empty((len(existing),), dtype=numpy.uint8)

    for index, pathname in enumerate(existing):
        label = os.path.basename(os.path.dirname(pathname))

        x[index] = first if index == 0 else numpy.load(pathname)
        y[index] = label_to_index[label]

    return x, y

# Load the cells of every patient listed in patients_to_test as one test set.
x_test, y_test = load_include_all(samples, labels, patients_to_test)        

In [None]:
# Keep entries 6 onward along the fourth axis, then drop the second-to-last
# entry of the last axis (presumably the channel axis -- confirm).
xx = numpy.delete(x_test[:, :, :, 6:], obj=-2, axis=-1)

# Release the full-size array before rebinding the name to the reduced copy.
del x_test
x_test = xx

In [None]:
# Report the shape of the reduced test set, then drop the temporary alias
# (x_test still references the same array).
print("Testing set: ", x_test.shape)
del(xx)  

In [None]:
# Build a network sized for one test sample and one output unit per class,
# then restore the weights stored in the model directory.
input_shape = x_test.shape[1:]
model = deepometry.model.Model(shape=input_shape, units=len(labels))
model.compile()

model_directory = '/models/resnet_drop_' + str(6) + '_channels'
weights_path = os.path.join(model_directory, 'model.h5')
model.model.load_weights(weights_path)

In [None]:
# Aggregate metrics over the whole test set.
evaluate_metrics = model.evaluate(x_test, y_test, batch_size=256, verbose=1)

# Per-sample model outputs, reduced to the index of the highest-scoring class.
class_scores = model.predict(x=x_test, batch_size=50)
predicted = numpy.argmax(class_scores, axis=1)

# y_test already holds integer class indices, so it is used as-is.
expected = y_test

In [None]:
# Confusion matrix over all tested patients (rows: expected class, columns:
# predicted class). `labels=` pins the matrix to one row/column per class
# index, so its shape does not depend on which classes occur in this test set.
class_names = sorted(labels)
confusion = sklearn.metrics.confusion_matrix(
    expected, predicted, labels=list(range(len(class_names)))
)

# Row-normalise. Rows without any expected sample stay NaN instead of
# triggering a divide-by-zero warning.
row_totals = confusion.sum(axis=1)[:, numpy.newaxis]
norm_confusion = numpy.full(confusion.shape, numpy.nan)
numpy.divide(confusion, row_totals, out=norm_confusion, where=row_totals != 0)

# Index i corresponds to sorted(labels)[i], the encoding used by the loaders.
norm_confusion = pandas.DataFrame(norm_confusion, index=class_names, columns=class_names)

# The font scale is read when the plot is drawn, so set it before plotting.
seaborn.set(font_scale = 1.5)

matplotlib.pyplot.figure(figsize=(12, 8))

seaborn.heatmap(norm_confusion, annot=True)

matplotlib.pyplot.savefig(os.path.join(model_directory, 'confusion_matrix_all_tested_patients.eps'), format='eps', dpi=300)

# newline="" lets the csv module control line endings itself.
with open(os.path.join(model_directory, 'metrics_all_tested_patients.csv'), "w", newline="") as metrics_csv:
    metrics_writer = csv.writer(metrics_csv)
    metrics_writer.writerow(model.model.metrics_names)
    metrics_writer.writerow(evaluate_metrics)

# Raw (un-normalised) counts are what gets persisted.
numpy.save(os.path.join(model_directory, 'confusion_matrix_all_tested_patients.npy'), confusion)

del x_test
#keras.backend.clear_session()
    

In [None]:
# Evaluate the saved model separately on each patient in patients_to_test and
# write one confusion matrix (.eps figure + .npy counts) and one metrics .csv
# per patient into the model directory.
for i in [6]:

    # Loop-invariant: depends only on i.
    model_directory = '/models/resnet_drop_' + str(i) + '_channels'

    # Index k corresponds to sorted(labels)[k], the encoding used by the loaders.
    class_names = sorted(labels)
    class_indices = list(range(len(class_names)))

    for patient_to_test in patients_to_test:

        x_test, y_test = load_include(samples, labels, patient_to_test)

        # Keep entries 6 onward along the fourth axis, then drop the
        # second-to-last entry of the last axis (presumably the channel axis).
        # NOTE(review): the offset is the literal 6, not `i`; adding other
        # values to the outer loop would not change the slicing -- confirm.
#         xx = x_test[:,:,:,i:]
        xx = numpy.delete(x_test[:, :, :, 6:], -2, -1)

        del x_test
        x_test = xx
        print('Testing : ',patient_to_test)
        print("Testing set: ", x_test.shape)
        del xx

        # The Keras session is cleared at the end of every iteration, so the
        # model is rebuilt and its weights reloaded for each patient.
        model = deepometry.model.Model(shape=x_test.shape[1:], units=len(labels))

        model.compile()

        model.model.load_weights(os.path.join(model_directory, 'model.h5'))

        evaluate_metrics = model.evaluate(x_test, y_test, batch_size=256, verbose=1)

        predicted = model.predict(
            batch_size=50,
            x=x_test
        )

        predicted = numpy.argmax(predicted, axis=1)
        expected = y_test

        # `labels=` keeps every patient's matrix at one row/column per class
        # index, even when a class is absent for that patient.
        confusion = sklearn.metrics.confusion_matrix(expected, predicted, labels=class_indices)

        # Row-normalise. Rows without any expected sample stay NaN instead of
        # triggering a divide-by-zero warning.
        row_totals = confusion.sum(axis=1)[:, numpy.newaxis]
        norm_confusion = numpy.full(confusion.shape, numpy.nan)
        numpy.divide(confusion, row_totals, out=norm_confusion, where=row_totals != 0)

        norm_confusion = pandas.DataFrame(norm_confusion, index=class_names, columns=class_names)

        matplotlib.pyplot.figure(figsize=(12, 8))

        seaborn.heatmap(norm_confusion, annot=True)

        matplotlib.pyplot.savefig(os.path.join(model_directory, 'confusion_matrix_' + patient_to_test + '.eps'), format='eps', dpi=300)

        # newline="" lets the csv module control line endings itself.
        with open(os.path.join(model_directory, 'metrics_' + patient_to_test + '.csv'), "w", newline="") as metrics_csv:
            metrics_writer = csv.writer(metrics_csv)
            metrics_writer.writerow(model.model.metrics_names)
            metrics_writer.writerow(evaluate_metrics)

        # Raw (un-normalised) counts are what gets persisted.
        numpy.save(os.path.join(model_directory, 'confusion_matrix_' + patient_to_test + '.npy'), confusion)

        # Free the patient's data and the Keras graph before the next patient.
        del x_test
        keras.backend.clear_session()
    