In [None]:
%matplotlib inline

# To specify the site-packages location:
import sys
sys.path.insert(0, '/home/.conda/envs/tensorflow/lib/python3.6/site-packages')

import glob
import os
import itertools
import re
from collections import Counter
import csv

import numpy
import tensorflow
import keras
import sklearn.metrics

import matplotlib.pyplot
import pandas
import seaborn

import deepometry.model

In [None]:
def _shape(pathname):
    """Return the shape of the array stored in the NumPy file at `pathname`."""
    return numpy.load(pathname).shape


def load(pathnames, labels, patient_to_exclude):
    """Load arrays from disk together with their integer class labels.

    The class of a file is the name of the directory that directly contains
    it; that name is mapped to its position in the alphabetically sorted
    `labels`.

    Paths that are not existing files are skipped, so every returned row
    holds real data (previously such rows were left uninitialised).

    Args:
        pathnames: Sequence of paths to ``.npy`` files.
        labels: Iterable of class names.
        patient_to_exclude: Unused; kept so existing callers keep working.

    Returns:
        ``(x, y)`` where ``x`` is a ``uint8`` array holding one loaded array
        per existing file and ``y`` is a ``uint8`` array of class indices, in
        the same order as the existing entries of `pathnames`.

    Raises:
        ValueError: If none of `pathnames` is an existing file.
        KeyError: If a file's parent directory name is not in `labels`.
    """
    label_to_index = {label: index for index, label in enumerate(sorted(labels))}

    # Filter up front so the output buffers are sized to the files that are
    # actually going to be loaded and no row is left unfilled.
    existing = [pathname for pathname in pathnames if os.path.isfile(pathname)]

    if not existing:
        raise ValueError("none of the given pathnames is an existing file")

    # All arrays are stored in one buffer shaped after the first file.
    x = numpy.empty((len(existing),) + _shape(existing[0]), dtype=numpy.uint8)

    y = numpy.empty((len(existing),), dtype=numpy.uint8)

    for index, pathname in enumerate(existing):
        label = os.path.basename(os.path.dirname(pathname))

        x[index] = numpy.load(pathname)

        y[index] = label_to_index[label]

    return x, y

In [None]:
def sample(directories, nsamples=3000):
    """Randomly pick up to `nsamples` paths from every class subdirectory.

    Each entry of `directories` is expected to contain one subdirectory per
    class. The entries of every subdirectory are shuffled with
    ``numpy.random.permutation`` and the first `nsamples` of them are kept.

    Args:
        directories: Iterable of root directory paths.
        nsamples: Maximum number of paths kept per subdirectory. ``None``
            keeps every path. Defaults to 3000.

    Returns:
        A flat list with the selected paths of all subdirectories, grouped by
        subdirectory in sorted order.
    """
    pathnames = []

    for directory in directories:
        subdirectories = sorted(glob.glob(os.path.join(directory, "*")))

        # List every subdirectory before drawing any random numbers.
        subdirectory_pathnames = [
            glob.glob(os.path.join(subdirectory, "*"))
            for subdirectory in subdirectories
        ]

        for candidates in subdirectory_pathnames:
            # Shuffle, then cap; a subdirectory with fewer entries than
            # `nsamples` contributes all of them.
            pathnames.extend(numpy.random.permutation(candidates)[:nsamples])

    return pathnames

In [None]:
def get_class_weights(y):
    """Weight every class by how much rarer it is than the most common one.

    Args:
        y: Iterable of hashable class identifiers, one per sample.

    Returns:
        Dict mapping each class to ``largest_class_count / class_count`` as a
        float, so the most frequent class gets weight 1.0.
    """
    occurrences = Counter(y)
    largest = max(occurrences.values())

    weights = {}
    for cls, count in occurrences.items():
        weights[cls] = float(largest / count)
    return weights

In [None]:
def collect_pathnames(directories, labels, patient="157pres"):
    """Collect a size-capped random selection of ``.npy`` paths per class.

    For every root directory, the ``.npy`` files of each subdirectory are
    listed. The cap is the size of the largest subdirectory that holds at
    least one file whose path mentions `patient`; up to that many randomly
    chosen paths are then taken from every subdirectory.

    NOTE(review): the original filter was ``'157pres' in pathnames``, which
    tests list membership and therefore never matched, making ``max()`` fail
    on every call. Matching `patient` as a substring of the individual paths
    is the presumed intent - confirm.

    Args:
        directories: Iterable of root directory paths.
        labels: Unused; kept so existing callers keep working.
        patient: Substring identifying the reference patient's files.

    Returns:
        A flat list with the selected paths of all subdirectories.

    Raises:
        ValueError: If no subdirectory of a root directory contains a file
            for `patient`.
    """
    pathnames = []

    for directory in directories:
        subdirectories = sorted(glob.glob(os.path.join(directory, "*")))

        # One list of .npy files per class subdirectory.
        subdirectory_pathnames = [glob.glob("{}/*.npy".format(subdirectory)) for subdirectory in subdirectories]

        # Sizes of the subdirectories that contain the reference patient.
        sizes = [
            len(candidates)
            for candidates in subdirectory_pathnames
            if any(patient in candidate for candidate in candidates)
        ]

        if not sizes:
            raise ValueError(
                "no subdirectory of {!r} contains files for patient {!r}".format(directory, patient)
            )

        nsamples = max(sizes)

        for candidates in subdirectory_pathnames:
            pathnames.extend(numpy.random.permutation(candidates)[:nsamples])

    return pathnames

In [None]:
def load_include(pathnames, labels, patient_to_include):
    """Load arrays from disk together with their integer class labels.

    The class of a file is the name of the directory that directly contains
    it; that name is mapped to its position in the alphabetically sorted
    `labels`.

    Paths that are not existing files are skipped, so every returned row
    holds real data (previously such rows were left uninitialised).

    NOTE(review): filtering `pathnames` down to `patient_to_include` is
    disabled in this function, so the argument is currently unused and both
    printed counts are the same number - confirm whether the filter should
    be restored.

    Args:
        pathnames: Sequence of paths to ``.npy`` files.
        labels: Iterable of class names.
        patient_to_include: Currently unused (see note above).

    Returns:
        ``(x, y)`` where ``x`` is a ``uint8`` array holding one loaded array
        per existing file and ``y`` is a ``uint8`` array of class indices, in
        the same order as the existing entries of `pathnames`.

    Raises:
        ValueError: If none of `pathnames` is an existing file.
        KeyError: If a file's parent directory name is not in `labels`.
    """
    print('All cells in treated patients: ',len(pathnames))
    print('Cells in this patient: ',len(pathnames))

    label_to_index = {label: index for index, label in enumerate(sorted(labels))}

    # Filter up front so the output buffers are sized to the files that are
    # actually going to be loaded and no row is left unfilled.
    existing = [pathname for pathname in pathnames if os.path.isfile(pathname)]

    if not existing:
        raise ValueError("none of the given pathnames is an existing file")

    # The first array fixes the per-sample shape; it is kept and reused below
    # so the file is only read once.
    first = numpy.load(existing[0])

    x = numpy.empty((len(existing),) + first.shape, dtype=numpy.uint8)

    y = numpy.empty((len(existing),), dtype=numpy.uint8)

    for index, pathname in enumerate(existing):
        label = os.path.basename(os.path.dirname(pathname))

        x[index] = first if index == 0 else numpy.load(pathname)

        y[index] = label_to_index[label]

    return x, y

In [None]:
# Class names; the loaders map each one to its index in sorted order.
labels = [
    "Leukemic",
    "Normal",
    "Others",
]

In [None]:
# Root folders whose subdirectories are searched for files.
directories = [
    "/parsed_data/",
]

In [None]:
# Draw a random selection of file paths from every subdirectory of the roots.
samples = sample(directories)
# Display how many paths were selected in total.
len(samples)

In [None]:
# Patients held out for testing. Other identifiers that have been used here:
# '157pres','157day8','157day15','171pres','171day11','172pres','172day29',
# '175pres','175day8','177pres','177day8'
patients_to_test = ['157pres']

# Training pool: every sampled path that mentions none of the held-out patients.
selected_to_train = [
    pathname
    for pathname in samples
    if not any(patient in pathname for patient in patients_to_test)
]

len(selected_to_train)

In [None]:
def save_metadata_label(predicted,true,labels,file):
    """Write predicted and true class names to a tab-separated file.

    The file starts with the header ``Predicted<TAB>True`` followed by one
    row per entry of `predicted`.

    Args:
        predicted: Array of predicted class indices; its ``shape[0]`` sets
            the number of rows written.
        true: Indexable of true class indices, aligned with `predicted`.
        labels: Indexable mapping a class index to its name.
        file: Path of the output file; it is overwritten.
    """
    with open(file, 'w') as handle:
        handle.write('Predicted\tTrue\n')
        handle.writelines(
            '{}\t{}\n'.format(labels[predicted[row]], labels[true[row]])
            for row in range(predicted.shape[0])
        )

In [None]:
from keras.layers import *
from keras.models import Sequential
from sklearn import preprocessing
import cv2    

In [None]:
# Drop the first 6 channels; what is left are the bright-field and dark-field channels.

for i in [6]:

    # Build a session running on GPU 1 that allocates GPU memory on demand.
    configuration = tensorflow.ConfigProto()
    configuration.gpu_options.allow_growth = True
    configuration.gpu_options.visible_device_list = "1"
    session = tensorflow.Session(config = configuration)

    # Make Keras use this session.
    keras.backend.set_session(session)

    for patient_to_test in patients_to_test:

        x_test, y_test = load_include(samples, labels, patient_to_test)

        # Keep only the channels from index i onwards.
        xx = x_test[:,:,:,i:]

        print('Testing : ',patient_to_test)
        print("Testing set: ", xx.shape)

        model = deepometry.model.Model(shape=xx.shape[1:], units=len(labels))

        model.compile()

        # Weights of the network trained with the same number of dropped channels.
        model_directory = '/models/resnet_drop_' + str(i) + '_channels'

        model.model.load_weights(os.path.join(model_directory,'model.h5'))

        # Class prediction: index of the highest score per sample.
        predicted = model.predict(
            batch_size=50,
            x=xx
        )

        predicted = numpy.argmax(predicted, axis=1)
        save_metadata_label(predicted,y_test,labels,'metadata_label_'+patient_to_test+'.tsv')

        # Feature extraction: take the activations of the second-to-last
        # layer of the trained network. Wrapping that layer alone in a
        # Sequential model would feed it the raw images instead of the output
        # of the preceding layers, so a sub-model from the network input to
        # that layer's output is built instead.
        # NOTE(review): assumes model.model is the underlying Keras model and
        # exposes `.input` - confirm against deepometry.model.
        layers = model.model.layers
        abstract_model = keras.models.Model(
            inputs=model.model.input,
            outputs=layers[-2].output
        )

        extracted_features = abstract_model.predict(
            batch_size=50,
            x=xx
        )

        # NOTE: despite the 'scaled_' prefix the features are saved unscaled;
        # pass them through preprocessing.scale(...) first if scaling is wanted.
        numpy.save('scaled_features_'+patient_to_test+'.npy' , extracted_features)

In [None]:
def images_to_sprite(data):
    """Creates the sprite image along with any necessary padding.

    Every image is rescaled independently to the range [0, 1]; the images are
    then laid out row by row on a square grid that is padded with black tiles
    when the number of images is not a perfect square.

    Args:
      data: NxHxW[x3] tensor containing the images.
    Returns:
      data: Properly shaped HxWx3 uint8 image with any necessary padding.
    """
    # Grayscale input: replicate the single plane into three channels.
    if len(data.shape) == 3:
        data = numpy.tile(data[...,numpy.newaxis], (1,1,1,3))
    data = data.astype(numpy.float32)

    # Per-image minimum/maximum; the image axis is moved last so the
    # per-image values broadcast, then moved back to the front.
    lowest = numpy.min(data.reshape((data.shape[0], -1)), axis=1)
    data = (data.transpose(1,2,3,0) - lowest).transpose(3,0,1,2)
    highest = numpy.max(data.reshape((data.shape[0], -1)), axis=1)
    # A constant image has a range of 0; divide by 1 instead so it becomes
    # all zeros rather than NaN.
    highest[highest == 0] = 1
    data = (data.transpose(1,2,3,0) / highest).transpose(3,0,1,2)

    # Side length of the smallest square grid that holds all images.
    n = int(numpy.ceil(numpy.sqrt(data.shape[0])))
    padding = ((0, n ** 2 - data.shape[0]), (0, 0),
            (0, 0)) + ((0, 0),) * (data.ndim - 3)
    data = numpy.pad(data, padding, mode='constant',
            constant_values=0)
    # Tile the individual thumbnails into an image.
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3)
            + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
    data = (data * 255).astype(numpy.uint8)
    return data

In [None]:
# Buffer for one 3-channel image per loaded sample. The sample count and image
# size are taken from x_test so the channel assignments that follow always fit.
a = numpy.empty(x_test.shape[:3] + (3,))

In [None]:
# Build each 3-channel image from two of the stored channels.
# NOTE(review): channel 7 fills both of the last two planes - confirm that is intended.
a[..., 0] = x_test[..., 0]
a[..., 1] = x_test[..., 7]
a[..., 2] = x_test[..., 7]

In [None]:
# Display the shape of the assembled image array.
a.shape

In [None]:
# Tile all images into a single sprite sheet.
sprite = images_to_sprite(a)
# Write the sprite sheet to disk.
# NOTE(review): OpenCV presumably interprets 3-channel data as BGR, so the first
# and last planes may appear swapped in the file - confirm the intended order.
# NOTE(review): the return value of imwrite is not checked - confirm that a
# failed write cannot go unnoticed here.
cv2.imwrite('sprite.png', sprite)