In [None]:
from tf_weldon import WeldonPooling

In [None]:
# NOTE(review): `base_model` is not defined in any cell visible in this notebook,
# so this cell depends on kernel state created elsewhere — confirm or remove.
base_model.summary()

In [3]:
from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np
import tensorflow as tf

# Instantiate ResNet50 with ImageNet weights, requesting no top layers
# (include_top=False) and no pooling layer (pooling=None).
model = ResNet50(include_top=False, weights='imagenet', pooling=None)
# Print every layer together with its position in model.layers, so that
# positions can be used for slicing in later cells.
for i, layer in enumerate(model.layers):
    print(i, layer)
#model.summary()

0 <keras.engine.topology.InputLayer object at 0x7f8abc431b70>
1 <keras.layers.convolutional.ZeroPadding2D object at 0x7f8abc431860>
2 <keras.layers.convolutional.Conv2D object at 0x7f8abc431978>
3 <keras.layers.normalization.BatchNormalization object at 0x7f8abc4310b8>
4 <keras.layers.core.Activation object at 0x7f8abc4317f0>
5 <keras.layers.pooling.MaxPooling2D object at 0x7f8abc41ec88>
6 <keras.layers.convolutional.Conv2D object at 0x7f8abc427ac8>
7 <keras.layers.normalization.BatchNormalization object at 0x7f89eeb1d780>
8 <keras.layers.core.Activation object at 0x7f89eee67320>
9 <keras.layers.convolutional.Conv2D object at 0x7f89ee21f9b0>
10 <keras.layers.normalization.BatchNormalization object at 0x7f89ee212c50>
11 <keras.layers.core.Activation object at 0x7f89ec3622e8>
12 <keras.layers.convolutional.Conv2D object at 0x7f89ec3733c8>
13 <keras.layers.convolutional.Conv2D object at 0x7f89ec2d42e8>
14 <keras.layers.normalization.BatchNormalization object at 0x7f89ec33eac8>
15 <keras.l

In [7]:
for i, layer in enumerate(model.layers[142:]):
    print(i, layer.name)

for layer in model.layers[:142]:
    layer.trainable = False
    
model.summary()

0 res5a_branch2a
1 bn5a_branch2a
2 activation_139
3 res5a_branch2b
4 bn5a_branch2b
5 activation_140
6 res5a_branch2c
7 res5a_branch1
8 bn5a_branch2c
9 bn5a_branch1
10 add_46
11 activation_141
12 res5b_branch2a
13 bn5b_branch2a
14 activation_142
15 res5b_branch2b
16 bn5b_branch2b
17 activation_143
18 res5b_branch2c
19 bn5b_branch2c
20 add_47
21 activation_144
22 res5c_branch2a
23 bn5c_branch2a
24 activation_145
25 res5c_branch2b
26 bn5c_branch2b
27 activation_146
28 res5c_branch2c
29 bn5c_branch2c
30 add_48
31 activation_147
32 avg_pool
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, None, None, 3 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, None, None, 3 0        

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Function


class WeldonPool2dFunction(Function):
    """Autograd function pooling the spatial scores of every channel.

    For each (sample, channel) pair the h*w scores are sorted in descending
    order. The result is the mean of the ``kmax`` highest scores; when the
    resolved ``kmin`` is positive, the mean of the ``kmin`` lowest scores is
    added and the sum is halved.

    The class uses the instance-based Function interface: ``kmax``/``kmin``
    and the selected indices are stored on ``self`` and ``forward``/``backward``
    are regular (non-static) methods.
    """

    def __init__(self, kmax, kmin):
        super(WeldonPool2dFunction, self).__init__()
        self.kmax = kmax
        self.kmin = kmin

    def get_number_of_instances(self, k, n):
        """Translate the setting ``k`` into a number of regions out of ``n``.

        * ``k <= 0``     -> 0
        * ``0 < k < 1``  -> ``round(k * n)`` (``k`` is read as a proportion)
        * ``k > n``      -> ``int(n)``
        * otherwise      -> ``int(k)``
        """
        if k <= 0:
            return 0
        if k < 1:
            return round(k * n)
        return int(n) if k > n else int(k)

    def forward(self, input):
        """Return a ``(batch_size, num_channels)`` tensor of pooled scores.

        ``input`` is indexed on dimensions 0..3 as (batch, channels, h, w).
        """
        # sizes of the four dimensions the pooling works with
        batch_size, num_channels, h, w = (input.size(axis) for axis in range(4))
        n = h * w  # number of regions per channel

        kmax = self.get_number_of_instances(self.kmax, n)
        kmin = self.get_number_of_instances(self.kmin, n)

        # sort the flattened scores of each channel, best first
        flat_scores = input.view(batch_size, num_channels, n)
        ranked = input.new()
        order = input.new().long()
        torch.sort(flat_scores, dim=2, descending=True, out=(ranked, order))

        # mean of the kmax best regions; their positions are kept for backward
        self.indices_max = order.narrow(2, 0, kmax)
        output = ranked.narrow(2, 0, kmax).sum(2).div_(kmax)

        if kmin > 0:
            # mean of the kmin worst regions, averaged with the max part
            tail_start = n - kmin
            self.indices_min = order.narrow(2, tail_start, kmin)
            low_mean = ranked.narrow(2, tail_start, kmin).sum(2).div_(kmin)
            output.add_(low_mean).div_(2)

        # backward needs the input to recover the sizes
        self.save_for_backward(input)
        return output.view(batch_size, num_channels)

    def backward(self, grad_output):
        """Route ``grad_output`` back to the regions selected in ``forward``.

        Returns a tensor of shape ``(batch_size, num_channels, h, w)`` that is
        zero everywhere except at the selected max (and min) positions.
        """
        input, = self.saved_tensors

        batch_size, num_channels, h, w = (input.size(axis) for axis in range(4))
        n = h * w

        kmax = self.get_number_of_instances(self.kmax, n)
        kmin = self.get_number_of_instances(self.kmin, n)

        # one gradient value per (sample, channel), broadcast over the selection
        per_channel_grad = grad_output.view(batch_size, num_channels, 1)

        # each of the kmax selected positions receives grad / kmax
        grad_max = per_channel_grad.expand(batch_size, num_channels, kmax)
        grad_input = grad_output.new().resize_(batch_size, num_channels, n).fill_(0)
        grad_input.scatter_(2, self.indices_max, grad_max).div_(kmax)

        if kmin > 0:
            # same for the kmin selected positions, then halve like in forward
            grad_min = per_channel_grad.expand(batch_size, num_channels, kmin)
            grad_input_min = grad_output.new().resize_(batch_size, num_channels, n).fill_(0)
            grad_input_min.scatter_(2, self.indices_min, grad_min).div_(kmin)
            grad_input.add_(grad_input_min).div_(2)

        return grad_input.view(batch_size, num_channels, h, w)


class WeldonPool2d(nn.Module):
    """Module wrapper that applies ``WeldonPool2dFunction`` to its input.

    Args:
        kmax: setting for the number of top-scoring regions (default 1).
        kmin: setting for the number of lowest-scoring regions; when ``None``
            the value of ``kmax`` is used.
    """

    def __init__(self, kmax=1, kmin=None):
        super(WeldonPool2d, self).__init__()
        self.kmax = kmax
        # fall back to kmax when no explicit kmin was given
        self.kmin = kmax if kmin is None else kmin

    def forward(self, input):
        # a fresh function object is built for every call
        pooling_fn = WeldonPool2dFunction(self.kmax, self.kmin)
        return pooling_fn(input)

    def __repr__(self):
        return '{} (kmax={}, kmin={})'.format(
            self.__class__.__name__, str(self.kmax), str(self.kmin))


In [None]:
# Position of the first layer that should stay trainable; with 0 the second
# loop below re-enables every layer.
from_idx= 0
    
# First mark all layers as non-trainable ...
for layer in model.layers:
    layer.trainable = False
# ... then re-enable the layers from from_idx onward, printing each of them.
for layer in model.layers[from_idx:]:
    print(layer)
    layer.trainable = True
    

In [None]:
# Print the layer summary of `model` (defined in an earlier cell).
model.summary()

In [None]:
from config import Config
config = Config()
from TCGA_Datasets import TCGA_Dataset

# Build the project dataset object from the configuration.
dataset = TCGA_Dataset(config)

# Take the 'test' entries: element 0 of the private `_partition` attribute is
# used for the samples and element 1 for the labels.
samples = dataset._partition[0]['test']
labels = dataset._partition[1]['test']
# NOTE(review): the meaning of `phase` and `size` is defined in TCGA_Datasets,
# which is not shown here.
X, y = dataset.convert_to_arrays(samples, labels, phase = 'test', size = 1)

In [None]:
# Number of entries in y divided by config.sampling_size_val
# (presumably the number of images — confirm).
len(y) / config.sampling_size_val

In [None]:
def patch_to_image(y_patches, proba=True, sampling_size=None):
    """Aggregate patch-level values into one value per image.

    ``y_patches`` is read as consecutive groups of ``sampling_size`` entries;
    each group is reduced to its mean. Trailing entries that do not fill a
    complete group are ignored.

    Args:
        y_patches: sequence or 1-D array of patch-level values.
        proba: if true, return the group means; otherwise return booleans
            telling whether each group mean is greater than 0.5.
        sampling_size: number of patches per image. Defaults to
            ``config.sampling_size_val`` (module-level ``config``).

    Returns:
        1-D numpy array with ``len(y_patches) // sampling_size`` entries.
    """
    if sampling_size is None:
        sampling_size = config.sampling_size_val

    # Both modes must iterate over the number of complete groups. The
    # non-probability mode previously iterated once per patch, so it returned
    # len(y_patches) values, most of them computed from empty slices.
    n_images = len(y_patches) // sampling_size
    y_image = np.array([
        np.mean(y_patches[i * sampling_size:(i + 1) * sampling_size])
        for i in range(n_images)
    ])

    if not proba:
        y_image = y_image > 0.5
    return y_image.flatten()

In [None]:
import numpy as np 
# Compare the length of the image-level output with the number of patch-level
# entries. Only the last expression (len(y)) is echoed by the notebook; the
# first result is computed and discarded.
len(patch_to_image(y, proba = False))
len(y)
#len(y/config.sampling_size_val)

In [None]:
import pandas as pd 
import numpy as np 
import os 
from sklearn.preprocessing import LabelEncoder

# Entries of the patches directory (presumably one folder per patient — confirm).
images_ids = os.listdir('/labs/gevaertlab/data/cedoz/patches_448')
# Identifiers from the spreadsheet; its 'Patient' column is used as the index.
labels_ids = list(pd.read_excel('TCGA-MICCAI-Patients.xlsx',  index_col = 'Patient').index)
# Keep only identifiers that appear in both lists.
labels = np.intersect1d(labels_ids, images_ids)
# NOTE(review): the same spreadsheet is read a second time here.
table = pd.read_excel('TCGA-MICCAI-Patients.xlsx', index_col = 'Patient')

# Spreadsheet rows restricted to the identifiers kept above.
c = table[table.index.isin(labels)]
le = LabelEncoder()

# One shared encoder is re-fitted on every column, so afterwards le.classes_
# only reflects the column that was fitted last.
binarized_data = c.apply(le.fit_transform)
print(le.classes_)
test_data = pd.read_table('MICCAI_labels.txt', index_col = 0, delim_whitespace = True, header = 0)
# NOTE(review): LabelEncoder.fit returns the encoder itself, not encoded
# values, so y_test probably does not hold integer codes; fit_transform (used
# two lines below) looks like what was intended — confirm.
y_test = test_data.apply(le.fit).values.flatten()
#binarized_data.values.flatten()
test_data.apply(le.fit_transform).values.flatten()

In [None]:
import pandas as pd 
# Reload the patient spreadsheet and keep the rows whose index is in `labels`.
# NOTE(review): `labels` and `LabelEncoder` are not defined/imported in this
# cell; they come from another cell's kernel state.
df = pd.read_excel('TCGA-MICCAI-Patients.xlsx',index_col = 'Patient')
df = df[df.index.isin(labels)]
le = LabelEncoder()
# Encode every column with the shared encoder, then flatten to 1-D.
binarized_data = df.apply(le.fit_transform).values.flatten()
test_data = pd.read_table('MICCAI_labels.txt', index_col = 0, delim_whitespace = True, header = 0)
# Index of the label file (its first column, per index_col = 0).
ids_test = test_data.index
# Echo the classes learned by the most recent fit.
le.classes_

In [None]:
# Read the whitespace-delimited label file (first column as index, first row
# as header) and show its columns encoded with `le`, flattened to 1-D.
# NOTE(review): `le` comes from an earlier cell and is re-fitted here.
output_data = pd.read_table('MICCAI_labels.txt', index_col = 0, delim_whitespace = True, header = 0)
output_data.apply(le.fit_transform).values.flatten()


In [None]:
import numpy as np
# Small experiment: np.repeat repeats each element of `a` 8 times in place
# (1,1,...,2,2,...), which is the layout needed to expand one value per image
# into one value per patch.
a = [1,2,3,4,5,6,7,8,9,10]
np.repeat(a, 8)



In [None]:
from config import Config

In [None]:
# Instantiate the project configuration (Config is imported in the previous cell).
config = Config()

In [None]:
from tensorflow.contrib.slim.python.slim.nets import resnet_v1
import tensorflow.contrib.slim as slim


In [None]:

def get_binarized_data():
    """Read 'MICCAI_labels.txt' and binarize each of its columns.

    The file is parsed as whitespace-delimited with the first column as index
    and the first row as header. Every column is passed through a fresh
    LabelBinarizer and only column 0 of the binarizer output is kept.

    Returns:
        The result of ``DataFrame.apply`` over the columns (axis=0).
    """
    def first_binarized_column(column):
        # keep only the first output column of the binarizer
        return LabelBinarizer().fit_transform(column)[:, 0]

    label_table = pd.read_table('MICCAI_labels.txt', index_col = 0, delim_whitespace = True, header = 0)
    return label_table.apply(first_binarized_column, axis=0)

def get_partition(self):
    """Shuffle the sample folders and store a train/val/test split on ``self``.

    The folder names under the patches directory are shuffled in place and cut
    at ``int((1 - val_size) * n)`` and ``int((1 - test_size) * n)``, both sizes
    read from ``self.config``. The three resulting lists are stored in
    ``self.partition`` under the keys 'train', 'val' and 'test'.

    Changes from the previous version: ``test_size`` is now read from
    ``self.config`` like ``val_size`` (it was read from a module-level
    ``config``), and the unused ``self.get_ids(train_samples)`` call was
    dropped — it listed every training folder and discarded the result.

    NOTE(review): with these cut points the 'val' part is empty whenever
    val_size == test_size, and 'train' and 'test' overlap when
    val_size < test_size — confirm how val_size is meant to be interpreted.
    """
    samples = os.listdir("/labs/gevaertlab/data/MICCAI/patches_448")
    np.random.shuffle(samples)

    n_samples = len(samples)
    idx_val = int((1 - self.config.val_size) * n_samples)
    idx_test = int((1 - self.config.test_size) * n_samples)

    # np.split returns arrays; callers get plain lists
    train_samples, val_samples, test_samples = (
        list(part) for part in np.split(samples, [idx_val, idx_test])
    )
    self.partition = {'train': train_samples, 'val': val_samples, 'test': test_samples}

def get_ids(self, samples):
    """Draw patch paths for every sample, with replacement.

    For each sample the files of its folder under the patches directory are
    listed and ``self.config.sampling_size_train`` names are drawn with
    ``np.random.choice(..., replace=True)``. Each drawn name is turned into
    ``"<data_path>/patches_<patch_size>/<sample>/<patch>"`` using
    ``self.config.data_path`` and ``self.config.patch_size``.

    Returns:
        Flat list of path strings, samples in input order.
    """
    ids = []
    for sample in samples:
        available = os.listdir("/labs/gevaertlab/data/MICCAI/patches_448/%s" % sample)
        cfg = self.config
        drawn = np.random.choice(available, size=cfg.sampling_size_train, replace=True)
        ids.extend(
            "%s/patches_%d/%s/%s" % (cfg.data_path, cfg.patch_size, sample, patch)
            for patch in drawn
        )
    return ids

def get_labels(self):
    """Build ``self.labels`` as ``{label column: {sample: value}}``.

    Samples are the entries of ``"<data_path>/patches_<patch_size>"`` (both
    taken from ``self.config``); values come from the table returned by
    ``self.get_binarized_data()``, looked up by (sample row, label column).

    Fixes over the previous version:
      * the directory string had no format specifiers but was combined with a
        two-element tuple via ``%``, which raises TypeError; the path is now
        built from the template also used when patch IDs are assembled;
      * the lookup was ``data.loc[column, sample]``; it now addresses the
        sample as the row and the label as the column.
    """
    patch_root = "%s/patches_%d" % (self.config.data_path, self.config.patch_size)
    samples = os.listdir(patch_root)
    data = self.get_binarized_data()
    self.labels = {
        column: {sample: data.loc[sample, column] for sample in samples}
        for column in data.columns
    }

In [None]:
import pandas as pd 
from sklearn.preprocessing import LabelBinarizer
import os
import numpy as np
from config import Config
from PIL import Image


# Module-level configuration object read by the helper functions below.
config = Config()


def get_binarized_data():
    """Load 'MICCAI_labels.txt' and return its columns in binarized form.

    The file is read as a whitespace-delimited table (first column is the
    index, first row the header). Each column is transformed by its own
    LabelBinarizer and reduced to column 0 of the transformed output.
    """
    def binarize(column):
        encoded = LabelBinarizer().fit_transform(column)
        return encoded[:, 0]

    raw_labels = pd.read_table('MICCAI_labels.txt', index_col = 0, delim_whitespace = True, header = 0)
    return raw_labels.apply(binarize, axis=0)

def get_labels():
    """Return ``{label column: {sample: value}}`` for every sample folder.

    Sample names are the entries of the patches directory; values are looked
    up in the table from ``get_binarized_data()`` with the sample as row and
    the label column as column.
    """
    sample_names = os.listdir("/labs/gevaertlab/data/MICCAI/patches_448")
    table = get_binarized_data()
    return {
        column: {name: table.loc[name, column] for name in sample_names}
        for column in table.columns
    }

def get_ids(samples):
    """Draw patch file paths for each sample, with replacement.

    For every sample, ``config.sampling_size_train`` file names are drawn from
    its folder with ``np.random.choice(..., replace=True)`` and returned as
    full paths under the patches directory, samples in input order.
    """
    ids = []
    for sample in samples:
        available = os.listdir("/labs/gevaertlab/data/MICCAI/patches_448/%s" % sample)
        drawn = np.random.choice(available, size= config.sampling_size_train, replace=True)
        ids.extend(
            "/labs/gevaertlab/data/MICCAI/patches_448/%s/%s"%(sample, patch)
            for patch in drawn
        )
    return ids


def get_partition():
    """Shuffle the sample folders and split them into train/val/test lists.

    The folder names under the patches directory are shuffled in place and cut
    at ``int((1 - config.val_size) * n)`` and ``int((1 - config.test_size) * n)``.

    The previous version also called ``get_ids(train_samples)`` and discarded
    the result, which listed every training folder and drew random patches for
    nothing; that dead call has been removed.

    NOTE(review): with these cut points the 'val' part is empty whenever
    val_size == test_size, and 'train' and 'test' overlap when
    val_size < test_size — confirm how val_size is meant to be interpreted.

    Returns:
        dict with keys 'train', 'val' and 'test', each a list of sample names.
    """
    samples = os.listdir("/labs/gevaertlab/data/MICCAI/patches_448")
    np.random.shuffle(samples)

    n_samples = len(samples)
    idx_val = int((1 - config.val_size) * n_samples)
    idx_test = int((1 - config.test_size) * n_samples)

    # np.split returns arrays; callers get plain lists
    train_samples, val_samples, test_samples = (
        list(part) for part in np.split(samples, [idx_val, idx_test])
    )
    return {'train': train_samples, 'val': val_samples, 'test': test_samples}
    
def convert_to_arrays(samples, labels):
    """Load randomly drawn patches of each sample and their labels as arrays.

    For every sample, ``config.sampling_size_val`` patch files are drawn with
    replacement from its folder, opened with PIL and converted to arrays
    restricted to the first three channels (``[:, :, :3]``, so images are
    expected to have a channel axis).

    Args:
        samples: iterable of sample folder names.
        labels: mapping ``{label name: {sample: value}}``.

    Returns:
        ``(X, y)``: ``X`` is the array of loaded patches; ``y`` holds, for each
        patch, the value of its sample under the LAST key of ``labels``
        (earlier keys are looked up but overwritten, as before).

    Raises:
        ValueError: if ``labels`` has no keys (previously this surfaced as an
            UnboundLocalError after all images had been loaded).
    """
    if len(labels.keys()) == 0:
        raise ValueError("labels must contain at least one label")

    X, ids = [], []
    for sample in samples:
        patches = os.listdir("/labs/gevaertlab/data/MICCAI/patches_448/%s"%sample)
        patches = np.random.choice(patches, size=config.sampling_size_val, replace=True)
        for patch in patches:
            ID = "/labs/gevaertlab/data/MICCAI/patches_448/%s/%s"% (sample, patch)
            ids.append(ID)
            # np.array() forces the pixel data to be read, so the file can be
            # closed right away instead of leaking one handle per patch.
            with Image.open(ID) as img:
                X.append(np.array(img)[:,:,:3])
    X = np.asarray(X)

    # The sample name is the parent folder of each patch path; it does not
    # depend on the label, so compute it once.
    patch_samples = [ID.split('/')[-2] for ID in ids]

    for label in labels.keys():
        # NOTE(review): y is overwritten on every iteration, so only the last
        # label is returned — confirm whether multi-label output is wanted.
        y = np.asarray([labels[label][sample] for sample in patch_samples])

    return X, y

In [None]:
# Take the 'train' part of a fresh random partition, build the label lookup,
# and load the patches; the (X, y) result is echoed, not stored.
samples =  get_partition()['train']
labels = get_labels()
convert_to_arrays(samples,labels)