<a href="https://colab.research.google.com/github/davidseroussi/owkin-lung/blob/master/3DUnet_Survival.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from google.colab import drive
drive.mount('/content/drive')

!cp /content/drive/"My Drive"/chall_owkin/images.zip .
!unzip images.zip

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Archive:  images.zip
replace images/patient_003.npz? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


In [0]:
!pip install git+https://www.github.com/farizrahman4u/keras-contrib.git
!pip install keras==2.2.4
!pip install scikit-survival
!git clone https://github.com/davidseroussi/3DUnetCNN.git

In [0]:
cd 3DUnetCNN/

/content/3DUnetCNN


In [0]:
import numpy as np
import keras
import cv2
import os
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [0]:
class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_item_paths,dir_path, batch_size=1, dim=(80,80,80), n_channels=1,
                 n_classes=10, shuffle=True):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.list_item_paths = list_item_paths
        self.dir_path = dir_path
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_item_paths) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_item_paths_temp = [self.list_item_paths[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_item_paths_temp)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_item_paths))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_item_paths_temp):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, self.n_channels, *self.dim))
        y = np.empty((self.batch_size, self.n_channels, *self.dim))

        # Generate data
        for i, item_path in enumerate(list_item_paths_temp):
            # Store sample

            archive = np.load(self.dir_path + item_path)
            scan = archive['scan']
            mask = archive['mask']

            scan = cv2.resize(scan, dsize=(80, 80))[:,:,6:-6]
            mask = cv2.resize(mask.astype(np.uint8), dsize=(80, 80))[:,:,6:-6]

            X[i,] = np.expand_dims(scan, axis=0)

            # Store class
            y[i] = np.expand_dims(mask, axis=0)

        return X, y

In [0]:
dir_path = '../images/'
archives = os.listdir(dir_path)

np.random.shuffle(archives)

index_split = int(0.8*len(archives))

train_gen = DataGenerator(archives[:index_split], dir_path)
test_gen = DataGenerator(archives[index_split:], dir_path, batch_size=2)


In [0]:
from unet3d.model import isensee2017_model
model = isensee2017_model((1, 80, 80, 80), n_labels=1)

In [0]:
checkpoint = keras.callbacks.ModelCheckpoint("/content/drive/My Drive/chall_owkin/3DUnet.h5", save_best_only=True)
early_stopping = keras.callbacks.EarlyStopping(monitor="val_loss", patience=50)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10)
csv_logger = keras.callbacks.CSVLogger('/content/drive/My Drive/chall_owkin/training.log')

history = model.fit_generator(train_gen, validation_data=test_gen, epochs=500, callbacks=[checkpoint, early_stopping, reduce_lr, csv_logger])

In [0]:
# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

In [0]:
from unet3d.training import load_old_model
model = load_old_model("/content/drive/My Drive/chall_owkin/3DUnet.h5")

In [0]:
dir_path = '../images/'
archives = os.listdir(dir_path)
train_gen = DataGenerator(['patient_002.npz'], dir_path)

for X,y in train_gen:
  pred = model.predict(X)
  break

plt.imshow(X[0][0][:,:,10], cmap='gray')
plt.imshow(y[0][0][:,:,20], cmap='gray')
plt.imshow(pred[0][0][:,:,20], cmap='gray')

In [0]:
from keras.models import Model

layer_name = 'leaky_re_lu_13'

intermediate_model = keras.layers.GlobalAveragePooling3D()(model.get_layer(layer_name).output)

intermediate_model = Model(inputs=model.input,
                           outputs=intermediate_model)

In [0]:
ls /content/drive/'My Drive'/chall_owkin/

3DUnet.h5  [0m[01;34mfeatures[0m/  images.zip  output.csv  training.log


In [0]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score
import sksurv

train_clinical = pd.read_csv('/content/drive/My Drive/chall_owkin/features/clinical_data.csv')
train_clinical = train_clinical.set_index('PatientID')

train_radiomics = pd.read_csv('/content/drive/My Drive/chall_owkin/features/radiomics.csv', index_col=0, header=1)[1:]
train_radiomics.index = train_radiomics.index.astype(int)
train_radiomics.sort_index(inplace=True)

train_output = pd.read_csv('/content/drive/My Drive/chall_owkin/output.csv', index_col=0)
train_output.sort_index(inplace=True)

X = train_clinical[['SourceDataset', 'Nstage']]
le = LabelEncoder()
X['SourceDataset'] = le.fit_transform(X['SourceDataset'])
X.sort_index(inplace=True)

y = sksurv.util.Surv.from_dataframe('Event', 'SurvivalTime', train_output)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [0]:
dir_path = '../images/'
archives = os.listdir(dir_path)
archives = np.sort(archives)
train_gen = DataGenerator(archives, dir_path, shuffle=False)

In [0]:
feature_preds = intermediate_model.predict_generator(train_gen)

In [0]:
df_feature = pd.DataFrame(feature_preds)
df_feature.set_index(X.index, inplace=True)

In [0]:
concat = pd.concat([X, df_feature], axis=1)

In [0]:
from sksurv.ensemble import RandomSurvivalForest
model_rf = RandomSurvivalForest(n_estimators=1000)

In [0]:
model_rf.fit(concat, y)

RandomSurvivalForest(bootstrap=True, max_depth=None, max_features='auto',
                     max_leaf_nodes=None, min_samples_leaf=3,
                     min_samples_split=6, min_weight_fraction_leaf=0.0,
                     n_estimators=1000, n_jobs=None, oob_score=False,
                     random_state=None, verbose=0, warm_start=False)

In [0]:
model_rf.score(concat, y)

0.9438223571835263

In [0]:
from sklearn.model_selection import cross_val_score
from sksurv.ensemble import RandomSurvivalForest

model_rf = RandomSurvivalForest(n_estimators=1000)

scores = cross_val_score(model_rf, train_radiomics, y, cv=3)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.66 (+/- 0.02)
