In [None]:
# Extract tiles
!unzip "/content/drive/MyDrive/Data Science/DS projects/MIL CCRCC/40_20/Train_500.zip" -d "/content/Train_500" # Train dataset 500 samples
!unzip "/content/drive/MyDrive/Data Science/DS projects/MIL CCRCC/40_20/Train_1000.zip" -d "/content/Train_1000" # Train dataset 1000 samples
!unzip "/content/drive/MyDrive/Data Science/DS projects/MIL CCRCC/40_20/Train_2000.zip" -d "/content/Train_2000" # Train dataset 2000 samples
!unzip "/content/drive/MyDrive/Data Science/DS projects/MIL CCRCC/40_20/Validation_100.zip" -d "/content/Validation" # Validation dataset

In [None]:
# Move each label table (df.csv) out of the extracted bag directory and up to
# the dataset root. Later cells read the labels from '/content/<set>/df.csv'
# and list the bag folders from the inner '/content/<set>/<archive name>' directory.
!mv "/content/Train_500/Train_500/df.csv" "/content/Train_500"
!mv "/content/Train_1000/Train_1000/df.csv" "/content/Train_1000"
!mv "/content/Train_2000/Train_2000/df.csv" "/content/Train_2000"
!mv "/content/Validation/Validation_100/df.csv" "/content/Validation"

# Import libraries

In [None]:
import numpy as np
from os import listdir
import os
import pandas as pd
import tensorflow as tf
import sys
sys.path.append('/content/drive/MyDrive/Data Science/DS projects/MIL CCRCC/py_files')
from CustomDataGenerator import CustomDataGenerator
from SimpleModel import SimpleModel

# Function definitions

In [None]:
# Create pandas Dataframe
def make_dataframe(dir_inst, dir_labels, shuffle = True):
    """Build a bag-level pandas DataFrame from a directory of instance folders.

    Every direct sub-directory of ``dir_inst`` is treated as one bag whose
    folder name is the integer case id. The files inside that folder are the
    bag's instances. Entries of ``dir_inst`` that are not directories (for
    example a label CSV stored next to the bags) are ignored.

    Parameters
    -------------------
    dir_inst (str) - path to the directory with folders, where each folder is a bag of instances
    dir_labels (str) - path to a CSV file with labels; it must contain the columns
        'case' (integer case id) and 'y_true'
    shuffle (boolean) - if true, rows are returned in random order; otherwise
        they are sorted by 'case'. Default - True

    Returns
    -------------------
    pandas.DataFrame
        Columns 'case', 'X_col' (list of file paths of the bag) and 'y_true'.
        Only cases present both on disk and in the label table are kept
        (inner merge).

    Raises
    -------------------
    ValueError
        If a bag folder name cannot be converted to an integer.
    """
    # Only directories are bags; stray files must not be counted as cases.
    cases = sorted(
        name for name in os.listdir(dir_inst)
        if os.path.isdir(os.path.join(dir_inst, name))
    )
    print('There are ' + str(len(cases)) + ' folders in directory')

    # Build each row from a single folder so that a case id can never be
    # paired with the files of another bag (independent sorting of ids and
    # file lists would misalign them, e.g. when a bag is empty).
    data = list()
    for case in cases:
        folder = os.path.join(dir_inst, case)
        data.append([folder + '/' + x for x in os.listdir(folder)])
    df = pd.DataFrame({'patient': cases, 'X_col': data})

    # merge with label dataframe
    label_df = pd.read_csv(dir_labels)
    df['patient'] = df['patient'].astype('int')
    df = df.merge(label_df, left_on = 'patient', right_on = 'case', how = 'inner').loc[:,['case','X_col','y_true']]

    # shuffle condition
    if shuffle:
        df = df.sample(frac = 1, ignore_index = True)
    else:
        df = df.sort_values(by = ['case'], ignore_index = True)

    print('_ _ _ _ _ Pandas dataframe is ready _ _ _ _ _')
    return df


# Train SimpleModel (Model_500) with dataset, which contains 500 bags of instances

In [None]:
# Bag tables for the 500-bag experiment: training rows are shuffled,
# validation rows are kept sorted by case.
train_df = make_dataframe(
    dir_inst='/content/Train_500/Train_500',
    dir_labels='/content/Train_500/df.csv',
    shuffle=True,
)
val_df = make_dataframe(
    dir_inst='/content/Validation/Validation_100',
    dir_labels='/content/Validation/df.csv',
    shuffle=False,
)

# Training generator: shuffle and augmentations switched on.
train_gen_500 = CustomDataGenerator(df=train_df, shuffle=True, augmentations=True)
# Validation generator: shuffle and augmentations switched off.
val_gen_500 = CustomDataGenerator(df=val_df, shuffle=False, augmentations=False)


In [None]:
# Callbacks

# Save the full model (not only the weights) each time val_loss reaches a new minimum.
# Accessed through `tf.keras`, the only Keras namespace this notebook imports.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
        '/content/drive/MyDrive/Data Science/DS projects/MIL CCRCC/40_20/Models/Simple_Model_500/Simple_Model_500',
        monitor="val_loss",
        verbose=1,
        mode="min",
        save_best_only=True,
        save_weights_only= False,
    )
# Per-epoch metrics log. append=True adds rows to an existing file, so
# re-running training keeps earlier runs in the same CSV.
csv_logger = tf.keras.callbacks.CSVLogger('/content/Simple_Model_500.csv', 
                                          separator=",", 
                                          append=True)

# Stop training once val_loss has not improved for 10 epochs.
es = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=10,
    verbose=1,
    mode="min")



In [None]:
# Adam optimiser for the 500-bag model.
# NOTE(review): `decay` is a legacy-optimizer argument; newer TensorFlow
# releases may reject it on tf.keras.optimizers.Adam - confirm against the
# installed version.
opt = tf.keras.optimizers.Adam(
    learning_rate=1e-3,
    decay=0.0005,
    beta_1=0.9,
    beta_2=0.999,
)

model_500 = SimpleModel(bag_size=40, instance_shape=(256, 256, 3))

# Precision and recall are tracked for class index 1 only.
model_500.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=[
        "accuracy",
        tf.keras.metrics.AUC(name='AUC'),
        tf.keras.metrics.AUC(curve='PR', name='PR_AUC'),
        tf.keras.metrics.Precision(name='Precision', class_id=1),
        tf.keras.metrics.Recall(name='Recall', class_id=1),
    ],
)

In [None]:
# Train for at most 100 epochs; checkpointing, CSV logging and early stopping
# are handled by the callbacks.
# NOTE(review): Keras documents that `batch_size` should not be given when the
# input is a generator or Sequence - confirm what CustomDataGenerator is.
model_500.fit(
    train_gen_500,
    validation_data=val_gen_500,
    epochs=100,
    batch_size=1,
    callbacks=[model_checkpoint, csv_logger, es],
    verbose=1,
)

# Train SimpleModel (Model_1000) with dataset, which contains 1000 bags of instances

In [None]:
# Bag tables for the 1000-bag experiment: training rows are shuffled,
# validation rows are kept sorted by case.
train_df = make_dataframe(
    dir_inst='/content/Train_1000/Train_1000',
    dir_labels='/content/Train_1000/df.csv',
    shuffle=True,
)
val_df = make_dataframe(
    dir_inst='/content/Validation/Validation_100',
    dir_labels='/content/Validation/df.csv',
    shuffle=False,
)

# Training generator: shuffle and augmentations switched on.
train_gen_1000 = CustomDataGenerator(df=train_df, shuffle=True, augmentations=True)
# Validation generator: shuffle and augmentations switched off.
val_gen_1000 = CustomDataGenerator(df=val_df, shuffle=False, augmentations=False)


In [None]:
# Callbacks

# Save the full model (not only the weights) each time val_loss reaches a new minimum.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    '/content/drive/MyDrive/Data Science/DS projects/MIL CCRCC/40_20/Models/Simple_Model_1000/Simple_Model_1000',
    monitor="val_loss",
    verbose=1,
    mode="min",
    save_best_only=True,
    save_weights_only=False,
)

# Per-epoch metrics log; append=True adds rows to an existing file.
csv_logger = tf.keras.callbacks.CSVLogger(
    '/content/Simple_Model_1000.csv',
    separator=",",
    append=True,
)

# Stop training once val_loss has not improved for 10 epochs.
es = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=10,
    verbose=1,
    mode="min",
)



In [None]:
# Adam optimiser for the 1000-bag model.
# NOTE(review): `decay` is a legacy-optimizer argument; newer TensorFlow
# releases may reject it on tf.keras.optimizers.Adam - confirm against the
# installed version.
opt = tf.keras.optimizers.Adam(
    learning_rate=1e-3,
    decay=0.0005,
    beta_1=0.9,
    beta_2=0.999,
)

model_1000 = SimpleModel(bag_size=40, instance_shape=(256, 256, 3))

# Precision and recall are tracked for class index 1 only.
model_1000.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=[
        "accuracy",
        tf.keras.metrics.AUC(name='AUC'),
        tf.keras.metrics.AUC(curve='PR', name='PR_AUC'),
        tf.keras.metrics.Precision(name='Precision', class_id=1),
        tf.keras.metrics.Recall(name='Recall', class_id=1),
    ],
)

In [None]:
# Train for at most 100 epochs; checkpointing, CSV logging and early stopping
# are handled by the callbacks.
# NOTE(review): Keras documents that `batch_size` should not be given when the
# input is a generator or Sequence - confirm what CustomDataGenerator is.
model_1000.fit(
    train_gen_1000,
    validation_data=val_gen_1000,
    epochs=100,
    batch_size=1,
    callbacks=[model_checkpoint, csv_logger, es],
    verbose=1,
)

# Train SimpleModel (Model_2000) with dataset, which contains 2000 bags of instances

In [None]:
# Bag tables for the 2000-bag experiment: training rows are shuffled,
# validation rows are kept sorted by case.
train_df = make_dataframe(
    dir_inst='/content/Train_2000/Train_2000',
    dir_labels='/content/Train_2000/df.csv',
    shuffle=True,
)
val_df = make_dataframe(
    dir_inst='/content/Validation/Validation_100',
    dir_labels='/content/Validation/df.csv',
    shuffle=False,
)

# Training generator: shuffle and augmentations switched on.
train_gen_2000 = CustomDataGenerator(df=train_df, shuffle=True, augmentations=True)
# Validation generator: shuffle and augmentations switched off.
val_gen_2000 = CustomDataGenerator(df=val_df, shuffle=False, augmentations=False)

In [None]:
# Callbacks

# Save the full model (not only the weights) each time val_loss reaches a new minimum.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    '/content/drive/MyDrive/Data Science/DS projects/MIL CCRCC/40_20/Models/Simple_Model_2000/Simple_Model_2000',
    monitor="val_loss",
    verbose=1,
    mode="min",
    save_best_only=True,
    save_weights_only=False,
)

# Per-epoch metrics log; append=True adds rows to an existing file.
csv_logger = tf.keras.callbacks.CSVLogger(
    '/content/Simple_Model_2000.csv',
    separator=",",
    append=True,
)

# Stop training once val_loss has not improved for 10 epochs.
es = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=10,
    verbose=1,
    mode="min",
)

In [None]:
# Adam optimiser for the 2000-bag model.
# NOTE(review): `decay` is a legacy-optimizer argument; newer TensorFlow
# releases may reject it on tf.keras.optimizers.Adam - confirm against the
# installed version.
opt = tf.keras.optimizers.Adam(
    learning_rate=1e-3,
    decay=0.0005,
    beta_1=0.9,
    beta_2=0.999,
)

model_2000 = SimpleModel(bag_size=40, instance_shape=(256, 256, 3))

# Precision and recall are tracked for class index 1 only.
model_2000.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=[
        "accuracy",
        tf.keras.metrics.AUC(name='AUC'),
        tf.keras.metrics.AUC(curve='PR', name='PR_AUC'),
        tf.keras.metrics.Precision(name='Precision', class_id=1),
        tf.keras.metrics.Recall(name='Recall', class_id=1),
    ],
)

In [None]:
# Train for at most 100 epochs; checkpointing, CSV logging and early stopping
# are handled by the callbacks.
# NOTE(review): Keras documents that `batch_size` should not be given when the
# input is a generator or Sequence - confirm what CustomDataGenerator is.
model_2000.fit(
    train_gen_2000,
    validation_data=val_gen_2000,
    epochs=100,
    batch_size=1,
    callbacks=[model_checkpoint, csv_logger, es],
    verbose=1,
)