In [2]:
# Cosmetic only: restyle the notebook UI via the `jt` command line tool (presumably
# jupyterthemes -- confirm it is installed): monokai theme, custom fonts, cell width 1000.
!jt -N -T -f firacode -tf loraserif -nf latosans -fs 100 -tfs 100 \
-nfs 100 -t monokai -cellw 1000

# 458: A4 Xception

# import

In [3]:
# tensorflow tools #
import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras.models import Sequential
from tensorflow.keras import Model
from tensorflow.keras.applications import Xception
from tensorflow.keras.applications.xception import preprocess_input
from tensorflow.keras.layers import (Dense, Input, MaxPooling2D, MaxPool2D, GlobalAveragePooling2D,
                                     Conv2D, Flatten, Dropout, BatchNormalization) 
from tensorflow.keras.activations import sigmoid, softmax, relu
from tensorflow.keras.optimizers import RMSprop, Adam, Adagrad, SGD
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
# ml viz
from tensorboard.plugins.hparams import api as hp 
%load_ext tensorboard
import pydot 
import graphviz 
# sklearn tools #
import sklearn 
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA, TruncatedSVD
# image tools # 
from tensorflow.keras.preprocessing import image 
import cv2
import imutils
# data handling tools #
import numpy as np
import pandas as pd 
# plotting tools #
import matplotlib.pyplot as plt 
import seaborn as sns 
# general tools #
import datetime
from collections import Counter
import os 
pjoin = os.path.join
import shutil
import gc
import pickle 
import itertools
import re 
import string 
# import kaggle

In [11]:
# Environment switch: set to True when running on Google Colab. It also selects where
# models, benchmark lists and TensorBoard logs are written in the paths cell.
colab = False

if colab:
    # mount Google Drive so the data set stored there becomes reachable
    from google.colab import drive
    drive.mount('/content/drive')

    # copy the train / val / test image folders from Drive into the working directory
    !cp -rf ./drive/My\ Drive/458_a4/train . 
    !cp -rf ./drive/My\ Drive/458_a4/val . 
    !cp -rf ./drive/My\ Drive/458_a4/test . 

# paths

In [12]:
# Image folders always live in the working directory (on Colab they are copied there).
train_path, val_path, test_path = './train', './val', './test'
# Artifacts (saved models, pickled benchmark lists, TensorBoard logs) go to Drive on
# Colab and to the working directory otherwise.
if colab:
    model_path = './drive/My Drive/458_a4/model'
    bm_lists_path = './drive/My Drive/458_a4/bm_lists'
    tb_log_path = './drive/My Drive/458_a4/tb_runlog'
else:
    model_path = './model'
    bm_lists_path = './bm_lists'
    tb_log_path = './tb_runlog'


path_ls = [train_path, val_path, test_path, model_path, bm_lists_path, tb_log_path]

# makedirs(exist_ok=True) creates missing parent folders too and is safe to re-run,
# unlike the exists()/mkdir() pair, which fails when a parent directory is absent.
for path in path_ls:
    os.makedirs(path, exist_ok=True)

# reproducibility & maintenance

In [13]:
# Reset Keras' global state, then seed TensorFlow and NumPy with the same fixed value.
keras.backend.clear_session()
tf.random.set_seed(38)
np.random.seed(38)
# assigning to `_` keeps the return value of gc.collect() out of the cell output
_= gc.collect() 

# data view

In [14]:
# Print how many entries each class sub-folder holds, for every data split.
for maindir in (train_path, val_path, test_path):
    for subdir in os.listdir(maindir):
        subdir_path = os.path.join(maindir, subdir)
        # only directories are counted; plain files next to them are skipped
        if not os.path.isdir(subdir_path):
            continue
        n = len(os.listdir(subdir_path))
        label = f'{maindir}/{subdir}:'
        # right-align the label in 30 columns and the count in 5
        print(f'{label:>30}{n:>5}')


             ./train/no_tumor:  262
     ./train/meningioma_tumor:  262
         ./train/glioma_tumor:  262
      ./train/pituitary_tumor:  262
               ./val/no_tumor:   65
       ./val/meningioma_tumor:   65
           ./val/glioma_tumor:   65
        ./val/pituitary_tumor:   65
              ./test/no_tumor:   81
      ./test/meningioma_tumor:  113
          ./test/glioma_tumor:   84
       ./test/pituitary_tumor:   36


# data generators

In [15]:
# Augmenting generator for the training images.
# NOTE(review): rotation_range is expressed in degrees in Keras, so .15 is a very small
# rotation -- confirm this is intended.
# NOTE(review): Xception's preprocess_input is not applied (no preprocessing_function is
# passed); pixels are only rescaled by 1/255 -- confirm against the pretrained weights.
train_im_datagen = image.ImageDataGenerator(
    # pixel scaling
    rescale=1./255,
    # geometric augmentation (shifts and zoom are switched off)
    rotation_range=.15,
    shear_range=.15,
    width_shift_range=0,
    height_shift_range=0,
    zoom_range=0,
    horizontal_flip=True,
    vertical_flip=True,
    # photometric augmentation
    brightness_range=(.1, .9),
    # layout / splitting
    data_format='channels_last',
    validation_split=0,
)

# Plain generator shared by the validation and test flows: rescaling only.
test_im_datagen = image.ImageDataGenerator(rescale=1./255)

In [16]:
def get_data_gens(batch_size):
    ''' Build the three directory iterators used by an experiment.

        batch_size: number of images per batch, applied to all three iterators.

        Returns the tuple (traingen, valgen, testgen). The training iterator is fed
        by the augmenting generator and shuffles; the validation and test iterators
        are fed by the plain generator and do not shuffle. Image size comes from the
        module-level `target_size`. '''
    # settings that are identical for the three flows
    shared = dict(target_size=target_size,
                  color_mode='rgb',
                  class_mode='categorical',
                  batch_size=batch_size,
                  seed=38)

    traingen = train_im_datagen.flow_from_directory(train_path, shuffle=True, **shared)
    # validation first, then test -- same creation order as the return tuple
    valgen, testgen = (test_im_datagen.flow_from_directory(folder, shuffle=False, **shared)
                       for folder in (val_path, test_path))

    return traingen, valgen, testgen

# classes

In [17]:
class Clock():
    ''' A simple stopwatch that prints or hands back the elapsed time between
        start() and stop() calls in a human friendly format.

        Relies on the module-level `import datetime`.

        Attributes:
            running:    True between start() and the next stop().
            start_time: datetime of the last start() call (None before the first).
            stop_time:  datetime of the last effective stop() call; `end_time` is
                        kept as an alias of the same value.
            delta:      [hours, minutes, seconds] of the last measurement as strings
                        (minutes and seconds zero-padded to two digits).
            elapsed:    formatted result of the last measurement. '''

    def __init__(self):
        self.running = False
        self.start_time = None
        self.stop_time = None
        self.end_time = None
        self.delta = None
        self.elapsed = None

    def start(self):
        ''' Start (or restart) the measurement from now. '''
        self.running = True
        self.start_time = datetime.datetime.now()

    def stop(self, stdout=True, handback=False):
        ''' Stop the clock and format the elapsed time.

            stdout:   print the formatted elapsed time when True.
            handback: return the formatted elapsed time when True.

            Returns the elapsed string if `handback` is True, otherwise None.
            Calling stop() on a clock that is not running does nothing and
            returns None.

            The string is 'minutes:MM, seconds:SS'. When the measurement lasted an
            hour or longer it is prefixed with 'hours:H, ' so the hour count is no
            longer silently dropped. Fractions of a second are truncated. '''
        if not self.running:
            return None

        self.running = False
        self.stop_time = self.end_time = datetime.datetime.now()

        # work on whole seconds; divmod keeps the arithmetic valid beyond 24h as well
        total_seconds = int((self.end_time - self.start_time).total_seconds())
        hours, remainder = divmod(total_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        self.delta = [str(hours), f'{minutes:02d}', f'{seconds:02d}']

        if hours:
            self.elapsed = 'hours:{0[0]}, minutes:{0[1]}, seconds:{0[2]}'.format(self.delta)
        else:
            self.elapsed = 'minutes:{0[1]}, seconds:{0[2]}'.format(self.delta)

        if stdout:
            print(self.elapsed)
        if handback:
            return self.elapsed

    def __repr__(self):
        if self.running:
            return 'The clock is running!'
        else:
            return 'The clock is not running.'

In [32]:
# module-level stopwatch; run_benchmarking() starts and stops it around every run
clock = Clock() 

# Xception

In [18]:
# Reset Keras' global state and run the garbage collector before the Xception section.
keras.backend.clear_session()
# assigning to `_` keeps the return value of gc.collect() out of the cell output
_= gc.collect() 

## hyperparams

In [23]:
# number of epochs each benchmarking run trains for
epochs = 2 
# image height/width handed to flow_from_directory in get_data_gens()
target_size = (224,224)
# model input shape: target_size plus 3 channels (the generators use color_mode='rgb')
input_shape = (224,224,3)
# width of the softmax output layer (the data folders list 4 class sub-directories)
classes_dim = 4

In [24]:
# Candidate values for every tunable hyperparameter (one list per axis of the grid).
train_base = [False]
optimizer = ['adam', 'rmsprop']
lr = [.00001, .001]
# NOTE(review): create_model() only passes momentum to RMSprop, so the Adam
# combinations that differ solely in momentum build identical models.
momentum = [0., .5, .9]
do_rate = [0., .2, .5]
batch_normalize = [True, False]
batch_size = [64, 128]
# NOTE(review): 1028 may be a typo for 1024 -- confirm.
dense_units = [128, 512, 1028]

# Names in the same order as the values inside each hyperparams tuple.
param_names = ['train_base', 'optimizer', 'lr', 'momentum',
               'do_rate', 'batch_normalize', 'batch_size', 'dense_units']

# Full Cartesian product of the axes; the right-most axis varies fastest.
hyperparams = [combo for combo in itertools.product(
    train_base, optimizer, lr, momentum,
    do_rate, batch_normalize, batch_size, dense_units)]

In [25]:
# number of hyperparameter combinations, i.e. how many runs run_benchmarking() performs
len(hyperparams)

432

## create_model()

In [26]:
def create_model(name, hparams):
    ''' Build and compile an Xception-based classifier.

        name:    name given to the resulting Keras model.
        hparams: dict with the keys 'optimizer' ('adam' or 'rmsprop'), 'lr',
                 'momentum' (only used by RMSprop), 'train_base', 'batch_normalize',
                 'dense_units' and 'do_rate'.

        Returns the compiled model (categorical cross-entropy loss, accuracy metric).
        The input shape and the number of output classes are taken from the
        module-level `input_shape` and `classes_dim`.

        Raises ValueError when hparams['optimizer'] is not a supported name. '''
    # optimizer #
    if hparams['optimizer'] == 'adam':
        optimizer = Adam(learning_rate=hparams['lr'])
    elif hparams['optimizer'] == 'rmsprop':
        optimizer = RMSprop(learning_rate=hparams['lr'], momentum=hparams['momentum'])
    else:
        # previously an unknown name only surfaced later as an unbound local at compile()
        raise ValueError(f"unsupported optimizer: {hparams['optimizer']!r}")

    # conv base creation: ImageNet weights, no classifier head, global average pooling #
    conv_base = keras.applications.Xception(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape,
        pooling='avg',
    )
    # conv base training adjustments #
    # (the old code rebound `conv_base` to True when train_base was set, which broke
    #  the conv_base(input_) call below; the flag belongs on the layer)
    conv_base.trainable = bool(hparams['train_base'])

    use_batch_norm = bool(hparams['batch_normalize'])

    # *******  input block  ******* #
    input_ = Input(shape=input_shape, name='input')

    # *******  conv base block  ******* #
    x = conv_base(input_)
    if use_batch_norm:
        x = BatchNormalization()(x)

    # *******  dense block  ******* #
    # kept from the original architecture; the pooled base output is already 2-D
    x = Flatten()(x)
    x = Dense(hparams['dense_units'], activation='relu')(x)
    if use_batch_norm:
        x = BatchNormalization()(x)
    x = Dropout(hparams['do_rate'])(x)

    # output #
    output_ = Dense(classes_dim, activation='softmax', name='output')(x)

    # *******  model  ******* #
    m = Model(
        inputs=[input_],
        outputs=[output_],
        name=name
    )

    # *******  compile  ******* #
    m.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # summary() prints by itself; wrapping it in print() added a stray "None" line
    m.summary()

    return m
        

## train_test_model()

In [27]:
def train_test_model(m:tf.keras.Model, fname, hparams, epochs):
    ''' Fit `m` on the train/validation generators and evaluate it on the test generator.

        m:       compiled Keras model to train.
        fname:   log directory of this run; the TensorBoard metrics and the HParams
                 record are written there.
        hparams: dict of hyperparameter name -> value. 'batch_size' is read here and
                 the whole dict is logged through the HParams callback.
        epochs:  maximum number of epochs to train for.

        Returns (history, test_acc): the `history` dict produced by fit() and the
        accuracy reported by evaluate() on the test generator. The best model seen
        during training is checkpointed under `model_path` using the model's name. '''
    # cb for logging the metrics
    tb_callback = keras.callbacks.TensorBoard(log_dir=fname, histogram_freq=1,
                                              write_images=True)
    # cb for logging the parameters
    hp_callback = hp.KerasCallback(fname, hparams)
    # cb for early stopping on val loss
    earlystop_callback = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=.001,
                                                       baseline=.25, patience=100)
    # cb for reducing lr on plateau of val loss
    red_lr_callback = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=.2,
                                                        min_delta=.001, patience=5, cooldown=0,
                                                        min_lr=.000001)
    # cb for saving model
    cp_callback = keras.callbacks.ModelCheckpoint(pjoin(model_path, m.name), save_best_only=True)

    # data #
    traingen, valgen, testgen = get_data_gens(hparams['batch_size'])

    # whole batches only, but never zero steps when a split is smaller than one batch
    train_steps = max(1, traingen.n // traingen.batch_size)
    val_steps = max(1, valgen.n // valgen.batch_size)

    # fit #
    # the two logging callbacks used to be created but never handed to fit(),
    # so nothing was ever written to the run's log directory
    h = m.fit(traingen,
              steps_per_epoch=train_steps,
              validation_data=valgen,
              validation_steps=val_steps,
              epochs=epochs,
              callbacks=[tb_callback, hp_callback, red_lr_callback,
                         cp_callback, earlystop_callback])

    # evaluate on test data (uses the weights as they are after the last epoch) #
    _test_loss, test_acc = m.evaluate(testgen)

    return h.history, test_acc
        

## pickle_bm_lists()

In [29]:
def pickle_bm_lists(bm_ls_mapper, name):
    ''' Pickle every benchmark list of `bm_ls_mapper` into the `bm_lists_path` folder.

        bm_ls_mapper: dict mapping a file-name suffix (e.g. 'history_ls.pkl') to the
                      object that should be stored under it.
        name:         prefix of the written files; each one is called '<name>_<suffix>'. '''
    for suffix in bm_ls_mapper:
        target = pjoin(bm_lists_path, '_'.join((name, suffix)))
        with open(target, 'wb') as out:
            pickle.dump(bm_ls_mapper[suffix], out)

## run_benchmarking()

In [30]:
def run_benchmarking():
    ''' Explore the hyperparameter space for the designated run indices.

        For every index a model named 'frozen_<run>' is created from the matching
        entry of `hyperparams`, trained and tested, and its benchmarks (fit history,
        test accuracy, elapsed time, parameters) are pickled as single-element lists
        through pickle_bm_lists(), i.e. one set of files per run.

        Uses the module-level `hyperparams`, `param_names`, `epochs`, `tb_log_path`
        and `clock`. Returns the string 'finished'. '''
    # this is where distribution across multiple instances can be orchestrated #
    start_idx, end_idx = 0, len(hyperparams)

    #******************************** run it ********************************#
    for run in range(start_idx, end_idx):
        param_dict = dict(zip(param_names, hyperparams[run]))
        run_name = f'frozen_{run}'
        run_log_dir = pjoin(tb_log_path, run_name)
        print(f'\n\nRUN {run}\n{param_dict}')

        # maintenance #
        keras.backend.clear_session()
        gc.collect()
        # The former shell line `rm -rf tb_log_path` addressed a folder literally
        # called "tb_log_path", not the configured directory. Only this run's own
        # log folder is cleared, so logs of other runs are left untouched.
        shutil.rmtree(run_log_dir, ignore_errors=True)

        # create model #
        m = create_model(run_name, param_dict)

        # train and test the model #
        clock.start()
        h, test_acc = train_test_model(m, run_log_dir, param_dict, epochs)
        clock.stop()

        # benchmark lists of this run, keyed by the file-name suffix they are saved under
        # (the lists were reset on every iteration before as well, so each file holds
        #  exactly one run; the lists built ahead of the loop were never used)
        bm_ls_mapper = {'history_ls.pkl': [h],
                        'test_acc_ls.pkl': [test_acc],
                        'time_ls.pkl': [clock.elapsed],
                        'param_ls.pkl': [param_dict]}

        # save lists #
        pickle_bm_lists(bm_ls_mapper, run_name)

    return 'finished'


## run it!

In [33]:
# Time the whole sweep with its own Clock; the module-level `clock` is used inside
# run_benchmarking() for the individual runs.
runtime_clock = Clock()
runtime_clock.start()

### ******************* ###
run_benchmarking()
### ******************* ###

# prints the total elapsed time of the sweep
runtime_clock.stop() 



RUN 0
{'train_base': False, 'optimizer': 'adam', 'lr': 1e-05, 'momentum': 0.0, 'do_rate': 0.0, 'batch_normalize': True, 'batch_size': 64, 'dense_units': 128}
Model: "frozen_0"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 224, 224, 3)]     0         
_________________________________________________________________
xception (Functional)        (None, 2048)              20861480  
_________________________________________________________________
batch_normalization_4 (Batch (None, 2048)              8192      
_________________________________________________________________
flatten (Flatten)            (None, 2048)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               262272    
_________________________________________________________________
batch_normalization_5 (Batch (

Found 314 images belonging to 4 classes.
Epoch 1/2

KeyboardInterrupt: 