In [None]:
# Switch to True when the notebook runs on Google Colab: Google Drive is then
# mounted here, and the same flag is read again later to choose the dataset path.
colab = False
if colab:
    # Imported only on the Colab branch so a local run never needs google.colab.
    from google.colab import drive
    drive.mount('/content/drive')

## **Week 4: From MLPs to Convolutional Neural Networks**
### José Manuel López, Alex Martín, Marcos V. Conde

#### <ins>Understanding Layer Manipulation</ins>
1. Check existing architectures
2. Set a new model from an existing architecture
3. Apply model to a small dataset

#### <ins>Deal with dataset loading</ins>
4. Introduce and evaluate Data Augmentation Impact

#### <ins>Hyperparameter optimization</ins>
5. Introduce and evaluate the usage of dropout, batch normalization, ...
6. Apply random search on model hyperparameters

In [None]:
from tensorflow.python.util import deprecation
from tensorflow.keras.mixed_precision import experimental as mixed_precision
from tensorflow.python.client import device_lib 
import tensorflow as tf
import warnings
import os
from tensorflow.keras import backend as K

In [None]:
#reset Keras Session
def reset_keras():
    """Close the current Keras session and install a freshly configured one.

    Steps, in order:
      1. fetch the session currently backing Keras, clear the Keras global
         state and close that session;
      2. drop the notebook-level ``model`` variable (if one exists) and run the
         garbage collector so the old model can be released;
      3. create a new TF1-compat session restricted to GPU "0" with a memory
         fraction of 1 and register it with Keras.

    Returns:
        None. The function only has side effects on global TF/Keras state and
        on the notebook's global namespace.
    """
    import gc  # local import: gc is not part of the notebook's import cell

    backend = tf.compat.v1.keras.backend

    sess = backend.get_session()
    backend.clear_session()
    sess.close()

    # A plain `del model` inside this function makes `model` a local name that
    # is never assigned, which raises UnboundLocalError on every call. Removing
    # the global reference (when present) is what was intended.
    globals().pop('model', None)
    gc.collect()

    # use the same config as you used to create the session
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1
    config.gpu_options.visible_device_list = "0"
    backend.set_session(tf.compat.v1.Session(config=config))
    

# Run the reset once here, before the GPU options are configured in the next cell.
reset_keras()

Without the memory-growth setting below, TensorFlow allocates all of the GPU's VRAM up front.
Additionally, we enable mixed precision (i.e. use Tensor Cores, if the GPU supports them) to speed up computation.

In [None]:
# Disable Warnings
warnings.filterwarnings('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
deprecation._PRINT_DEPRECATION_WARNINGS = False

# Let TensorFlow grow its GPU memory on demand instead of grabbing all VRAM.
# On a machine without a GPU the list is empty, so indexing must be guarded.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_memory_growth(gpus[0], True)
    except RuntimeError as err:
        # Memory growth can only be set before the GPU has been initialised.
        print("Could not enable memory growth: {}".format(err))

tf.config.experimental_run_functions_eagerly(True)

if gpus:
    try:
        # print model to see if it's compatible with Mixed Precision
        # (look the GPU up by type rather than assuming it sits at index 1)
        local_gpus = [d for d in device_lib.list_local_devices() if d.device_type == 'GPU']
        if local_gpus:
            print(local_gpus[0].physical_device_desc)
        # Change to TF16 mixed precision
        policy = mixed_precision.Policy('mixed_float16')
        mixed_precision.set_policy(policy)
    except Exception as err:
        # Still best-effort, but say why mixed precision is off instead of hiding it.
        print("Mixed precision not enabled: {}".format(err))

print(gpus[0] if gpus else "No GPU detected, running on CPU")
print(tf.__version__)

### Loading Datasets

We are going to define variables related to dataset and load data into generators

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
def preprocess_input_manual(x, dim_ordering='default'):
    """Convert RGB image data to zero-centred BGR.

    The channel axis is reversed ('RGB' -> 'BGR') and the per-channel means
    103.939 / 116.779 / 123.68 are subtracted from channels 0 / 1 / 2 of the
    reversed data.

    Args:
        x: array-like holding one image or a batch of images. The channel axis
            is the last axis for 'channels_last' and the third-from-last axis
            for 'channels_first'; any leading axes (e.g. a batch axis) are
            left untouched.
        dim_ordering: 'channels_first', 'channels_last', or 'default' to use
            ``K.image_data_format()``.

    Returns:
        A new floating-point array with the same shape as ``x``. The input is
        never modified; non-float input is converted to float32 first.

    Raises:
        AssertionError: if ``dim_ordering`` is not one of the accepted values.
    """
    # Local import: this function is defined (and first used by the data
    # generators) before the notebook's own `import numpy as np` cell runs.
    import numpy as np

    if dim_ordering == 'default':
        dim_ordering = K.image_data_format()
    assert dim_ordering in {'channels_first', 'channels_last'}, \
        "unknown dim_ordering: {!r}".format(dim_ordering)

    # Work on a private float copy: the in-place subtraction below would
    # otherwise write through the reversed view into the caller's array, and
    # would fail outright for integer images.
    x = np.asarray(x)
    if np.issubdtype(x.dtype, np.floating):
        x = x.copy()
    else:
        x = x.astype(np.float32)

    bgr_means = (103.939, 116.779, 123.68)

    # The ellipsis keeps the indexing valid for single images and for batches.
    if dim_ordering == 'channels_first':
        # 'RGB'->'BGR'
        x = x[..., ::-1, :, :]
        # Zero-center by mean pixel
        for channel, mean in enumerate(bgr_means):
            x[..., channel, :, :] -= mean
    else:
        # 'RGB'->'BGR'
        x = x[..., ::-1]
        # Zero-center by mean pixel
        for channel, mean in enumerate(bgr_means):
            x[..., channel] -= mean
    return x

In [None]:
# Side length in pixels; images are resized to (IMG_SIZE, IMG_SIZE) by the loaders.
IMG_SIZE = 224
# Number of images per batch; also used to derive the step counts passed to fit().
BATCH_SIZE = 32
# Seed constant for reproducible runs.
RANDOM_SEED = 42
# Sample count from which validation_steps is computed in the fit() calls.
# NOTE(review): presumably the size of the evaluation split - confirm against the data on disk.
validation_samples = 2288
# Class folder names; the position in this list is the class index used when
# the one-hot targets are decoded with np.argmax.
CLASSES = ['coast','forest','highway','inside_city','mountain','Opencountry','street','tallbuilding']
def get_dataset(path, batch_size=32):
    """Create the train and test directory iterators for one dataset folder.

    Both iterators use the same ImageDataGenerator: no augmentation, only
    ``preprocess_input_manual`` applied to every image.

    Args:
        path: dataset root; images are read from ``path + '/train'`` and
            ``path + '/test'``.
        batch_size: number of images per batch for both iterators.

    Returns:
        ``(train_loader, test_loader)``. One batch is drawn from
        ``test_loader`` to print the data shape before returning.
    """
    print("Loading: {}".format(path))
    datagen = ImageDataGenerator(featurewise_center=False,
              samplewise_center=False,
              featurewise_std_normalization=False,
              samplewise_std_normalization=False,
              preprocessing_function=preprocess_input_manual,
              rotation_range=0.,
              width_shift_range=0.,
              height_shift_range=0.,
              shear_range=0.,
              zoom_range=0.,
              channel_shift_range=0.,
              fill_mode='nearest',
              cval=0.,
              horizontal_flip=False,
              vertical_flip=False,
              rescale=None)

    # Options shared by both splits. The batch size comes from the argument
    # (it used to be silently replaced by the global BATCH_SIZE) and the
    # shuffling is seeded so repeated runs see the same order.
    flow_options = dict(
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=batch_size,
        classes=CLASSES,
        class_mode='categorical',
        seed=RANDOM_SEED,
    )

    train_loader = datagen.flow_from_directory(path + '/train', **flow_options)
    test_loader = datagen.flow_from_directory(path + '/test', **flow_options)

    # Peek at one batch to report the tensor shape.
    example_data, _ = next(test_loader)
    print("Data Shape: {}".format(example_data.shape))
    print("---"*30)

    return train_loader, test_loader

In [None]:

# Path template for the dataset folders; "{}" is filled with the dataset number.
# On Colab the data is read from the mounted Drive, otherwise from the working directory.
DATA_DIR = (
    "/content/drive/MyDrive/MIT_small_train_1/MIT_small_train_{}"
    if colab
    else "./MIT_small_train_{}"
)

# Dataset 1 (the only one loaded; the remaining datasets are left disabled)
train_loader_1, test_loader_1 = get_dataset(DATA_DIR.format(1))
# Dataset 2
#train_loader_2, test_loader_2 = get_dataset(DATA_DIR.format(2))
# Dataset 3
#train_loader_3, test_loader_3 = get_dataset(DATA_DIR.format(3))
# Dataset 4
#train_loader_4, test_loader_4 = get_dataset(DATA_DIR.format(4))

## Check existing architectures: ResNet50
We are going to try ResNet50 pretrained on ImageNet and evaluate the performance of the model.

In [None]:
from tensorflow.keras.applications.resnet import ResNet50
from tensorflow.keras.applications.resnet import preprocess_input, decode_predictions
import numpy as np
import matplotlib.pyplot as plt


In [None]:
# Draw ONE batch and use it for the prediction, the label and the picture.
# (Drawing a second batch before printing pairs the prediction with an
# unrelated image and label.)
example_data, example_targets = next(train_loader_1)

# randint's upper bound is exclusive, so len(example_data) already covers
# every valid index; len + 1 could produce an out-of-range position.
n = np.random.randint(low=0, high=len(example_data))

print("N: {}".format(n))

# The generator already ran preprocess_input_manual on each image, so the
# batch goes to the network as it is; preprocessing it again would subtract
# the channel means twice.
model = ResNet50(weights='imagenet')
preds = model.predict(example_data)

print('Predicted:', decode_predictions(preds, top=3)[n])
print('Real:', CLASSES[np.argmax(example_targets[n])])

# Undo the preprocessing for display only: add the BGR means back, flip the
# channels to RGB and clip to the 8-bit range.
# NOTE(review): assumes the generator yields channels_last batches - confirm.
display_image = (example_data[n] + np.array([103.939, 116.779, 123.68]))[..., ::-1]
display_image = np.clip(display_image, 0, 255).astype('uint8')

plt.imshow(display_image, interpolation='none')
plt.xticks([])
plt.yticks([])
plt.show()

## Fine-tuning an existing architecture
Now we are going to use ResNet50's skeleton and replace the last layer to perform the classification on our classes.

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

In [None]:
def plot_loss_accuracy(history, title, filename='accuracy_loss.jpg'):
    """Plot training/validation loss and accuracy side by side and save the figure.

    Args:
        history: object whose ``history`` attribute is a dict containing the
            per-epoch series 'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
        title: text shown as the figure's super-title.
        filename: path the figure is written to. The default keeps the
            previous fixed name, so successive calls overwrite the same file
            unless a different name is passed.

    Returns:
        None. The figure is saved, shown and then closed.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(25,8))

    fig.suptitle(title, fontsize=16)

    # Plot history: CrossEntropy
    ax1.plot(history.history['loss'], label='CrossEntropy (training data)')
    ax1.plot(history.history['val_loss'], label='CrossEntropy (validation data)')
    ax1.set_title('Loss Function: Cross Entropy')
    ax1.set(xlabel='Epoch', ylabel='Loss Value')
    ax1.legend(loc="upper left")

    # Plot history: Accuracy
    ax2.plot(history.history['accuracy'], label='Accuracy (training data)')
    ax2.plot(history.history['val_accuracy'], label='Accuracy (validation data)')
    ax2.set_title('Accuracy')
    ax2.set(xlabel='Epoch', ylabel='Accuracy Value')
    ax2.legend(loc="upper left")

    # Save BEFORE showing: once the figure has been shown it may no longer be
    # the current figure, and a savefig issued afterwards writes an empty image.
    fig.savefig(filename)
    plt.show()
    # This function is called inside loops; release the figure explicitly.
    plt.close(fig)

In [None]:
def buil_custom_resnet50(opti ='adadelta',summary=False):
    """Build a ResNet50-based classifier for the classes listed in CLASSES.

    The ImageNet-pretrained ResNet50 is cut at its second-to-last layer and a
    new softmax Dense layer with ``len(CLASSES)`` units is attached. Every
    layer of the pretrained network is frozen, so only the new layer trains.

    Args:
        opti: optimizer name or optimizer instance passed to ``compile``.
        summary: when True, print the model summary after compiling.

    Returns:
        The compiled model (categorical cross-entropy loss, accuracy metric).
    """
    # ResNet Base Model
    base_model = ResNet50(weights='imagenet')
    features = base_model.layers[-2].output

    # Perform classification. The notebook switches the global policy to
    # mixed_float16 when a GPU is available; the final softmax is pinned to
    # float32 so the probabilities and the loss are not computed in half precision.
    predictions = Dense(len(CLASSES), activation='softmax', dtype='float32')(features)
    model = Model(inputs=base_model.input, outputs=predictions)

    # We don't want to modify imagenet weights
    for layer in base_model.layers:
        layer.trainable = False

    # Compile model (after freezing, so the trainable flags are taken into account)
    model.compile(optimizer=opti, loss='categorical_crossentropy', metrics=["accuracy"])
    if summary:
        model.summary()
    return model

# Baseline model for dataset 1: pretrained layers frozen, default optimizer ('adadelta').
model = buil_custom_resnet50()


In [None]:
# Baseline run: 50 epochs on dataset 1, validated on its test loader.
# NOTE(review): 400 is hard-coded and presumably the number of training
# images - confirm; the loader's own sample count would avoid the constant.
history_1 = model.fit(
    train_loader_1,
    steps_per_epoch=400 // BATCH_SIZE,
    epochs=50,
    validation_data=test_loader_1,
    validation_steps=validation_samples // BATCH_SIZE,
)

In [None]:
# Show the loss and accuracy curves recorded during the baseline training run.
plot_loss_accuracy(history_1, 'ResNet50: Dataset 1')


With the provided example code we obtain the results above, using ResNet50 weights pretrained on ImageNet. Next, we search for hyperparameter values that give better results.


We will always use early stopping, so that training runs that are no longer improving are cut short.


**Random search**

To train with different batch sizes, the data must first be loaded with the desired batch size.

In [None]:
from random import random, randint

# Seven random batch sizes in [10, 100]; one model is trained per batch size.
batch_sizes = [randint(10, 100) for _ in range(7)]
print('Batch sizes that will be used: ', batch_sizes)

optimizers = [tf.keras.optimizers.SGD, tf.keras.optimizers.RMSprop , tf.keras.optimizers.Adam ,tf.keras.optimizers.Adadelta ,
              tf.keras.optimizers.Adagrad , tf.keras.optimizers.Adamax , tf.keras.optimizers.Nadam]

# The loop variable is deliberately the global BATCH_SIZE: the step counts
# below are derived from it.
for BATCH_SIZE in batch_sizes:
    train_loader_1, test_loader_1 = get_dataset(DATA_DIR.format(1), BATCH_SIZE)

    # Draw the optimizer from the WHOLE list; randint(6) could only return
    # 0..5, so the last entry (Nadam) was never tried.
    opti = np.random.randint(len(optimizers))

    # Learning rate: uniform draw rescaled to [0.0001, 0.3].
    lr_random = random()  # number between 0-1
    lr = 0.0001 + (lr_random * (0.3 - 0.0001))

    # Momentum: its own uniform draw rescaled to [0, 0.9]. Reusing lr_random
    # here would tie the momentum to the learning rate.
    mom_random = random()  # number between 0-1
    moment = mom_random * 0.9

    # The first two entries (SGD, RMSprop) are the ones given a momentum argument.
    if opti < 2:
        optimizer = optimizers[opti](learning_rate=lr, momentum=moment)
    else:
        optimizer = optimizers[opti](learning_rate=lr)
        moment = 'Not used'

    model = buil_custom_resnet50(opti=optimizer, summary=False)
    # We will train all the models with 50 epochs and set an early stop if the validation accuracy doesn't get better after 10 epochs
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10)

    history_1 = model.fit(
        train_loader_1,
        steps_per_epoch=int(400 // BATCH_SIZE),
        epochs=50,
        validation_data=test_loader_1,
        validation_steps=int(validation_samples // BATCH_SIZE),
        callbacks=[early_stop],
    )
    # Show the optimizer's class name in the title rather than the class repr.
    plot_loss_accuracy(
        history_1,
        'ResNet50: Batch Size ={}, lr = {}, optimizer = {}, momentum = {}'.format(
            BATCH_SIZE, round(lr, 3), optimizers[opti].__name__, moment))


After the first search, we perform a second random search with narrower value ranges and using only SGD, to get better results.


In [None]:
from random import random, randint

# Five random batch sizes in [30, 70]; one SGD model is trained per batch size.
batch_sizes = [randint(30, 70) for _ in range(5)]
print('Batch sizes that will be used: ', batch_sizes)


# The loop variable is deliberately the global BATCH_SIZE: the step counts
# below are derived from it.
for BATCH_SIZE in batch_sizes:
    train_loader_1, test_loader_1 = get_dataset(DATA_DIR.format(1), BATCH_SIZE)

    # Learning rate: uniform draw rescaled to the narrowed range [0.04, 0.1].
    lr_random = random()  # number between 0-1
    lr = 0.04 + (lr_random * (0.1 - 0.04))

    # Momentum: its own uniform draw rescaled to [0.1, 0.3]. Reusing lr_random
    # here would tie the momentum to the learning rate.
    mom_random = random()  # number between 0-1
    moment = 0.1 + (mom_random * (0.3 - 0.1))

    optimizer = tf.keras.optimizers.SGD(learning_rate=lr, momentum=moment)

    model = buil_custom_resnet50(opti=optimizer, summary=False)
    # We will train all the models with 50 epochs and set an early stop if the validation accuracy doesn't get better after 10 epochs
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10)

    history_1 = model.fit(
        train_loader_1,
        steps_per_epoch=int(400 // BATCH_SIZE),
        epochs=50,
        validation_data=test_loader_1,
        validation_steps=int(validation_samples // BATCH_SIZE),
        callbacks=[early_stop],
    )
    # Show the optimizer's class name in the title rather than the object repr.
    plot_loss_accuracy(
        history_1,
        'ResNet50: Batch Size ={}, lr = {}, optimizer = {}, momentum = {}'.format(
            BATCH_SIZE, round(lr, 3), type(optimizer).__name__, moment))

After the previous tests we found hyperparameters that give a satisfactory classification on the given dataset. Now we will try to improve it further with data augmentation.

**Data augmentation** 

Now we will apply different transformations to our dataset in order to increase the variety of examples the neural network is trained on. Afterwards, the model will be evaluated on the test dataset without applying the transformations.

In [None]:
# Loading training and validation data (augmentation on the training subset only)
BATCH_SIZE = 62
path = DATA_DIR.format(1)

# Options shared by all three loaders.
flow_options = dict(
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    classes=CLASSES,
    class_mode='categorical',
)

# Augmenting generator: shifts of up to 15 px, zoom of +/-20 % and horizontal
# flips. Arguments left out keep their ImageDataGenerator defaults, which are
# the "no-op" values the previous version spelled out explicitly.
datagen_train_valdidation = ImageDataGenerator(
    preprocessing_function=preprocess_input_manual,
    width_shift_range=15,
    height_shift_range=15,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2)

# Plain generator for the validation subset. Taking subset='validation' from
# the augmenting generator would augment the validation images as well.
# NOTE(review): relies on the train/validation split depending only on
# validation_split and the file order, so both generators agree - confirm.
datagen_validation = ImageDataGenerator(
    preprocessing_function=preprocess_input_manual,
    validation_split=0.2)

train_loader = datagen_train_valdidation.flow_from_directory(
    path + '/train', subset='training', **flow_options)

validation_loader = datagen_validation.flow_from_directory(
    path + '/train', subset='validation', **flow_options)

# loading test data without data augmentation
datagen_test = ImageDataGenerator(preprocessing_function=preprocess_input_manual)

test_loader = datagen_test.flow_from_directory(path + '/test', **flow_options)

In [None]:
# Build the model with the best hyperparameters found and add learning-rate
# decay to see whether the accuracy improves further.
lr = 0.045
moment = 0.11

optimizer = tf.keras.optimizers.SGD(learning_rate=lr, momentum=moment)

model = buil_custom_resnet50(opti=optimizer, summary=False)

# Train for at most 100 epochs. Stop early when the validation accuracy has
# not improved for 15 epochs, and multiply the learning rate by 0.1 after 5
# epochs without improvement.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=15)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_accuracy",
    factor=0.1,
    patience=5,
    verbose=0,
    mode="auto",
    min_delta=0,
    cooldown=0)

# Train on the augmented loader and monitor the held-out validation subset
# created in the data-augmentation cell. The earlier version passed
# train_loader_1 / test_loader_1 here, so the augmentation was never used and
# the test set drove early stopping. len(loader) is the number of batches in
# one pass over each loader.
history_1 = model.fit(
    train_loader,
    steps_per_epoch=len(train_loader),
    epochs=100,
    validation_data=validation_loader,
    validation_steps=len(validation_loader),
    callbacks=[early_stop, reduce_lr],
)
plot_loss_accuracy(
    history_1,
    'ResNet50: Batch Size ={}, lr = {}, optimizer = {}, momentum = {}'.format(
        BATCH_SIZE, round(lr, 3), type(optimizer).__name__, moment))

In [None]:
# Final metrics of the trained model; evaluate() returns the loss first, then the accuracy.
loss, acc = model.evaluate(train_loader_1, verbose=0)
print("Train Accuracy: {:.4f}  Train Loss: {:.4f}".format(acc, loss))

loss, acc = model.evaluate(test_loader_1, verbose=0)
print("Test Accuracy: {:.4f}  Test Loss: {:.4f}".format(acc, loss))