# Model 13: Further Capacity Increase

This notebook contains the construction and training of the model iterations and various experiments. The notebook is split into four sections: training mode selection (where the model will run), set-up, model construction, and training.

Evaluation will take place in the *model_optimization_and_evaluation.ipynb* notebook found in the *notebooks* folder.

In [1]:
# importing necessary packages and libraries
import numpy as np
import pandas as pd
from keras import layers
from keras import models
from keras import optimizers
from keras import applications 
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
import pickle

Using TensorFlow backend.


## Step 1: Training Mode Selection

In the cell below, specify the training mode for the model. This will determine the location from which the source data is drawn, and to which the trained models (and training histories) are saved. 

- **training_mode = 'floydhub'** (runs on Floydhub)
- **training_mode = 'local'** (runs on local disk and processor)

In [2]:
# execution target for this run: "floydhub" or "local"
training_mode = "floydhub"

## Step 2: Set-up

In [3]:
# directory base paths; each one ends with a separator because the loading
# and saving cells build file names by appending directly to these strings
data_path_local = '../../data/0002_array_data/train_data/'
model_path_local = '../../notebooks/model_construction/saved_models/'
data_path_floydhub = '/floyd/input/capstone_mushrooms/'
model_path_floydhub = '/floyd/home/'

# setting directory paths based on training mode selection
if training_mode == 'floydhub':
    data_path = data_path_floydhub
    model_path = model_path_floydhub
elif training_mode == 'local':
    data_path = data_path_local
    model_path = model_path_local
else:
    # an unsupported mode is a bad value, so raise ValueError instead of a bare
    # Exception; ValueError subclasses Exception, so broader handlers still catch it
    raise ValueError('Please choose valid training mode: "floydhub" or "local".')

In [4]:
# read the training and validation arrays (X_* and y_*) from the selected
# data directory, in the same order as the names they are unpacked into
array_files = ('X_train_data.npy', 'y_train_data.npy',
               'X_val_data.npy', 'y_val_data.npy')
X_train, y_train, X_val, y_val = [np.load(f'{data_path}{array_file}')
                                  for array_file in array_files]

In [5]:
# training hyperparameters
n_epochs = 100
batch_size = 8
n_classes = 20

# per-sample input shape: every axis of X_train except the leading one
img_shape = X_train.shape[1:]

# parallel lists collecting each trial's name, model object, and fit history
model_names, model_list, model_hists = [], [], []

## Step 3: Model Construction

### Trial 1

In [6]:
# trial 1 architecture, assembled layer by layer on an empty Sequential model
m13_t1 = models.Sequential()

# convolution/max pool stacks: 3x3 'same' convolution -> LeakyReLU -> 2x2 max
# pool, repeated once per filter count; only the first convolution is given
# the input shape
conv_filters = (64, 128, 256, 512, 512)
for position, n_filters in enumerate(conv_filters):
    first_layer_args = {'input_shape': img_shape} if position == 0 else {}
    m13_t1.add(layers.Conv2D(n_filters, (3, 3), padding='same', **first_layer_args))
    m13_t1.add(layers.LeakyReLU(alpha=0.1))
    m13_t1.add(layers.MaxPooling2D((2, 2)))

# fully connected layers: Dense -> LeakyReLU -> Dropout(0.25) per width,
# followed by a softmax output with one unit per class
m13_t1.add(layers.Flatten())
for n_units in (1024, 2048):
    m13_t1.add(layers.Dense(n_units))
    m13_t1.add(layers.LeakyReLU(alpha=0.1))
    m13_t1.add(layers.Dropout(0.25))
m13_t1.add(layers.Dense(n_classes, activation='softmax'))

# print the layer summary, then register the trial's name and model object
m13_t1.summary()
model_names.append('m13_t1')
model_list.append(m13_t1)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 200, 200, 64)      1792      
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 200, 200, 64)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 100, 100, 64)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 100, 100, 128)     73856     
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 100, 100, 128)     0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 50, 50, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 50, 50, 256)      

## Step 4: Training

In [7]:
# both generators rescale inputs by 1/255; only the training generator is
# additionally configured with the augmentation settings below
augmentation_args = dict(
    rotation_range=40,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.3,
    fill_mode='nearest',
    horizontal_flip=True,
    vertical_flip=True,
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_args)
val_datagen = ImageDataGenerator(rescale=1./255)

In [8]:
%%time
# wrap the in-memory arrays in batch iterators: training batches come from
# train_datagen, validation batches from val_datagen
train_generator = train_datagen.flow(x=X_train, y=y_train, batch_size=batch_size)
val_generator = val_datagen.flow(x=X_val, y=y_val, batch_size=batch_size)

CPU times: user 336 ms, sys: 970 ms, total: 1.31 s
Wall time: 1.3 s


In [9]:
# Adamax optimizer with a learning rate of 9e-4
opt = optimizers.Adamax(learning_rate=9e-4)

# configure the model for training: categorical cross-entropy loss, with
# accuracy tracked under the metric name 'acc'
m13_t1.compile(optimizer=opt,
               loss='categorical_crossentropy',
               metrics=['acc'])

In [10]:
%%time
# setting up model saving checkpoints: the .h5 file is only rewritten when the
# monitored 'val_acc' improves (save_best_only=True)
m13_t1_cp = ModelCheckpoint(filepath=f'{model_path}m13_t1.h5',
                            monitor='val_acc',
                            verbose=1,
                            save_best_only=True)

# fitting model
# validation_steps uses len(val_generator) rather than len(X_val)//batch_size:
# floor division leaves out the final partial batch whenever len(X_val) is not
# a multiple of batch_size, whereas the generator's length counts that batch.
# This matches the later evaluation cell, which runs over the whole generator.
m13_t1_history = m13_t1.fit(train_generator,
                            steps_per_epoch=len(X_train)//batch_size,
                            epochs=n_epochs,
                            callbacks=[m13_t1_cp],
                            validation_data=val_generator,
                            validation_steps=len(val_generator))

# adding training history to list
model_hists.append(m13_t1_history)

Epoch 1/100

Epoch 00001: val_acc improved from -inf to 0.18069, saving model to /floyd/home/m13_t1.h5
Epoch 2/100

Epoch 00002: val_acc improved from 0.18069 to 0.33292, saving model to /floyd/home/m13_t1.h5
Epoch 3/100

Epoch 00003: val_acc improved from 0.33292 to 0.40274, saving model to /floyd/home/m13_t1.h5
Epoch 4/100

Epoch 00004: val_acc did not improve from 0.40274
Epoch 5/100

Epoch 00005: val_acc improved from 0.40274 to 0.44140, saving model to /floyd/home/m13_t1.h5
Epoch 6/100

Epoch 00006: val_acc improved from 0.44140 to 0.46135, saving model to /floyd/home/m13_t1.h5
Epoch 7/100

Epoch 00007: val_acc improved from 0.46135 to 0.52369, saving model to /floyd/home/m13_t1.h5
Epoch 8/100

Epoch 00008: val_acc did not improve from 0.52369
Epoch 9/100

Epoch 00009: val_acc improved from 0.52369 to 0.54489, saving model to /floyd/home/m13_t1.h5
Epoch 10/100

Epoch 00010: val_acc did not improve from 0.54489
Epoch 11/100

Epoch 00011: val_acc did not improve from 0.54489
Epoch 1

In [11]:
# map each model name to a [model, history] pair, matching the three lists by position
models_dict = {
    name: [net, hist]
    for name, net, hist in zip(model_names, model_list, model_hists)
}

In [12]:
# reload each trial's saved .h5 file from model_path and score it on the
# validation generator
for key in models_dict:
    saved_model = models.load_model(f'{model_path}{key}.h5')
    val_loss, val_accuracy = saved_model.evaluate(val_generator, verbose=1)
    print(f'{key} Val Accuracy: {round((val_accuracy*100),2)}%')
    print(f'{key} Val Loss: {round(val_loss,4)}')
    print('---')

m13_t1 Val Accuracy: 82.96%
m13_t1 Val Loss: 0.0
---


In [13]:
# pickle the `.history` attribute of each stored fit result to
# '<model_path><name>_history'
for key, (_trained_model, fit_result) in models_dict.items():
    with open(f'{model_path}{key}_history', 'wb') as out_file:
        pickle.dump(fit_result.history, out_file)
    print(f'{key}_history saved in {model_path}')

m13_t1_history saved in /floyd/home/
