In [1]:
import os
import numpy as np

from keras.preprocessing.image import ImageDataGenerator
import keras
import datetime
import sys  
from keras.models import Sequential, Model
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Activation
from keras.activations import relu, tanh, elu
from keras.optimizers import Adagrad, Adam, Nadam, SGD
from keras.losses import categorical_crossentropy
from keras.layers.normalization import BatchNormalization
from keras.constraints import maxnorm
from keras import optimizers
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras import backend as K
from keras.metrics import top_k_categorical_accuracy

import tensorflow as tf
from keras.callbacks import ModelCheckpoint

from keras.applications.mobilenet import MobileNet as FE
from keras.applications.mobilenet import preprocess_input, decode_predictions

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
name = "MobileNet"
classes = 10
weight_decay=1e-5
lr = 1e-3
epochs = 50
decay = lr/epochs
batch_size = 50
image_size = 224

In [3]:
def preprocess_input(x):
    x /= 255.0
    x -= 0.5
    x *= 2.0
    return x # x is now between -1 and 1

In [4]:
def top_2_accuracy(y_true, y_pred):
    return top_k_categorical_accuracy(y_true, y_pred, k=2)

In [5]:
data_generator_train = ImageDataGenerator(
    featurewise_center=False,  
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False, 
    rotation_range=30, 
    zoom_range=0.3,
    horizontal_flip=True, 
    vertical_flip=False,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.001,
    channel_shift_range=0.1,
    fill_mode='reflect',
    data_format='channels_last',
    #preprocessing_function=preprocess_input  
)



data_generator_val = ImageDataGenerator(
    featurewise_center=False,  
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False, 
    data_format='channels_last',
    preprocessing_function=preprocess_input
)

data_generator_test = ImageDataGenerator(
    featurewise_center=False,  
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
     data_format='channels_last',
    preprocessing_function=preprocess_input
)


train_generator = data_generator_train.flow_from_directory(
    'new_data_set/training',   
    target_size=(image_size, image_size),
    batch_size=batch_size
)

val_generator = data_generator_val.flow_from_directory(
   'new_data_set/validation', shuffle=False,
    target_size=(image_size, image_size),
    batch_size=15
)

test_generator = data_generator_test.flow_from_directory(
    'new_data_set/testing', shuffle=False, 
    target_size = (image_size,image_size),
    batch_size = 15
)

Found 34260 images belonging to 10 classes.
Found 4012 images belonging to 10 classes.
Found 2581 images belonging to 12 classes.


In [6]:
num_training = 34260 
num_validation = 4012 
num_testing = 2581 

In [7]:
# Constructing the Model 
feature_extractor= FE(input_shape=(image_size, image_size, 3), include_top=False, weights='imagenet',
                      pooling = 'avg', classes=classes)

for layer in feature_extractor.layers[:]:
        layer.trainable = False
        
classifier = feature_extractor.output
classifier = Dropout(0.5)(classifier)
logits1 = Dense(100, activation='relu', kernel_regularizer=keras.regularizers.l2(weight_decay)) (classifier)
logits2 = Dense(50, activation='relu', kernel_regularizer=keras.regularizers.l2(weight_decay)) (logits1)
logits3 = Dense(classes, activation='relu', kernel_regularizer=keras.regularizers.l2(weight_decay)) (logits2)
probabilities = Activation('softmax') (logits3)
    
full_model = Model(feature_extractor.input, probabilities)

In [8]:
# add entropy to the usual logloss (it is for regularization),
# "Regularizing Neural Networks by Penalizing Confident Output Distributions",
# https://arxiv.org/abs/1701.06548
# it reduces overfitting a little bit
def loss(y_true, y_pred):
    entropy = -KerasBackend.mean(KerasBackend.sum(y_pred*KerasBackend.log(y_pred), 1))
    beta = 0.1
    return categorical_crossentropy(y_true, y_pred) - beta*entropy


In [9]:
optimizer=SGD(lr=lr, momentum=0.9, nesterov=True)
full_model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                   metrics=['accuracy', top_2_accuracy])

In [10]:
check_point = ModelCheckpoint("my_best_weights_" + name + "_training.hdf5", monitor = "val_acc", save_best_only = True, period = 1)
history = full_model.fit_generator(  train_generator,
                                #steps_per_epoch= 10,
                                steps_per_epoch= num_training // batch_size,
                                epochs=epochs,
                                validation_data=val_generator,
                                   validation_steps= num_validation // 15,
                                verbose=1,
                                workers=8, 
                                callbacks=[check_point],
                             )

Epoch 1/50







Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50


Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [11]:
full_model.load_weights("my_best_weights_MobileNet_training.hdf5")

In [12]:
full_model.save(name + "_10_classes_training.hdf5")

In [13]:
for i, layer in enumerate(feature_extractor.layers):
    print(i, layer.name)

0 input_1
1 conv1
2 conv1_bn
3 conv1_relu
4 conv_dw_1
5 conv_dw_1_bn
6 conv_dw_1_relu
7 conv_pw_1
8 conv_pw_1_bn
9 conv_pw_1_relu
10 conv_dw_2
11 conv_dw_2_bn
12 conv_dw_2_relu
13 conv_pw_2
14 conv_pw_2_bn
15 conv_pw_2_relu
16 conv_dw_3
17 conv_dw_3_bn
18 conv_dw_3_relu
19 conv_pw_3
20 conv_pw_3_bn
21 conv_pw_3_relu
22 conv_dw_4
23 conv_dw_4_bn
24 conv_dw_4_relu
25 conv_pw_4
26 conv_pw_4_bn
27 conv_pw_4_relu
28 conv_dw_5
29 conv_dw_5_bn
30 conv_dw_5_relu
31 conv_pw_5
32 conv_pw_5_bn
33 conv_pw_5_relu
34 conv_dw_6
35 conv_dw_6_bn
36 conv_dw_6_relu
37 conv_pw_6
38 conv_pw_6_bn
39 conv_pw_6_relu
40 conv_dw_7
41 conv_dw_7_bn
42 conv_dw_7_relu
43 conv_pw_7
44 conv_pw_7_bn
45 conv_pw_7_relu
46 conv_dw_8
47 conv_dw_8_bn
48 conv_dw_8_relu
49 conv_pw_8
50 conv_pw_8_bn
51 conv_pw_8_relu
52 conv_dw_9
53 conv_dw_9_bn
54 conv_dw_9_relu
55 conv_pw_9
56 conv_pw_9_bn
57 conv_pw_9_relu
58 conv_dw_10
59 conv_dw_10_bn
60 conv_dw_10_relu
61 conv_pw_10
62 conv_pw_10_bn
63 conv_pw_10_relu
64 conv_dw_11
65 c

In [None]:
for layer in feature_extractor.layers[:70]:
    layer.trainable = False
    
for layer in feature_extractor.layers[70:]:
    layer.trainable = True

In [None]:
optimizer=SGD(lr=lr/10, momentum=0.9, nesterov=True)
full_model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                   metrics=['accuracy', top_2_accuracy])

In [None]:
check_point = ModelCheckpoint("my_best_weights_" + name + "_finetuning.hdf5", monitor = "val_acc", save_best_only = True, period = 1)
history = full_model.fit_generator(  train_generator,
                                steps_per_epoch= num_training  // batch_size,
                                epochs=epochs,
                                validation_data=val_generator,
                                verbose=1,
                                workers=8, 
                                callbacks=[check_point],
                             )

In [None]:
full_model.save(name + "_10_classes_finetuning.hdf5")