In [1]:
import numpy as np
from sklearn.model_selection import train_test_split

import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical

from keras.models import Model
from keras.layers import Input, Convolution2D, MaxPooling2D,\
    Activation, Dropout, GlobalAveragePooling2D, concatenate

import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


# Load data

In [2]:
images = np.load('images.npy')
targets = np.load('targets.npy')

In [3]:
unique_classes = np.unique(targets)
# class name -> integer
encode = {n: i for i, n in enumerate(unique_classes)}
# integer -> class name
decode = {i: n for i, n in enumerate(unique_classes)}

In [4]:
# encode class names by integers
targets = np.array([encode[n] for n in targets], dtype='int32')
targets_onehot = to_categorical(targets)

In [5]:
# this values are taken from here 
# http://pytorch.org/docs/master/torchvision/models.html
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

In [6]:
%%time
images = images.astype('float32')
images /= 255.0

# standardize
images -= mean
images /= std

CPU times: user 58.6 s, sys: 2.87 s, total: 1min 1s
Wall time: 1min 1s


In [7]:
X_train, X_test, Y_train, Y_test = train_test_split(
    images, targets_onehot, 
    test_size=0.15, stratify=targets
)
print(len(X_train), len(X_test))

18785 3315


In [8]:
del images
del targets_onehot

# Generate data

In [9]:
data_generator = ImageDataGenerator(
    rotation_range=35, 
    zoom_range=0.2,
    horizontal_flip=True, 
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.01,
    channel_shift_range=0.1,
    fill_mode='reflect',
    data_format='channels_last'
)

train_generator = data_generator.flow(
        X_train, Y_train,
        batch_size=len(X_train)
)

In [10]:
%%time
x_chunk, y_chunk = train_generator.next()

CPU times: user 2min 28s, sys: 15.7 s, total: 2min 44s
Wall time: 2min 44s


# Model

In [11]:
# pretrained weights of SqueezeNet v1.1
weights = np.load('weights.npy')[()]
# we don't need weights of the last layer
del weights['conv10']

In [12]:
# a building block of the SqueezeNet architecture
def fire_module(number, x, squeeze, expand, trainable=False):
    
    module_name = 'fire' + number
    
    x = Convolution2D(
        squeeze, (1, 1), 
        name=module_name + '/' + 'squeeze',
        trainable=trainable
    )(x)
    x = Activation('relu')(x)

    a = Convolution2D(
        expand, (1, 1),
        name=module_name + '/' + 'expand1x1',
        trainable=trainable
    )(x)
    a = Activation('relu')(a)

    b = Convolution2D(
        expand, (3, 3), padding='same',
        name=module_name + '/' + 'expand3x3',
        trainable=trainable
    )(x)
    b = Activation('relu')(b)

    return concatenate([a, b])

In [20]:
def SqueezeNet():

    image = Input(shape=(224, 224, 3))

    x = Convolution2D(
        64, (3, 3), strides=(2, 2), name='conv1', 
        trainable=False
    )(image) # 111, 111, 64
    
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x) # 55, 55, 64

    x = fire_module('2', x, squeeze=16, expand=64) # 55, 55, 128
    x = fire_module('3', x, squeeze=16, expand=64) # 55, 55, 128
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x) # 27, 27, 128

    x = fire_module('4', x, squeeze=32, expand=128) # 27, 27, 256
    x = fire_module('5', x, squeeze=32, expand=128) # 27, 27, 256
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x) # 13, 13, 256

    x = fire_module('6', x, squeeze=48, expand=192) # 13, 13, 384
    x = fire_module('7', x, squeeze=48, expand=192) # 13, 13, 384
    x = fire_module('8', x, squeeze=64, expand=256) # 13, 13, 512
    x = fire_module('9', x, squeeze=64, expand=256, trainable=True) # 13, 13, 512
    x = Dropout(0.5)(x)
    
    x = Convolution2D(
        256, (1, 1), name='conv10',
        kernel_initializer=keras.initializers.RandomNormal(stddev=0.01),
        kernel_regularizer=keras.regularizers.l2(1e-2)
    )(x) # 13, 13, 256
    
    x = Activation('relu')(x)
    logits = GlobalAveragePooling2D()(x) # 256
    classes = Activation('softmax')(logits)
    
    return Model(image, classes)

In [21]:
model = SqueezeNet()
model.count_params()

853824

In [22]:
# load pretrained weights into the model
for k in [w for w in weights]:
    model.get_layer(k).set_weights(weights[k])

In [23]:
# add regularization to the last fire module
model.get_layer('fire9/squeeze').kernel_regularizer = keras.regularizers.l2(1e-2)
model.get_layer('fire9/expand1x1').kernel_regularizer = keras.regularizers.l2(1e-2)
model.get_layer('fire9/expand3x3').kernel_regularizer = keras.regularizers.l2(1e-2)

In [24]:
model.compile(
    loss='categorical_crossentropy',
    optimizer=keras.optimizers.Adam(lr=1e-3),
    metrics=['accuracy']
)

In [25]:
%%time
model.fit(
    x_chunk, y_chunk,
    batch_size=64,
    epochs=6, verbose=2,
    validation_data=(X_test, Y_test)
);

Train on 18785 samples, validate on 3315 samples
Epoch 1/6
37s - loss: 3.5222 - acc: 0.3364 - val_loss: 2.3952 - val_acc: 0.5134
Epoch 2/6
36s - loss: 2.2949 - acc: 0.5410 - val_loss: 2.1577 - val_acc: 0.5689
Epoch 3/6
36s - loss: 2.0155 - acc: 0.6046 - val_loss: 2.0913 - val_acc: 0.5991
Epoch 4/6
36s - loss: 1.8555 - acc: 0.6401 - val_loss: 2.0275 - val_acc: 0.6069
Epoch 5/6
36s - loss: 1.7545 - acc: 0.6656 - val_loss: 2.0770 - val_acc: 0.6087
Epoch 6/6
36s - loss: 1.6586 - acc: 0.6898 - val_loss: 2.1262 - val_acc: 0.6012
CPU times: user 1min 31s, sys: 21.9 s, total: 1min 53s
Wall time: 3min 40s


<keras.callbacks.History at 0x7f71797a02b0>