In [1]:
import sys
sys.path.append('/usr/local/lib/python3.6/site-packages') # For cv2 finding
import os, glob, math,time
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
from keras.utils import np_utils
#from tqdm import tqdm
import gc

Using TensorFlow backend.


In [3]:
def createSubmitionFile(fileName, data, imgListFile='data/test_imgs_list.csv'):
    """
    Build the Kaggle submission csv from the predicted class probabilities.

    The image list csv supplies the names of the test images (column 'img',
    without extension); row i of `data` holds the probabilities predicted for
    the image in row i of that list.

    fileName    - path of the submission file that will be created
    data        - array-like of shape (number of test images, 10) with the
                  predicted probability of each class c0..c9 for each image
    imgListFile - csv listing the test images; must contain the columns
                  'img' and 'file_names'

    Returns the DataFrame that was written (the 'file_names' column removed,
    '.jpg' appended to 'img', and one column per class added).

    Raises ValueError if `data` is not 2-dimensional with 10 columns or if its
    number of rows differs from the number of listed images.
    """
    labels = ["c" + str(i) for i in range(10)]

    df = pd.read_csv(imgListFile)
    df = df.drop(['file_names'], axis=1)
    df['img'] = df['img'] + ".jpg"

    data = np.asarray(data)
    if data.ndim != 2 or data.shape[1] != len(labels):
        raise ValueError(
            "data must have shape (n_images, {}), got {}".format(len(labels), data.shape))
    # Without this check a row-count mismatch would be hidden: missing rows
    # would silently end up as NaN probabilities in the submission.
    if data.shape[0] != len(df):
        raise ValueError(
            "data has {} rows but {} lists {} images".format(data.shape[0], imgListFile, len(df)))

    # Assign positionally, so the result does not depend on index alignment.
    for column, label in enumerate(labels):
        df[label] = data[:, column]

    df.to_csv(fileName, columns=['img'] + labels, index=False)
    return df

### load the pre-processed training and validation datasets from storage

In [4]:
# Read the pre-processed tensors back from disk: first the training pair
# (images, targets), then the validation pair.
train_data, train_targets = (
    np.load('data/tensors/training/train_data_224.txt.npy'),
    np.load('data/tensors/training/train_targets.txt.npy'),
)
valid_data, valid_targets = (
    np.load('data/tensors/training/valid_data_224.txt.npy'),
    np.load('data/tensors/training/valid_targets.txt.npy'),
)

### Create and initialize an ImageDataGenerator which will be used for image augmentation during training

In [5]:
from keras.preprocessing.image import ImageDataGenerator

# create and configure augmented image generator
datagen_train = ImageDataGenerator(
    width_shift_range=0.2,  # randomly shift images horizontally (up to 20% of total width)
    height_shift_range=0.2,  # randomly shift images vertically (up to 20% of total height)
    horizontal_flip=False,  # horizontal flipping is disabled
    rotation_range=20.0  # randomly rotate images by up to 20 degrees
)

# NOTE: datagen_train.fit(train_data) is intentionally not called. fit() only
# computes the statistics needed by featurewise_center,
# featurewise_std_normalization and zca_whitening; none of them is enabled
# here, so the call would merely copy the whole training array in memory.
# Re-add it if one of those options is switched on.

### Create and initialize a checkpoint function which will save to storage the model parameters for the best validation score during training

In [6]:
from keras.callbacks import ModelCheckpoint   


# Checkpoint callback: after each epoch, write the model to disk only when the
# validation loss improved on the best value seen so far (save_best_only=True).
# verbose=1 is the documented "print a message on save" setting.
checkpointer = ModelCheckpoint(filepath='mobilenet_v2_model4.weights.best.hdf5',
                               monitor='val_loss', verbose=1,
                               save_best_only=True)

### Create the MobileNetV2 model and execute the training

In [7]:
from keras.applications.mobilenet_v2 import MobileNetV2
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Dropout
from keras import backend as K
from keras.layers import Input
from keras.optimizers import Adam

# hyper-parameters
batch_size = 16
epochs = 10
neurons = 512         # units in each fully-connected layer of the new head
dropout_rate = 0.6    # dropout applied after each fully-connected layer
activation = 'tanh'   # activation of the fully-connected layers
learn_rate = 0.0001
alpha = 0.75          # passed to MobileNetV2 as its `alpha` argument

# create the base pre-trained model (ImageNet weights, without its classifier top)
# NOTE(review): presumably the stored tensors were already scaled the way
# MobileNetV2 expects its inputs - confirm against the preprocessing step.
base_model = MobileNetV2(weights='imagenet', input_shape=(224, 224, 3), include_top=False, alpha=alpha)

# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
# two fully-connected layers, each followed by dropout
x = Dense(neurons, activation=activation)(x)
x = Dropout(dropout_rate)(x)
x = Dense(neurons, activation=activation)(x)
x = Dropout(dropout_rate)(x)
# softmax output layer with one unit per class (10 classes)
predictions = Dense(10, activation='softmax')(x)

# this is the model we will train (base network and new head together)
model = Model(inputs=base_model.input, outputs=predictions)

optim = Adam(lr=learn_rate)

model.compile(optimizer=optim, loss='categorical_crossentropy')
model.summary()

# Train on augmented batches. Validation data is passed as plain arrays, so
# validation_steps (only relevant for generator validation data) is not given.
# verbose=2 prints one line per epoch including the losses.
model.fit_generator(datagen_train.flow(train_data, train_targets, batch_size=batch_size),
                    steps_per_epoch=train_data.shape[0] // batch_size,
                    epochs=epochs, verbose=2, callbacks=[checkpointer],
                    validation_data=(valid_data, valid_targets))



__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 24) 648         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 24) 96          Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu

Epoch 1/10

Epoch 00001: val_loss improved from inf to 1.16998, saving model to mobilenet_v2_model4.weights.best.hdf5
Epoch 2/10

Epoch 00002: val_loss improved from 1.16998 to 1.04502, saving model to mobilenet_v2_model4.weights.best.hdf5
Epoch 3/10

Epoch 00003: val_loss did not improve from 1.04502
Epoch 4/10

Epoch 00004: val_loss improved from 1.04502 to 0.95952, saving model to mobilenet_v2_model4.weights.best.hdf5
Epoch 5/10

Epoch 00005: val_loss improved from 0.95952 to 0.75304, saving model to mobilenet_v2_model4.weights.best.hdf5
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.75304
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.75304
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.75304
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.75304
Epoch 10/10

Epoch 00010: val_loss did not improve from 0.75304


<keras.callbacks.History at 0x7f20d422ee10>

In [8]:
# Restore the parameters from the checkpoint file written during training
# (the checkpoint callback only saves when the validation loss improves),
# replacing the parameters left over from the last epoch.
model.load_weights('mobilenet_v2_model4.weights.best.hdf5')

### Infer the category for each image in the test dataset by loading the preprocessed test images and passing them through the trained model

In [None]:
# The pre-processed test images are stored in several chunk files, so they are
# loaded and predicted one chunk at a time.
NUM_TEST_CHUNKS = 8        # chunk_1 ... chunk_8
PREDICT_BATCH_SIZE = 32    # images per forward pass

chunk_predictions = []
for i in range(1, NUM_TEST_CHUNKS + 1):
    print('loading chunk ' + str(i))
    test_data = np.load('data/tensors/testing/test_data_224/chunk_' + str(i) + '.txt.npy')
    print('predictions for chunk ' + str(i))
    start_time = time.time()
    # One batched predict call per chunk instead of one call per image.
    chunk_predictions.append(model.predict(test_data, batch_size=PREDICT_BATCH_SIZE))
    elapsed_time = time.time() - start_time
    print ('elapsed time for predicting of chunck {} is: {}'.format(i, elapsed_time))
    gc.collect()

# Join the per-chunk results once at the end; stacking inside the loop would
# re-copy all earlier predictions on every iteration.
model_predictions = np.concatenate(chunk_predictions, axis=0)
# One row of 10 class probabilities per test image; -1 lets numpy derive the
# number of images instead of hard-coding it.
reshaped_model_predictions = np.reshape(model_predictions, (-1, 10))
reshaped_model_predictions.shape


loading chunk 1
predictions for chunk 1


### Create the file for submission

In [14]:
# Write the submission csv from the test-set predictions and keep the returned
# DataFrame for inspection.
# NOTE(review): the output name says "model3" while the weights loaded for this
# run are "mobilenet_v2_model4" - confirm the intended file name.
df = createSubmitionFile('data/submit_mobilenet_v2_model3.csv', reshaped_model_predictions)

In [15]:
# Show the first rows of the submission frame as a quick sanity check.
df.head()

Unnamed: 0,img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9
0,img_1.jpg,4.409167e-08,5.417031e-08,5.767112e-09,1.068077e-08,2.674325e-09,0.999998,7.801695e-09,1.455993e-08,3.917699e-08,2.121164e-06
1,img_10.jpg,2.389169e-07,2.595256e-08,8.637616e-09,7.927019e-09,3.165932e-09,1.0,2.388482e-08,5.875803e-08,3.399013e-08,1.131529e-07
2,img_100.jpg,0.9997748,3.253946e-06,4.865116e-07,2.52932e-06,1.148653e-06,0.000117,5.165196e-07,1.961477e-07,6.087543e-06,9.374465e-05
3,img_1000.jpg,3.004844e-06,5.712915e-06,0.0001514524,1.061673e-06,8.839898e-07,2e-06,1.769923e-05,1.409939e-07,0.9996926,0.0001258337
4,img_100000.jpg,0.1075693,9.62784e-05,8.384215e-06,0.8857452,0.0004529564,0.002702,6.844537e-06,0.003404719,1.105628e-06,1.302711e-05
