# Cdiscount-CNN

## Import some packages.

In [1]:
from keras.applications.resnet50 import ResNet50 as CNN
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
from keras.models import Model
from keras.layers import Dense, Input, Flatten
import keras.backend as K

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import threading
from utilities import utils
from utilities.BSONIterator import BSONIterator

%load_ext autoreload
%autoreload 2
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Project helper from utilities/utils.py, called once before any generator or
# model is created.
# NOTE(review): presumably seeds the Python/NumPy/TensorFlow RNGs - confirm in utils.
utils.set_results_reproducible()

## Load Lookup Tables
First load the lookup tables from the CSV files.

In [3]:
# Category lookup tables. idx2cat converts a predicted class index back to the
# original category label; cat2idx is presumably the inverse mapping (both are
# built by utils.make_category_tables - confirm there).
categories_df = pd.read_csv("inputs/categories.csv", encoding = "ISO-8859-1", index_col=0)
cat2idx, idx2cat = utils.make_category_tables(categories_df)

# Tables consumed by the training and validation BSONIterators.
# NOTE(review): train_offsets_df presumably holds per-product offsets into
# train.bson, and the *_images_df tables one row per image - confirm schema.
train_offsets_df = pd.read_csv("inputs/train_offsets.csv", encoding = "ISO-8859-1", index_col=0)
train_images_df = pd.read_csv("inputs/train_images.csv", encoding = "ISO-8859-1", index_col=0)
val_images_df = pd.read_csv("inputs/val_images.csv", encoding = "ISO-8859-1", index_col=0)

# The Test section builds its generator from these two tables. They used to be
# commented out, which made the Test cells fail with NameError on a fresh kernel.
test_offsets_df = pd.read_csv("inputs/test_offsets.csv", index_col=0)
test_images_df = pd.read_csv("inputs/test_images.csv", index_col=0)

  mask |= (ar1 == a)


## Set Some Settings

In [4]:
# Directory that holds the competition's BSON files.
data_dir = "inputs/"

# Full training set.
train_bson_path = os.path.join(data_dir, "train.bson")
num_train_products = 7069896

# For a quick smoke test, swap in the small example file instead:
#train_bson_path = "inputs/train_example.bson"
#num_train_products = 82

# Test set. test_bson_path is opened in the Test section, so it has to be
# defined here (it used to be commented out, leaving that cell with a NameError).
test_bson_path = os.path.join(data_dir, "test.bson")
num_test_products = 1768172

In [6]:
# A single handle on train.bson is shared by both iterators below, and both are
# handed the same lock object.
train_bson_file = open(train_bson_path, "rb")
lock = threading.Lock()

num_classes = 5270
batch_size = 96
input_size = 197 #180
num_train_images = len(train_images_df)
num_val_images = len(val_images_df)

# Spatial size requested from both iterators; matches the model's input_shape.
image_shape = (input_size, input_size)

# Training images go through random augmentation.
augmentation = dict(rotation_range=10,
                    width_shift_range=0.1,
                    height_shift_range=0.1,
                    shear_range=0.15,
                    zoom_range=0.1,
                    channel_shift_range=10.,
                    horizontal_flip=True)
train_datagen = ImageDataGenerator(**augmentation)
train_gen = BSONIterator(train_bson_file, train_images_df, train_offsets_df, num_classes,
                         train_datagen, lock,
                         target_size=image_shape,
                         batch_size=batch_size,
                         shuffle=True)

# Validation images come from the same BSON file and offsets table, but are
# neither augmented nor explicitly shuffled.
val_datagen = ImageDataGenerator()
val_gen = BSONIterator(train_bson_file, val_images_df, train_offsets_df, num_classes,
                       val_datagen, lock,
                       target_size=image_shape,
                       batch_size=batch_size)

Found 9901521 images belonging to 5270 classes.
Found 2469772 images belonging to 5270 classes.


## Train

In [8]:
# ResNet50 without its classification head. The architecture is created with
# no weights and the "notop" weights are then read from a local file.
backbone = CNN(weights=None,
               include_top=False,
               input_shape=(input_size, input_size, 3))
backbone.load_weights('weights/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5')

# New head: flatten the backbone output and classify into num_classes with softmax.
features = Flatten()(backbone.output)
class_probabilities = Dense(num_classes, activation='softmax')(features)

# `model` is the full network (backbone + new head) used by all later cells.
model = Model(inputs=backbone.input, outputs=class_probabilities)
model.compile(loss="categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])

#model.summary()

In [20]:
# first layer of conv4_x (28th trainable layer from top including fc) --> refer to ResNet50 structure
index_layer = -97

# Freeze everything before index_layer; fine-tune from index_layer to the end.
for layer in model.layers[:index_layer]:
    layer.trainable = False
for layer in model.layers[index_layer:]:
    layer.trainable = True

# Keras only honours changes to `trainable` when the model is compiled. The
# model was compiled before these flags were set, so it is recompiled here with
# the same settings; otherwise the freeze above has no effect on training.
model.compile(optimizer="adam",
              loss="categorical_crossentropy",
              metrics=["accuracy"])

In [22]:
epochs = 50

# Name for this run, used for both the checkpoint file and the TensorBoard log
# directory (the recorded run below was named "ResNet50--2017-10-10-0013").
run_name = utils.get_run_name('weights/{}.hdf5', 'ResNet50-')
weights_path = 'weights/{}.hdf5'.format(run_name)

# Every callback that watches a metric watches validation accuracy (higher is better).
# Stop after 3 epochs without an improvement of at least 1e-4.
early_stopping = EarlyStopping(monitor='val_acc', mode='max',
                               patience=3, min_delta=1e-4, verbose=1)
# Divide the learning rate by 10 after 2 epochs without improvement.
reduce_lr = ReduceLROnPlateau(monitor='val_acc', mode='max',
                              factor=0.1, patience=2, epsilon=1e-4, verbose=1)
# Keep only the weights of the best epoch so far.
checkpoint = ModelCheckpoint(filepath=weights_path, monitor='val_acc', mode='max',
                             save_best_only=True, save_weights_only=True)
tensorboard = TensorBoard(log_dir='logs/{}'.format(run_name), batch_size=batch_size)

callbacks = [early_stopping, reduce_lr, checkpoint, tensorboard]

# Resume from an earlier run's weights and continue with a learning rate of 1e-4.
model.load_weights("weights/ResNet50-last_FC-2017-10-07-1127.hdf5")
K.set_value(model.optimizer.lr, 1e-4)

# To train the model:
print('Starting run "{}"'.format(run_name))
model.fit_generator(train_gen,
                    steps_per_epoch=num_train_images // batch_size,
                    validation_data=val_gen,
                    validation_steps=num_val_images // batch_size,
                    epochs=epochs,
                    callbacks=callbacks,
                    workers=8,
                    verbose=1)

Starting run "ResNet50--2017-10-10-0013"
Epoch 1/50
Epoch 2/50
Epoch 3/50

KeyboardInterrupt: 

## Test

In [None]:
# Open the test BSON file in binary mode; the handle is passed to the test
# BSONIterator in the next cell and is not closed in this notebook.
test_bson_file = open(test_bson_path, "rb")

In [None]:
# No augmentation at test time.
test_datagen = ImageDataGenerator()

# Built like the train/validation iterators: the shared lock goes in the
# positional slot after the data generator, and target_size is passed so the
# images match the (input_size, input_size, 3) input the model was built with.
# Both were missing from this call. with_labels=False because the test set has
# no labels; shuffle=False so the order of the batches stays fixed.
test_gen = BSONIterator(test_bson_file, test_images_df, test_offsets_df,
                        num_classes, test_datagen, lock,
                        target_size=(input_size, input_size),
                        batch_size=batch_size,
                        with_labels=False, shuffle=False)

Running model.predict_generator() returns an array of 3095080 predictions, one row of class probabilities for each image.

The indices of the predictions correspond to the indices in test_images_df. After making the predictions, you probably want to average the predictions for products that have multiple images.

Use idx2cat[] to convert the predicted category index back to the original class label.

In [None]:
num_test_samples = len(test_images_df)

# Round the step count up so the final, partial batch is predicted too. With
# floor division the last (num_test_samples % batch_size) images got no prediction.
test_steps = (num_test_samples + batch_size - 1) // batch_size

# NOTE(review): assumes test_gen yields a short final batch rather than wrapping
# around, and that batch order is preserved with workers=8 - confirm for the
# installed Keras version.
predictions = model.predict_generator(test_gen, steps=test_steps, workers=8)

# One prediction row per test image is what the averaging step relies on.
assert len(predictions) == num_test_samples, "expected one prediction per test image"

Some people mentioned earlier that they got errors on bson.BSON.decode(). I was not able to reproduce this until just now. It turns out that using ImageDataGenerator with certain data augmentation options causes this to happen.

If you use rotation_range, width_shift_range, height_shift_range, shear_range, or zoom_range, then the BSON decoding gets messed up for some reason. I don't understand why but all these augmentation options use a transformation matrix, and so maybe the code that applies this matrix to the image has a bug and overwrites memory. But that is just a guess.

Interestingly enough, the data augmentation options that do not use this transformation matrix, such as channel_shift_range and horizontal_flip work just fine...

### Some handy code