This notebook was inspired by Thomas' kernel: https://www.kaggle.com/ponadto/complete-process-using-resnet-as-a-starting-point (thanks Thomas!)

I wanted to use a simpler architecture (https://github.com/fchollet/keras/blob/master/examples/cifar10_cnn.py), but get more out of image augmentation. I'm using Keras's `ImageDataGenerator` with its `flow_from_directory` functionality (image batches are generated directly from the files in a specified directory).

In [None]:
from __future__ import division

from keras.models import Model
from keras.models import Sequential
from keras.layers import Input, Activation, merge, Dense, Flatten
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D

from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers.normalization import BatchNormalization
from keras import optimizers

from keras.preprocessing.image import ImageDataGenerator

## Config declarations

In [None]:
# Side length (in pixels) of the square input image for the CNN.
# Larger is better, but the CNN then has to grow as well.
im_sz = 64

# All tunable settings of the notebook live in this single dictionary.
conf = {
    'seed': 2017,
    'num_classes': 3,
    'num_channels': 3,

    # Amount of data for training (trailing notes keep the alternative
    # values the author left next to each setting)
    'num_epochs': 10,            # alternative: 10
    'batch_size': 12,            # alternative: 32
    'steps_per_epoch': 16,
    'validation_steps': 16,

    'num_workers': 1,            # alternative: 8

    # Shape of the image fed to the CNN
    'image_shape': (im_sz, im_sz),

    # Pooling layer class used by the model; swap in `AveragePooling2D`
    # to see how the model performs with a different pooling strategy.
    'pooling_strategy': MaxPooling2D,
}

## Create a simple model

In [None]:
def create_model(conf):
    ''' Build and compile a small convolutional classifier.

    The architecture is borrowed from the Keras CIFAR-10 example:
            https://github.com/fchollet/keras/blob/master/examples/cifar10_cnn.py

    Parameters
    ----------
    conf : dict
        Configuration dictionary; the following keys are read:
        'num_channels'     -- number of channels of the input images,
        'num_classes'      -- size of the softmax output layer,
        'image_shape'      -- (rows, cols) of the input images,
        'pooling_strategy' -- pooling layer class, instantiated with
                              `pool_size=(2, 2)`.

    Returns
    -------
    A `Sequential` model compiled with categorical cross-entropy loss,
    the Adam optimizer and the accuracy metric.
    '''
    num_channels = conf['num_channels']
    num_classes = conf['num_classes']
    img_rows, img_cols = conf['image_shape']
    PoolingStrategy = conf['pooling_strategy']

    model = Sequential()

    # Convolution block: two 3x3 convolutions, pooling and dropout
    model.add(Conv2D(32, (3, 3), padding='valid',
                     input_shape=(img_rows, img_cols, num_channels)))

    model.add(Activation('relu'))
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(PoolingStrategy(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Optional second convolution block (64 filters), currently disabled:
#     model.add(Conv2D(64, (3, 3), padding='valid'))
#     model.add(Activation('relu'))
#     model.add(Conv2D(64, (3, 3)))
#     model.add(Activation('relu'))
#     model.add(PoolingStrategy(pool_size=(2, 2)))
#     model.add(Dropout(0.25))

    # Classifier head
    model.add(Flatten())
    model.add(BatchNormalization())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # Use the documented `Adam` class: the lowercase `optimizers.adam` alias
    # is not available in every Keras release, while `Adam` always is.
    adam = optimizers.Adam(lr=0.0001, decay=1e-6)

    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy'])

    return model


# The actual creation of the model: build and compile the network described by `conf`.
# The resulting `model` is what the following cells train and use for prediction.
model = create_model(conf)

## Fitting the model using the `ImageDataGenerator` (with no actual validation set -- "It's a trap!": the validation metrics below are computed on batches from the training generator)

In [None]:
print('Fit model...')

# Workaround for the problem with truncated image files; taken from
# http://stackoverflow.com/questions/12984426/python-pil-ioerror-image-file-truncated-with-big-images
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True


# Factor applied to the pixel values by both generators
# (and by the prediction cell further down).
SCALING = 1.0 / 255

# Training images: rescaled and randomly augmented (shear, zoom, flips).
train_datagen = ImageDataGenerator(rescale=SCALING,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   vertical_flip=True)

# Batches are read straight from the training directory.
train_generator = train_datagen.flow_from_directory('../input/train',
                                                    target_size=conf['image_shape'],
                                                    batch_size=conf['batch_size'],
                                                    seed=conf['seed'])

# Test images: only rescaled, no labels, no shuffling.
# Not really used; `model.predict_generator` failed after ~100 steps
test_datagen = ImageDataGenerator(rescale=SCALING)
test_generator = test_datagen.flow_from_directory('../input/',
                                                  target_size=conf['image_shape'],
                                                  batch_size=conf['batch_size'],
                                                  classes=['test'],
                                                  class_mode=None,
                                                  shuffle=False)

# NOTE: the training generator is also passed as `validation_data`,
# so there is no actual validation set here.
model.fit_generator(
    train_generator,
    steps_per_epoch=conf['steps_per_epoch'],
    epochs=conf['num_epochs'],
    validation_data=train_generator,
    validation_steps=conf['validation_steps'],
    workers=conf['num_workers'],
)

# Keep the trained network on disk
model.save('last_model.h5')

## Create the submission file from the test-set predictions

In [None]:
import pandas as pd
import cv2
import numpy as np

# The sample submission provides the list of test images and the output layout.
sample_subm = pd.read_csv('../input/sample_submission.csv')
ids = sample_subm['image_name'].values

# `conf['image_shape']` is (rows, cols), as unpacked in `create_model`,
# whereas `cv2.resize` expects its target size as (width, height).
img_rows, img_cols = conf['image_shape']
resize_to = (img_cols, img_rows)

# Submission columns, in the order of the model's softmax outputs.
class_columns = ['Type_1', 'Type_2', 'Type_3']

# One row of class probabilities per test image, filled in the order of `ids`.
all_predictions = np.zeros((len(ids), len(class_columns)))

for row, id_ in enumerate(ids):
    print('Predict for image {}'.format(id_))
    f = '../input/test/' + id_

    image = cv2.imread(f)
    if image is None:
        # `cv2.imread` signals an unreadable/missing file by returning None;
        # fail with the file name instead of a cryptic error inside `cv2.resize`.
        raise IOError('Could not read image {}'.format(f))

    # OpenCV decodes to BGR, but the model was trained on RGB batches coming
    # from `ImageDataGenerator`; convert so inference sees the same channel order.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, resize_to)
    image = SCALING*image

    # The model expects a batch dimension, even for a single image.
    image_list = np.expand_dims(image, axis=0)

    predictions = model.predict(image_list, verbose=1, batch_size=1)
    all_predictions[row] = predictions[0]

# Write every column once instead of doing three masked `.loc` look-ups per image.
for col_idx, column in enumerate(class_columns):
    sample_subm[column] = all_predictions[:, col_idx]

sample_subm.to_csv("subm.csv", index=False)