In [1]:
%matplotlib inline

In [2]:
# Root directory of the image data; get_batches() appends a sub-folder name
# ('train', 'valid', 'test') to this string, so it must end with a slash.
path = "/input/"

In [3]:
from __future__ import division,print_function

import os, json
from glob import glob
import numpy as np
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt

In [4]:
import utils; reload(utils)
from utils import plots

Using Theano backend.


In [5]:
from keras import backend as K

In [6]:
import keras.utils

In [7]:
from keras.layers.core import Flatten, Dense, Lambda, Dropout

In [8]:
import keras.models

In [9]:
from keras.preprocessing import image

In [10]:
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D

In [11]:
from keras.optimizers import SGD, RMSprop
from keras.utils.data_utils import get_file

In [12]:
FILES_PATH = 'http://www.platform.ai/models/'
CLASS_FILE = 'imagenet_class_index.json'

# get_file() downloads the file on first use and afterwards returns the
# locally cached copy (stored under the 'models' cache sub-directory).
fpath = get_file(CLASS_FILE, FILES_PATH+CLASS_FILE, cache_subdir='models')
with open(fpath) as f:
    class_dict = json.load(f)

# The JSON maps stringified indices ("0", "1", ...) to entries whose element
# [1] is kept here; flatten it into a list ordered by numeric index.
classes = [class_dict[str(i)][1] for i in range(len(class_dict))]

## Let's make that model!

We will need two kinds of building block, a convolutional block and a dense block; the model stacks several of each.

In [13]:
def add_convolutional_block(layers, model, filters):
    """Append one convolutional block to ``model`` in place.

    The block consists of ``layers`` repetitions of (1-pixel zero padding,
    3x3 ReLU convolution with ``filters`` filters), followed by a single
    2x2 max-pooling layer with stride 2.

    layers:  number of padding + convolution pairs to add.
    model:   model object exposing ``add()``; it is modified in place.
    filters: number of filters in every convolution of the block.
    """
    for _ in range(layers):
        padding = ZeroPadding2D((1,1))
        model.add(padding)
        convolution = Convolution2D(filters, 3, 3, activation='relu')
        model.add(convolution)

    pooling = MaxPooling2D((2,2), strides=(2,2))
    model.add(pooling)

The function above will add a number of padding and convolution layers to a given model, and finish with a final max pooling layer.

`conv block = (padding, convo)*n + max pooling`

In [14]:
def add_dense_block(model):
    """Append a 4096-unit ReLU dense layer followed by 50% dropout to ``model``.

    model: model object exposing ``add()``; it is modified in place.
    """
    fully_connected = Dense(4096, activation='relu')
    model.add(fully_connected)
    dropout = Dropout(0.5)
    model.add(dropout)

Dense blocks use dropout of 50%.

VGG-specific input preprocessing (per-channel mean subtraction and channel reordering):

In [15]:
# Per-channel means subtracted from every input image, shaped (3, 1, 1) so
# that they broadcast over channels-first (3, height, width) images.
vgg_mean = np.array([123.68, 116.779, 103.939])[:, np.newaxis, np.newaxis]

def vgg_preprocess(x):
    """Centre a batch of channels-first images and reverse its channel order.

    x: batch indexed as (sample, channel, height, width).

    Returns ``x`` with ``vgg_mean`` subtracted and axis 1 (the channel axis)
    reversed.
    """
    centered = x - vgg_mean
    # Reversing the channel axis turns RGB-ordered input into BGR.
    # NOTE(review): assumes the images arrive in RGB order - confirm.
    return centered[:, ::-1]

In [16]:
def VGG_16():
    """Assemble the VGG-16 architecture as a Keras ``Sequential`` model.

    The network takes (3, 224, 224) inputs, preprocesses them with
    ``vgg_preprocess``, then applies five convolutional blocks (13
    convolutions in total), two dense blocks and a final 1000-way softmax.
    No weights are loaded here.

    Returns the newly built, uncompiled model.
    """
    # (number of convolution layers, number of filters) for each block, in order
    conv_blocks = [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]

    model = Sequential()
    model.add(Lambda(vgg_preprocess, input_shape=(3,224,224)))
    for n_layers, n_filters in conv_blocks:
        add_convolutional_block(n_layers, model, n_filters)

    model.add(Flatten())
    for _ in range(2):
        add_dense_block(model)
    model.add(Dense(1000, activation='softmax'))
    return model

In [17]:
from keras.models import Sequential

In [18]:
# Build the network architecture returned by VGG_16().
model = VGG_16()

  .format(self.name, input_shape))


In [19]:
# Fetch the weight file (get_file() reuses the cached copy when present)
# and load it into the model built above.
fpath = get_file('vgg16.h5', FILES_PATH+'vgg16.h5', cache_subdir='models')
model.load_weights(fpath)

In [20]:
# Number of images per batch; also captured as the default batch size of get_batches().
batch_size = 4

In [21]:
def get_batches(dirname, gen=image.ImageDataGenerator(), shuffle=True, 
                batch_size=batch_size, class_mode='categorical'):
    """Create an iterator over the 224x224-resized images found in ``path + dirname``.

    dirname:    sub-directory name appended to the global ``path``.
    gen:        generator object providing ``flow_from_directory``; the
                default instance is created once, when this function is defined.
    shuffle:    forwarded to ``flow_from_directory``.
    batch_size: number of images per batch; the default is the value the
                global ``batch_size`` had when this function was defined.
    class_mode: forwarded to ``flow_from_directory``.

    Returns whatever ``gen.flow_from_directory`` returns.
    """
    directory = path + dirname
    return gen.flow_from_directory(directory, target_size=(224,224),
                                   batch_size=batch_size, shuffle=shuffle,
                                   class_mode=class_mode)

In [22]:
# Batch iterators over the 'train' and 'valid' sub-folders of `path`.
batches = get_batches('train', batch_size=batch_size)
val_batches = get_batches('valid', batch_size=batch_size)

Found 20 images belonging to 2 classes.
Found 20 images belonging to 2 classes.


In [25]:
# Labels for this two-class dataset. This rebinds `classes`, replacing the
# ImageNet label list built earlier in the notebook.
# NOTE(review): the order presumably matches the class indices assigned by
# flow_from_directory - confirm against batches.class_indices.
classes = [u'cat', u'dog']

In [24]:
def pred_batch(imgs):
    """Return the model's predicted probability of class index 1 for each image.

    imgs: batch of images accepted by ``model.predict``.

    Returns a list with one entry per row of the prediction array, each
    entry being that row's value in column 1.
    """
    preds = model.predict(imgs)
    # Take column 1 directly; the argmax the previous version computed was
    # only ever used for its length.
    return list(preds[:, 1])

## Fine-tune the model and generate predictions for the test data

In [31]:
# Directory the submission file is written to; file names are appended to
# this string directly, so it must end with a slash.
out_dir = "/output/"

Finetuning and fitting (from vgg16.py)

In [32]:
# Replace the final 1000-way layer with a softmax sized to this dataset.
# Every layer kept from the original model is frozen, so only the new
# output layer is trained.
# Note: this cell is not idempotent - running it again pops another layer.
model.pop()
for layer in model.layers:
    layer.trainable = False
model.add(Dense(batches.nb_class, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(lr=0.001),
              metrics=['accuracy'])

# One pass over the training data, validated on the full validation set.
model.fit_generator(batches,
                    samples_per_epoch=batches.nb_sample,
                    nb_epoch=1,
                    validation_data=val_batches,
                    nb_val_samples=val_batches.nb_sample)

Epoch 1/1


<keras.callbacks.History at 0x7f55a79cc090>

In [33]:
def predict_for(imgs):
    """Return the model's predicted probability of class index 1 for each image.

    imgs: batch of images accepted by ``model.predict``.

    Returns a list with one entry per row of the prediction array, each
    entry being that row's value in column 1.
    """
    preds = model.predict(imgs)
    # Only column 1 is of interest (presumably the 'dog' class - confirm
    # against the class indices of the training iterator). The argmax the
    # previous version computed was only ever used for its length.
    return list(preds[:, 1])

In [34]:
# Iterator over the unlabelled test images: class_mode=None so batches carry
# images only, shuffle=False so the order stays fixed and predictions can be
# matched to ids.
test_batches = get_batches('test', batch_size=100, shuffle=False, class_mode=None)

Found 12500 images belonging to 1 classes.


In [35]:
# Take the ids from the iterator's own file list so they line up one-to-one
# with the unshuffled batches it yields. A separately sorted os.listdir() can
# disagree with the iterator's order and may include files it skips.
ids = [os.path.basename(fname).split('.')[0] for fname in test_batches.filenames]

In [36]:
import csv

def make_submission(batches, ids):
    """Make a Kaggle contest submission.

    Writes ``submission.csv`` into ``out_dir``: an ``id,label`` header
    followed by one row per entry of ``ids``, where the label is the value
    ``predict_for`` returns for the corresponding image.

    batches: iterable yielding image batches in the same order as ``ids``.
             It may be endless; iteration stops once every id has a row.
    ids:     sequence of row identifiers, one per image.
    """
    total = len(ids)
    id_idx = 0
    # Binary mode is what the Python 2 csv module expects for its output file.
    with open(out_dir+'submission.csv', 'wb') as submission_file:
        wr = csv.writer(submission_file, quoting=csv.QUOTE_NONE)
        wr.writerow(['id', 'label'])
        if total == 0:
            return
        for imgs in batches:
            # Never emit more rows than there are ids left.
            probs = predict_for(imgs)[:total - id_idx]
            for prob in probs:
                wr.writerow([ids[id_idx], prob])
                id_idx += 1
            # Keras batch iterators cycle through the data indefinitely, so
            # stop explicitly instead of waiting for the iterator to end
            # (which previously ran past the end of `ids`).
            if id_idx >= total:
                break

In [None]:
# Predict on the test images and write the results to out_dir + 'submission.csv'.
make_submission(test_batches, ids)