In [1]:
%matplotlib inline
import pandas as pd

In [2]:
# Root directory of the dataset variant to use. Alternatives kept for reference:
#path = "data/kaggle/dogscats/"
#path = "data/kaggle/dogscats/sample/"
# The "all/" variant is the one used to train for the submission.
# NOTE(review): the original note says its validation data does not differ from
# the training data -- presumably valid/ overlaps train/ here, so validation
# scores from this variant would be optimistic; confirm.
path = "data/kaggle/dogscats/all/"

In [3]:
from __future__ import division,print_function

import os, json
from glob import glob
import numpy as np
# Show floats with 4 digits of precision and wrap printed arrays at 100 characters.
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt

In [4]:
from utils import plots

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)
Using Theano backend.


In [5]:
import keras
from keras.preprocessing import image

In [6]:
# Per-channel means published by the VGG researchers, shaped (3, 1, 1) so they
# broadcast across the two trailing (spatial) axes of channels-first image data.
vgg_mean = np.array([123.68, 116.779, 103.939])[:, np.newaxis, np.newaxis]

def vgg_preprocess(x):
    """Centre a batch of images on the VGG channel means and reverse the channel order.

    x is indexed as (batch, channel, ...): the mean is subtracted per channel and
    axis 1 is then reversed. Only the ``-`` operator and slicing are used, so the
    function works on NumPy arrays as well as on tensors that support both.
    """
    centered = x - vgg_mean
    # Flip axis 1 (channels). Images loaded as RGB come out as BGR.
    # NOTE(review): BGR is presumably the order the pretrained weights expect -- confirm.
    return centered[:, ::-1]

In [7]:
def addConvBlock(layers, model, filters):
    """Append one convolutional stage to ``model`` in place.

    The stage is ``layers`` repetitions of (1-pixel zero padding, 3x3 ReLU
    convolution with ``filters`` filters), followed by a single 2x2 max-pool
    with stride 2.

    layers  -- number of padding + convolution pairs to add
    model   -- object exposing ``add(layer)``; mutated, nothing is returned
    filters -- number of filters for every convolution in the stage
    """
    conv = keras.layers.convolutional
    for _ in range(layers):
        # Pad by one pixel on each side so the 3x3 convolution keeps the spatial size
        model.add(conv.ZeroPadding2D((1, 1)))
        model.add(conv.Convolution2D(filters, 3, 3, activation='relu'))
    # Close the stage with a 2x2, stride-2 pooling layer
    model.add(conv.MaxPooling2D((2, 2), strides=(2, 2)))

In [8]:
def addFCBlock(model):
    """Append a fully connected stage to ``model`` in place.

    Adds a 4096-unit ReLU Dense layer followed by Dropout with rate 0.5.
    ``model`` only needs an ``add(layer)`` method; nothing is returned.
    """
    dense = keras.layers.Dense(4096, activation='relu')
    model.add(dense)
    dropout = keras.layers.Dropout(0.5)
    model.add(dropout)

In [9]:
def create_vgg16():
    """Assemble the VGG16 layer stack as a Keras Sequential model.

    Layout, in order: a Lambda layer applying ``vgg_preprocess`` to
    (3, 224, 224) inputs, five convolutional stages, a Flatten layer, two
    fully connected blocks and a 1000-way softmax Dense layer.

    Returns the model as built; it is neither compiled nor given weights here.
    """
    # (conv layers in the stage, filters per conv layer), in network order
    conv_stages = [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]

    model = keras.models.Sequential()
    model.add(keras.layers.core.Lambda(vgg_preprocess, input_shape=(3,224,224)))
    for depth, filters in conv_stages:
        addConvBlock(depth, model, filters)
    model.add(keras.layers.Flatten())
    for _ in range(2):
        addFCBlock(model)
    model.add(keras.layers.core.Dense(1000, activation='softmax'))
    return model

In [10]:
# Build the VGG16 layer stack; no weights are loaded by this call.
model = create_vgg16()

In [11]:
# Print the per-layer table (type, output shape, parameter count, inbound layer).
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lambda_1 (Lambda)                (None, 3, 224, 224)   0           lambda_input_1[0][0]             
____________________________________________________________________________________________________
zeropadding2d_1 (ZeroPadding2D)  (None, 3, 226, 226)   0           lambda_1[0][0]                   
____________________________________________________________________________________________________
convolution2d_1 (Convolution2D)  (None, 64, 224, 224)  1792        zeropadding2d_1[0][0]            
____________________________________________________________________________________________________
zeropadding2d_2 (ZeroPadding2D)  (None, 64, 226, 226)  0           convolution2d_1[0][0]            
___________________________________________________________________________________________

In [12]:
from keras.utils.data_utils import get_file

In [13]:
# Fetch the pretrained VGG16 weights file through Keras' download helper, stored
# under the 'models' cache sub-directory, and keep the local path.
# NOTE(review): the origin is plain HTTP and no checksum is passed -- confirm the
# host is still serving this file and consider verifying its hash.
weights_path = get_file(
   fname='vgg16.h5',
   origin='http://www.platform.ai/models/vgg16.h5',
   cache_subdir='models'
)

In [14]:
# Load the downloaded weights into the network.
model.load_weights(weights_path)

In [15]:
# Mini-batch size used by the training and validation generators.
batch_size = 64

In [16]:
# Training batches read from <path>/train: images resized to 224x224, labels
# one-hot encoded ('categorical'), order shuffled. The ImageDataGenerator is
# created with default arguments.
train_batches = image.ImageDataGenerator().flow_from_directory(
        path + 'train', target_size=(224,224), class_mode='categorical', shuffle = True, batch_size = batch_size
    )

Found 25000 images belonging to 2 classes.


In [17]:
# Number of training images the generator found.
train_batches.nb_sample

25000

In [18]:
# Validation batches read from <path>/valid with the same settings as the
# training generator (224x224, one-hot labels, shuffled, same batch size).
valid_batches = image.ImageDataGenerator().flow_from_directory(
        path + 'valid', target_size=(224,224), class_mode='categorical', shuffle = True, batch_size = batch_size
    )

Found 2000 images belonging to 2 classes.


In [19]:
# Pull one batch of images and labels for inspection.
# Note: this advances train_batches by one batch.
imgs, labels = next(train_batches)

In [20]:
#plots(imgs, titles=labels)

In [21]:
#imgs[0].shape

In [22]:
# Remove the last layer of the network (the 1000-way softmax added by create_vgg16).
model.pop()

In [23]:
# Mark every layer currently in the model as non-trainable, so that only
# layers added after this point get their weights updated.
for layer in model.layers: 
    layer.trainable=False

In [24]:
# New output layer: 2-unit softmax, one unit per class found in the data directories.
model.add(keras.layers.core.Dense(2, activation='softmax'))

In [25]:
# Compile with Adam (lr=0.001) and categorical cross-entropy, reporting accuracy.
model.compile(optimizer=keras.optimizers.Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [26]:
# Train for one epoch over all training samples, evaluating on all validation samples.
model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample, nb_epoch=1,
                validation_data=valid_batches, nb_val_samples=valid_batches.nb_sample)

Epoch 1/1


<keras.callbacks.History at 0x7fa6c5128d10>

In [27]:
# Run one further epoch over all training samples, again evaluating on all validation samples.
model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample, nb_epoch=1,
                validation_data=valid_batches, nb_val_samples=valid_batches.nb_sample)

Epoch 1/1


<keras.callbacks.History at 0x7fa6baa0b750>

In [28]:
# Recompile with a smaller learning rate (0.0001). A new Adam instance is
# created here, so the optimizer does not carry over the previous one's state.
model.compile(optimizer=keras.optimizers.Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

In [29]:
# One epoch over all training samples with the model as currently compiled,
# evaluating on all validation samples.
model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample, nb_epoch=1,
                validation_data=valid_batches, nb_val_samples=valid_batches.nb_sample)

Epoch 1/1


<keras.callbacks.History at 0x7fa6baeaf690>

In [30]:
# Test batches read from <path>/test: no labels (class_mode=None) and no
# shuffling, so batches come out in the order of test_batches.filenames.
test_batches = image.ImageDataGenerator().flow_from_directory(
        path + 'test', target_size=(224,224), class_mode=None, shuffle = False, batch_size = 8
    )

Found 12500 images belonging to 1 classes.


In [31]:
#plots(next(test_batches))

In [32]:
# Predict on every test image.
# NOTE(review): the predictions are later paired with test_batches.filenames by
# position, which presumably requires that test_batches has not been advanced
# before this call -- confirm if any cell consumes a batch from it first.
preds = model.predict_generator(test_batches, test_batches.nb_sample)

In [33]:
# Pair each test image's id with the predicted probability at class index 1.
# The id is the file's base name without its extension, e.g. 'unknown/9292.jpg'
# -> '9292'. os.path is used so this does not depend on the '/' separator or on
# the extension being exactly four characters long.
# NOTE(review): index 1 is presumably the 'dogs' class (alphabetical class
# ordering) -- confirm against train_batches.class_indices.
results = [
    (os.path.splitext(os.path.basename(filename))[0], pred[1])
    for filename, pred in zip(test_batches.filenames, preds)
]

In [34]:
# Show the first five (id, probability) pairs.
results[0:5]

[('9292', 2.7533845e-11),
 ('12026', 0.11292451),
 ('9688', 1.9169815e-06),
 ('4392', 1.4102052e-14),
 ('779', 1.0)]

In [35]:
# Tabulate the (id, probability) pairs under the column names 'id' and 'label'.
df = pd.DataFrame(results, columns=['id', 'label'])

In [36]:
# Clamp the probabilities to the range [0.02, 0.98].
# NOTE(review): presumably done to bound the penalty a log-loss metric gives to
# confident wrong predictions -- confirm against the competition's scoring.
df['label'] = df['label'].clip(0.02, 0.98)

In [37]:
# Preview the first rows after clipping.
df.head()

Unnamed: 0,id,label
0,9292,0.02
1,12026,0.112925
2,9688,0.02
3,4392,0.02
4,779,0.98


In [38]:
# Write the table to CSV without the DataFrame index column.
df.to_csv('data/kaggle/dogscats/plain_vgg_assgn2_clip2_all.csv', index=False)