# Steps:

1. Download data
    * create validation data set
    * create sample set
2. set up the Inception V3 model
3. load train and validation data sets
4. train on batches
5. predict

In [1]:
%matplotlib inline

## 1. Download data

Run:

password=[...] sh download.sh

In [2]:
import os 
# Directory the kernel is currently running in; the data and results paths
# used by the cells below are built from it.
dir_path = os.getcwd()
dir_path

'/fastai'

In [36]:
path=dir_path + "/data/sample/"
results_path = dir_path + "/results/"
%mkdir $results_path

mkdir: cannot create directory '/fastai/results/': File exists


## 2. Setup Inception V3

In [4]:
from inception_v3 import InceptionV3
from keras.layers import Input

Using Theano backend.


In [5]:
# Channels-first 299x299 RGB input tensor (the backend reports "theano ordering").
# NOTE: the name `input` shadows the Python builtin; it is kept because the
# training cell builds the final Model from this exact name.
input = Input(shape=(3, 299, 299))

# Inception V3 without its classification top, wired onto the tensor above.
inception = InceptionV3(input_tensor=input, include_top=False)

theano ordering
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/inception_v3_weights_th_dim_ordering_th_kernels_notop.h5


## 3. Load data

In [21]:
# Hyper-parameters shared by the cells below.
batch_size, nb_epochs = 4, 10  # training batch size; number of single-epoch fit calls
lr = 1e-2                      # learning rate passed to the Adam optimizer
target_size = (299, 299)       # size images are resized to when read from disk

In [7]:
from keras.preprocessing import image

In [8]:
# Plain generator (no augmentation). An augmenting alternative that was tried:
# image.ImageDataGenerator(rotation_range=5, width_shift_range=0.1, height_shift_range=0.1, zoom_range=0.3, horizontal_flip=True)
gen = image.ImageDataGenerator()

# Use the shared target_size setting instead of repeating the literal, so the
# train/validation iterators always agree with the test iterator built later.
batches = gen.flow_from_directory(path + "train", target_size=target_size, class_mode="categorical", batch_size=batch_size)
val_batches = gen.flow_from_directory(path + "valid", target_size=target_size, class_mode="categorical", batch_size=batch_size * 2)

Found 160 images belonging to 2 classes.
Found 40 images belonging to 2 classes.


## 4. Train

In [9]:
from keras.layers.core import Dense
from keras.layers import AveragePooling2D, Flatten
from keras.models import Model
from keras.optimizers import Adam

In [17]:
# Freeze every layer of the pre-trained base so only the new head below is trained.
for layer in inception.layers:
    layer.trainable = False

# New classification head: 8x8 average pooling -> flatten -> softmax with one
# unit per class found in the training directory.
pooled = AveragePooling2D((8, 8), strides=(8, 8), name='avg_pool')(inception.output)
flat = Flatten(name='flatten')(pooled)
final_layer = Dense(batches.nb_class, activation="softmax")(flat)

# Full network from the original input tensor to the new softmax output.
model = Model(input, final_layer)
model.compile(
    optimizer=Adam(lr=lr),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

In [18]:
# Train one epoch per fit_generator call so a checkpoint can be written after
# each epoch. latest_weights_fname names the most recent checkpoint and stays
# None until the first epoch has finished.
latest_weights_fname = None
for epoch in range(nb_epochs):
    model.fit_generator(
        batches,
        samples_per_epoch=batches.nb_sample,
        nb_epoch=1,
        validation_data=val_batches,
        nb_val_samples=val_batches.nb_sample
    )
    latest_weights_fname = "inception-epoch-%d.h5" % epoch
    model.save_weights(latest_weights_fname)



Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1

KeyboardInterrupt: 

In [None]:
# Restore the checkpoint of the last completed epoch. latest_weights_fname is
# still None when training was interrupted before any epoch finished; in that
# case there is nothing to restore, so the in-memory weights are kept.
if latest_weights_fname is not None:
    model.load_weights(latest_weights_fname)

## 5. Test

In [25]:
# shuffle=False is essential here: flow_from_directory shuffles by default, which
# would yield images in random order, so the rows of `preds` would no longer line
# up with test_batches.filenames when the submission ids are derived from them.
test_batches = gen.flow_from_directory(path + "test", target_size=target_size, class_mode=None, shuffle=False, batch_size=batch_size * 2)

# One row of class probabilities per test image.
preds = model.predict_generator(test_batches, test_batches.nb_sample)

Found 200 images belonging to 1 classes.


In [26]:
# File names of the test images as listed by the directory iterator; the
# submission ids are parsed from these in a later cell.
filenames = test_batches.filenames

Here's the format Kaggle requires for new submissions:
```
imageId,isDog
1242, .3984
3947, .1000
4539, .9082
2345, .0000
```

Kaggle wants the imageId followed by the probability of the image being a dog. Kaggle uses a metric called [Log Loss](http://wiki.fast.ai/index.php/Log_Loss) to evaluate your submission.

In [27]:
# Column 1 of the softmax output is taken as the probability of "dog".
# NOTE(review): assumes class index 1 is the dog class — confirm with batches.class_indices.
is_dog = preds[:, 1]

In [28]:
import os
import numpy as np

# Image id = file name without its directory or extension
# (e.g. "<subdir>/5284.jpg" -> 5284). Parsing via os.path avoids depending on
# the exact length of the sub-directory name.
ids = np.array([int(os.path.splitext(os.path.basename(f))[0]) for f in filenames])

In [29]:
# Two-column submission table: image id in column 0, dog probability in column 1.
sub = np.column_stack((ids, is_dog))
# Preview the first five rows.
sub[:5]

array([[  5.28400000e+03,   9.99739289e-01],
       [  9.38000000e+02,   2.25643518e-10],
       [  7.99500000e+03,   9.99391913e-01],
       [  2.90000000e+03,   9.99999762e-01],
       [  2.08000000e+02,   1.73032924e-14]])

In [33]:
# Change the working directory to results_path so the relative file name used
# by the next cell resolves inside the results directory.
%cd $results_path

/fastai/results


In [34]:
# Write the submission as CSV: integer id, probability with 5 decimals.
# comments="" stops numpy from prefixing the header line with "# ".
np.savetxt(
    "subm.csv",
    sub,
    fmt="%d,%.5f",
    header="id,label",
    comments=""
)

In [35]:
from IPython.display import FileLink
%cd ..
FileLink("data/subm.csv")

/fastai
