# Steps:

1. Download data
    * create validation data set
    * create sample set
2. setup VGG model
3. load train and validation data sets
4. train on batches
5. predict

In [None]:
%matplotlib inline

## 1. Download data

Run:

password=[...] sh download.sh

In [1]:
import os 
dir_path = os.getcwd()
dir_path

'/fastai'

In [2]:
path=dir_path + "/data/sample/"
results_path = dir_path + "/results/"
%mkdir $results_path

mkdir: cannot create directory '/fastai/results/': File exists


## 2. Setup Inception V3

In [5]:
from inception_v3 import InceptionV3, preprocess_input
from keras.layers import Input
from keras.layers.core import Lambda
from keras.models import Model
from keras import backend as K

In [21]:
input = Input(shape=(3,299,299))
preprocessor = Lambda(preprocess_input, input_shape=(3,299,299), output_shape=(3,299,299))
pre = Input(shape=(3,299,299))
inception = InceptionV3(include_top=False, input_tensor=pre)
for layer in inception.layers: layer.trainable = False
inception_output = inception(preprocessor(input))

# use these instead if no preprocessing
# input = pre
# inception_output = inception.output

theano ordering


## 3. Load data

In [23]:
batch_size=4
nb_epochs = 1
lr = 0.01
target_size = (299, 299)

In [13]:
from keras.preprocessing import image

In [14]:
# gen = image.ImageDataGenerator(rotation_range=5,width_shift_range=0.1,height_shift_range=0.1,zoom_range=0.3,horizontal_flip=True)
gen = image.ImageDataGenerator()
batches = gen.flow_from_directory(path+"train", target_size=(299,299), class_mode="categorical", batch_size=batch_size)
val_batches = gen.flow_from_directory(path+"valid", target_size=(299,299), class_mode="categorical", batch_size=batch_size*2)

Found 160 images belonging to 2 classes.
Found 40 images belonging to 2 classes.


## 4. Train

In [26]:
from keras.layers.core import Dense
from keras.layers import AveragePooling2D, Flatten, GlobalAveragePooling2D
from keras.optimizers import Adam

### Either run this cell:

In [22]:
x = AveragePooling2D((8, 8), strides=(8, 8), name='avg_pool')(inception_output)
x = Flatten(name='flatten')(x)
final_layer = Dense(batches.nb_class, activation="softmax")(x)
#inception.compile(optimizer=Adam(lr=lr), loss="categorical_crossentropy", metrics=["accuracy"])
model = Model(input, final_layer)
model.compile(optimizer=Adam(lr=lr), loss="categorical_crossentropy", metrics=["accuracy"])

### OR this one:

In [27]:
x = GlobalAveragePooling2D()(inception_output)
x = Dense(1024, activation='relu')(x)
final_layer = Dense(batches.nb_class, activation="softmax")(x)
model = Model(input, final_layer)
# model.compile(optimizer=Adam(lr=lr), loss="categorical_crossentropy", metrics=["accuracy"])
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

In [None]:
latest_weights_fname = None
for e in range(nb_epochs):
    # vgg.fit(batches, val_batches, nb_epoch=1)
    model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=1, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)
    latest_weights_fname = "inception-epoch-%d.h5" % e
    model.save_weights(latest_weights_fname)
    # model.optimizer.lr /= e

In [None]:
model.load_weights(latest_weights_fname)

## 5. Test

In [None]:
test_batches = gen.flow_from_directory(path+"test", target_size=target_size, class_mode=None, batch_size=batch_size*2)
preds = model.predict_generator(test_batches, test_batches.nb_sample)

In [None]:
filenames = test_batches.filenames

Here's the format Kaggle requires for new submissions:
```
imageId,isDog
1242, .3984
3947, .1000
4539, .9082
2345, .0000
```

Kaggle wants the imageId followed by the probability of the image being a dog. Kaggle uses a metric called [Log Loss](http://wiki.fast.ai/index.php/Log_Loss) to evaluate your submission.

In [None]:
is_dog = preds[:, 1]

In [None]:
import numpy as np
ids = np.array([int(f[8:f.find('.')]) for f in filenames])

In [None]:
sub = np.stack([ids, is_dog], axis=1)
sub[:5]

In [None]:
%cd $results_path

In [None]:
np.savetxt("subm.csv", sub, fmt="%d,%.5f", header="id,label", comments="")

In [None]:
from IPython.display import FileLink
%cd ..
FileLink("data/subm.csv")