To start, you will need to download and unzip the competition data from Kaggle and make sure your directory structure looks like this:
```
utils/
    vgg16.py
    utils.py
lesson1/
    redux.ipynb
    data/
        redux/
            train/
                cat.437.jpg
                dog.9924.jpg
                cat.1029.jpg
                dog.4374.jpg
            test/
                231.jpg
                325.jpg
                1235.jpg
                9923.jpg
```

You can download the data files from the competition page [here](https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/data) or you can download them from the command line using the [Kaggle CLI](https://github.com/floydwch/kaggle-cli).

You should launch your notebook from inside the `lesson1` directory:
```
cd lesson1
jupyter notebook
```

In [1]:
#Show the kernel's current working directory (the notebook is meant to be launched from lesson1/)
%pwd

u'/home/ubuntu/courses/deeplearning1/nbs/lesson1'

In [2]:
import os, sys

#Record the directory the notebook was started from; the data folder lives beneath it
lesson_home_dir = cur_dir = os.getcwd()
data_dir = '{}/data/redux'.format(cur_dir)

In [3]:
#Allow relative imports to directories above lesson1/
sys.path.insert(1, os.path.join(sys.path[0], '..'))

#import modules
from utils import *
from vgg16 import Vgg16

#Instantiate plotting tool
#In Jupyter notebooks, you will need to run this command before doing any plotting
%matplotlib inline

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)
Using Theano backend.


## Action Plan
1. Create Validation and Sample sets - done
2. Rearrange image files into their respective directories - done
3. Finetune and Train model
4. Generate predictions
5. Validate predictions
6. Submit predictions to Kaggle

In [5]:
#Start by setting up the directories. This only should be run once

%cd $data_dir
%mkdir valid
%mkdir results
%mkdir -p sample/train
%mkdir -p sample/test
%mkdir -p sample/valid
%mkdir -p sample/results
%mkdir -p test/unknown

/home/ubuntu/courses/deeplearning1/nbs/lesson1/data/redux


In [12]:
#Move 2000 training instances to the validation folder - only run once

%cd $data_dir/train
g = glob('*jpg') #gets a list of every file matching the pattern
shuf = np.random.permutation(g)
for i in range(2000):    
    os.rename(shuf[i], data_dir + '/valid/' + shuf[i]) #renaming similar to mv

/home/ubuntu/courses/deeplearning1/nbs/lesson1/data/redux/train


In [14]:
#copy over 200,50 training and validation images to the sample folder - only run once
#Both sample sets are drawn from data_dir/train (the first 200 shuffled files go to
#sample/train, the next 50 to sample/valid). The source folder is named explicitly so
#the cell does not depend on whichever directory an earlier %cd left the kernel in.
from shutil import copyfile
n_sample_train, n_sample_valid = 200, 50
g = glob(data_dir + '/train/*.jpg')
shuf = np.random.permutation(g)
#slicing copes with fewer than 250 available files instead of raising IndexError
for i, src in enumerate(shuf[:n_sample_train + n_sample_valid]):
    subset = 'train' if i < n_sample_train else 'valid'
    copyfile(src, data_dir + '/sample/' + subset + '/' + os.path.basename(src))


In [16]:
#Now create a directory for cats and for dogs - only run once
%cd $data_dir/train
#%mkdir dogs
#%mkdir cats
%mv dog.*.jpg dogs/
%mv cat.*.jpg cats/

%cd $data_dir/valid
#%mkdir dogs
#%mkdir cats
%mv dog.*.jpg dogs/
%mv cat.*.jpg cats/

%cd $data_dir/sample/train
#%mkdir dogs
#%mkdir cats
%mv dog.*.jpg dogs/
%mv cat.*.jpg cats/

%cd $data_dir/sample/valid
#%mkdir dogs
#%mkdir cats
%mv dog.*.jpg dogs/
%mv cat.*.jpg cats/

/home/ubuntu/courses/deeplearning1/nbs/lesson1/data/redux/train
/home/ubuntu/courses/deeplearning1/nbs/lesson1/data/redux/valid
/home/ubuntu/courses/deeplearning1/nbs/lesson1/data/redux/sample/train
/home/ubuntu/courses/deeplearning1/nbs/lesson1/data/redux/sample/valid


## Finetune the models and train

In [22]:
#set up packages and path
sys.path.append('/home/ubuntu/courses/deeplearning1/nbs/utils')
from __future__ import division,print_function

import os, json
from glob import glob
import numpy as np
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt

#pre-trained model package
import vgg16; reload(vgg16)
from vgg16 import Vgg16

def getPath(sample):
    """Return the root folder of the data set to work with.

    sample -- boolean (any truthy/falsy value); True selects the sample set in
              data_dir + '/sample', False selects the full data set in data_dir.

    The result is derived from the notebook-level data_dir rather than a
    hard-coded absolute path, so it follows wherever the notebook was started.
    """
    if sample:
        return data_dir + '/sample'
    return data_dir

In [24]:
#Fine-tune the pre-trained model and fit it for one epoch, using code from lesson 1

path = getPath(True)  # True selects the sample data set
batch_size = 64

vgg = Vgg16()
# Batches are read a few images at a time from the train and valid folders.
# NB: the images must sit in subdirectories named after their category
train_dir, valid_dir = path + '/train', path + '/valid'
batches = vgg.get_batches(train_dir, batch_size=batch_size)
# validation batches are requested at twice the training batch size
val_batches = vgg.get_batches(valid_dir, batch_size=batch_size * 2)

vgg.finetune(batches)
vgg.fit(batches, val_batches, nb_epoch=1)

Found 200 images belonging to 2 classes.
Found 50 images belonging to 2 classes.
Epoch 1/1


## Create a prediction function

In [None]:
def getPreds(mod, pred_path, batch_size = 8):
    """Run a model's test method over the images under pred_path.

    mod        -- object exposing test(path, batch_size) that returns a
                  (batches, predictions) pair
    pred_path  -- directory handed to mod.test
    batch_size -- batch size handed to mod.test (default 8)

    Returns the (pred_batches, preds) pair produced by mod.test.
    """
    # use the pred_path argument, not the notebook-level `path` variable
    pred_batches, preds = mod.test(pred_path, batch_size)
    return pred_batches, preds
