In [None]:
%matplotlib inline
import utils
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Lambda
from keras.layers import Convolution2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from __future__ import print_function
import os, sys, glob, gc

### Global Variables

In [None]:
# Root of the State Farm data; the full dataset and the "sample" subset both live under it.
bdir = "/home/ubuntu/nbs/courses/deeplearning1/nbs/data/statefarm/"
# Set to 1 to run the one-off split creation guarded by `if (dosetup == 1)`.
dosetup = 0

# Directory the rest of the notebook works on. For quick experiments point it
# at the sample subset instead, i.e. ddir = bdir + "/sample/".
ddir = bdir
sdir = bdir + "/sample/"

# Train / test / validation / results folders below the active data directory.
trdir, tedir, vdir, rdir = (
    ddir + leaf for leaf in ("/train/", "/test/", "/valid/", "/results/")
)

In [None]:
def moveallimages(src, pattern, dest):
    """Move every file in `src` whose name matches the glob `pattern` into `dest`.

    Files keep their base name. `dest` must already exist; the move is done
    with os.rename.
    """
    for path in glob.glob(src + "/" + pattern):
        target = dest + "/" + os.path.basename(path)
        os.rename(path, target)

# Function to copy/move up to nimages random images from 'src' to 'dest'.
from shutil import copyfile
def moverandimages(src, nimages, dest, docopy=0):
    """Move or copy a random selection of ``*.jpg`` files from `src` to `dest`.

    Parameters
    ----------
    src : str
        Directory searched (non-recursively) for ``*.jpg`` files.
    nimages : int
        Number of images to transfer. If `src` holds fewer images than this,
        all of them are transferred; zero or negative values transfer nothing.
    dest : str
        Existing destination directory. Files keep their base name.
    docopy : int, optional
        0 (default) moves the files with os.rename; any other value copies
        them with shutil.copyfile and leaves `src` untouched.
    """
    imgs = glob.glob(src + "/" + "*.jpg")
    shuff = np.random.permutation(imgs)
    # Slicing clamps to the number of available files, so asking for more
    # images than exist (or reading an empty directory) no longer raises
    # IndexError part-way through the transfer. max() keeps a negative count
    # a no-op rather than "everything except the last few".
    for img in shuff[:max(nimages, 0)]:
        target = dest + "/" + os.path.basename(img)
        if docopy == 0:
            os.rename(img, target)
        else:
            copyfile(img, target)

# One-off dataset preparation; only runs when dosetup is set to 1.
if (dosetup == 1):
    for c in range(10):
        cdir = "c%d" % c
        train_cls = trdir + "/" + cdir
        valid_cls = vdir + "/" + cdir
        # Create validation set by moving 100 images of each class out of training
        moverandimages(train_cls, 100, valid_cls)
        # Create sample training set with 100 images copied from each training class
        moverandimages(train_cls, 100, sdir + "/train/" + cdir, docopy=1)
        # Create sample validation set with 50 images from each class. These are
        # copied from the validation split built above (not from training), so
        # the sample validation set can never share an image with the sample
        # training set.
        moverandimages(valid_cls, 50, sdir + "/valid/" + cdir, docopy=1)

    # Create sample test set with 4000 random images
    moverandimages(tedir + "/unknown/", 4000, sdir + "/test/unknown/", docopy=1)
    print("Setup Done")

### Start with VGG16 finetuned for 10 class classification

In [None]:
# Vgg16 comes from the project-local vgg16 module (not shown in this notebook).
# NOTE(review): presumably the constructor builds the network and loads
# pretrained weights - confirm in vgg16.py.
from vgg16 import Vgg16
vgg=Vgg16()

In [None]:
# Batch iterators over the training and validation directories, using whatever
# defaults utils.get_batches applies (the helper lives in utils, not shown here).
trn_batch=utils.get_batches(trdir)
val_batch=utils.get_batches(vdir)

In [None]:
# Adapt the VGG model to the training batches.
# NOTE(review): presumably this swaps the final layer for one sized to the
# classes found in trn_batch - confirm in vgg16.py.
vgg.finetune(trn_batch)

In [None]:
# Train for a single epoch on trn_batch; val_batch is presumably used for
# per-epoch validation (see Vgg16.fit in vgg16.py to confirm).
vgg.fit(trn_batch, val_batch, nb_epoch=1)

In [None]:
# Show the summary of the model wrapped by the Vgg16 object.
vgg.model.summary()

### Create a convolution model based on 'more filters and larger convolution kernel'

In [None]:
# Small convolutional network trained from scratch on 3x224x224 inputs.
# Every BatchNormalization in the convolutional part uses axis=1, i.e. the
# size-3 channel axis of the declared (3, 224, 224) input shape.
cm = Sequential([
            BatchNormalization(axis=1, input_shape=(3, 224, 224)),
            # Three conv stages with 32 -> 64 -> 128 filters, 3x3 kernels.
            Convolution2D(32, 3, 3, border_mode='same', activation='relu'),
            BatchNormalization(axis=1),
            MaxPooling2D(border_mode='same'),
            Convolution2D(64, 3, 3, border_mode='same', activation='relu'),
            BatchNormalization(axis=1),
            MaxPooling2D(border_mode='same'),
            Convolution2D(128, 3, 3, border_mode='same', activation='relu'),
            # The trailing comma here was missing, which made the whole cell
            # a SyntaxError.
            BatchNormalization(axis=1),
            Flatten(),
            # Dense classifier head with dropout, ending in a 10-way softmax.
            Dense(512, activation='relu'),
            Dropout(0.5),
            BatchNormalization(),
            Dense(256, activation='relu'),
            Dropout(0.5),
            BatchNormalization(),
            Dense(10, activation='softmax')
        ])
cm.summary()

In [None]:
# Categorical cross-entropy matches the 10-way softmax output of `cm`;
# accuracy is reported alongside the loss. Adam runs with learning rate 1e-3.
cm.compile(optimizer=Adam(lr=1e-3), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Rebuild the iterators with a batch size of 8 for training `cm`.
# NOTE(review): the small batch size is presumably a memory constraint - confirm.
trn_batch=utils.get_batches(trdir, batch_size=8)
val_batch=utils.get_batches(vdir, batch_size=8)

In [None]:
# Train `cm` for 5 epochs from the directory iterators.
# NOTE(review): `.N` is presumably the number of images behind each iterator;
# the feature-extraction cell further down reads `.nb_sample` instead - confirm
# both attributes exist in the installed Keras version.
cm.fit_generator(trn_batch, trn_batch.N, nb_epoch=5, validation_data=val_batch, nb_val_samples=val_batch.N)

### VGG16 with dense layer finetuning

In [None]:
# Start again from a fresh Vgg16 instance (replacing the one fine-tuned above)
# and keep a short handle on the model object it wraps.
from vgg16 import Vgg16
vgg=Vgg16()
vm = vgg.model

In [None]:
# Positions of every convolution layer in the VGG model. isinstance() is used
# instead of an exact type comparison so subclasses of Convolution2D count too.
conv_layers_idx = [i for (i, layer) in enumerate(vm.layers) if isinstance(layer, Convolution2D)]
last_conv_layer = conv_layers_idx[-1]
# All layers up to and including the last convolution layer.
conv_layers = vm.layers[:last_conv_layer + 1]

In [None]:
# Model made of the VGG layers up to the last convolution layer; it reuses the
# layer objects taken from vm.layers rather than creating new ones.
cvm=Sequential(conv_layers)  # VGG model with convolution layers
cvm.summary()

In [None]:
# Iterators are created with shuffle=False.
# NOTE(review): presumably so that row i of each feature array lines up with
# entry i of the iterator's `classes` / `filenames` - confirm in utils.get_batches.
trn_batch=utils.get_batches(trdir, batch_size=128, shuffle=False)
val_batch=utils.get_batches(vdir, batch_size=128, shuffle=False)
tst_batch=utils.get_batches(tedir, batch_size=128, shuffle=False)

# Run every image once through the convolutional part of VGG.
trn_features = cvm.predict_generator(trn_batch, trn_batch.nb_sample)
val_features = cvm.predict_generator(val_batch, val_batch.nb_sample)
tst_features = cvm.predict_generator(tst_batch, tst_batch.nb_sample)

# Persist the features and the train/validation labels in the results
# directory so later cells can reload them instead of recomputing.
utils.save_array(rdir + "cvm_train_features.dat", trn_features)
utils.save_array(rdir + "cvm_val_features.dat", val_features)
utils.save_array(rdir + "cvm_test_features.dat", tst_features)
utils.save_array(rdir + "cvm_train_labels.dat", trn_batch.classes)
utils.save_array(rdir + "cvm_val_labels.dat", val_batch.classes)

In [None]:
# Load saved features that were output by convolution layers of vgg model,
# together with the matching train/validation labels.
trn_features = utils.load_array(rdir + "cvm_train_features.dat")
val_features = utils.load_array(rdir + "cvm_val_features.dat")
#tst_features = utils.load_array(rdir + "cvm_test_features.dat") # Causes memory overrun, load this just before predict
trn_labels = utils.load_array(rdir + "cvm_train_labels.dat")
val_labels = utils.load_array(rdir + "cvm_val_labels.dat")

In [None]:
# Classifier head trained on the pre-computed convolutional features. Its input
# shape is the output shape of the last layer in `conv_layers`, minus the
# batch axis.
fvm = Sequential()
fvm.add(MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]))
fvm.add(Flatten())
fvm.add(Dropout(0.5))

# Two dense blocks (256 then 128 units), each followed by batch norm and dropout.
fvm.add(Dense(256, activation='relu'))
fvm.add(BatchNormalization())
fvm.add(Dropout(0.5))
fvm.add(Dense(128, activation='relu'))
fvm.add(BatchNormalization())
fvm.add(Dropout(0.5))

# 10-way softmax output.
fvm.add(Dense(10, activation='softmax'))
fvm.summary()

In [None]:
# Same training setup as `cm`: Adam (lr 0.001), categorical cross-entropy for
# the softmax output, accuracy as the reported metric.
fvm.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train the head for 5 epochs on the cached features. The labels go through
# utils.onehot before being handed to Keras.
# NOTE(review): presumably onehot turns class indices into one-hot rows, as
# categorical cross-entropy expects - confirm in utils.
fvm.fit(trn_features, utils.onehot(trn_labels), batch_size=64, nb_epoch=5, 
        validation_data=(val_features,utils.onehot(val_labels)))

In [None]:
# Persist the trained head's weights in the results directory.
fvm.save_weights(rdir + "fvm_wts.dat")

In [None]:
# Reload the cached convolutional features of the test set for prediction.
# NOTE(review): this load runs while the training/validation arrays are still
# in memory (they are only deleted in the following cell); if memory is tight,
# consider freeing them before loading.
tst_features = utils.load_array(rdir + "cvm_test_features.dat")

In [None]:
# Drop the training/validation arrays that are no longer used, then force a
# garbage-collection pass so their memory is released right away.
del trn_features, trn_labels, val_features, val_labels
gc.collect()

In [None]:
# Class probabilities for the test set, computed from the cached features in
# batches of 128.
fvm_pred = fvm.predict(tst_features, batch_size=128)

In [None]:
# Keep the raw (unclipped) predictions on disk in the results directory.
utils.save_array(rdir + "fvm_pred.dat", fvm_pred)

In [None]:
# Unshuffled iterators, used below only for their metadata: the test file
# names and the class-name -> index mapping of the training set.
tst_batch=utils.get_batches(tedir, batch_size=128, shuffle=False)
trn_batch=utils.get_batches(trdir, batch_size=128, shuffle=False)

In [None]:
# File names of the test images as reported by the iterator.
# NOTE(review): these are presumably paths relative to the test directory such
# as "unknown/<name>.jpg" - confirm against tst_batch.filenames.
fnames = tst_batch.filenames
# Keep only the base file name. os.path.basename replaces a hard-coded slice
# of 8 characters (the length of "unknown/"), so the result no longer depends
# on the name of the sub-folder the test images live in.
imgs = np.array([os.path.basename(f) for f in fnames])

In [None]:
# Clip the probabilities into [0.03, 0.97].
# NOTE(review): presumably to bound the log-loss penalty of confident wrong
# predictions - confirm against the competition metric.
fvm_pred = np.clip(fvm_pred, 0.03, 0.97)
# One column per class, with class names ordered by their index in
# trn_batch.class_indices.
subm = pd.DataFrame(fvm_pred, columns=sorted(trn_batch.class_indices, key=trn_batch.class_indices.get))
# The image file name goes in as the first column.
subm.insert(0, 'img', imgs)
# Write the submission file without the DataFrame index.
subm.to_csv(rdir + "fvm_results.csv", index=False)

In [None]:
# Preview the first rows of the submission table.
subm.head()

### VGG with data augmentation