In [1]:
%matplotlib inline
%pwd

'/home/gabe/work/fast-ai/nbs'

In [2]:
%%html
<style>
  .end_space {
      min-height: 1000px;
  }
  .container {
      width: 100%;
  }
</style

In [3]:
import sys
import os
import datetime as dt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from importlib import reload
from mpl_toolkits.axes_grid1 import ImageGrid
from tqdm import tqdm
from os import listdir, makedirs
from os.path import join, abspath, exists, isdir, isfile

np.random.seed(seed=2017)
sys.path.append('../src')

In [88]:
import utils.dogbreed
reload(utils.dogbreed)

import utils.utils
reload(utils.utils)

import utils.trainhelper
reload(utils.trainhelper)

import utils.mix_iterator
reload(utils.mix_iterator)

from utils.dogbreed import *
from utils.trainhelper import save_model, read_model, get_classes, get_batches, get_data, un_onehot
from utils.utils import save_array, load_array, read_img, get_steps, do_clip
from utils.mix_iterator import MixIterator

In [5]:
from keras.models import Model
from keras.models import Sequential
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Input
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import GlobalAveragePooling2D
from keras.layers import GlobalMaxPooling2D
from keras.layers import BatchNormalization
from keras.optimizers import Adam, Nadam
from keras.regularizers import l2
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications import xception, inception_v3
from keras.utils import to_categorical as onehot

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, accuracy_score

In [51]:
DATA_DIR = '../data/dogbreed'
SAMPLE_DIR = join(DATA_DIR, 'sample')

BATCH_SIZE = 64
IMG_SIZE = 299
TARGET_SIZE = (IMG_SIZE, IMG_SIZE)
TARGET_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

POOLING = 'avg'
NUM_CLASSES = 120
SEED = 1987

In [69]:
train_dir = join(DATA_DIR, 'train')
valid_dir = join(DATA_DIR, 'valid')
save_dir = join(DATA_DIR, 'imgsave')

train_gen = image.ImageDataGenerator(
    preprocessing_function=xception.preprocess_input,
    rotation_range=10,
    height_shift_range=0.05,
    horizontal_flip=True,
    shear_range=0.1,
    zoom_range=0.1,
    channel_shift_range=10,
    width_shift_range=0.1
)
valid_gen = image.ImageDataGenerator(preprocessing_function=xception.preprocess_input)

train_batches = get_batches(train_dir, gen=train_gen, batch_size=BATCH_SIZE, target_size=TARGET_SIZE, shuffle=True, save_to_dir=save_dir)
valid_batches = get_batches(valid_dir, gen=valid_gen, batch_size=BATCH_SIZE, target_size=TARGET_SIZE, shuffle=False)

train_steps = get_steps(train_batches)
valid_steps = get_steps(valid_batches)

Found 8221 images belonging to 120 classes.
Found 2001 images belonging to 120 classes.


In [19]:
x_model = xception.Xception(weights='imagenet', include_top=False, pooling=None, input_shape=TARGET_SHAPE)

for layer in x_model.layers:
    layer.trainable = False
    
x = MaxPooling2D()(x_model.output)
x = Flatten()(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.6)(x)
x = Dense(512, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.7)(x)
x = Dense(NUM_CLASSES, activation='softmax')(x)

d_model= Model(x_model.inputs, x, name='xception-extended')
d_model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
d_model.fit_generator(train_batches, train_steps, epochs=3, validation_data=valid_batches, validation_steps=valid_steps)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f0a5bfb4d68>

In [32]:
d_model.optimizer.lr = 0.01
d_model.fit_generator(train_batches, train_steps, epochs=5, validation_data=valid_batches, validation_steps=valid_steps)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f0a5b56bac8>

In [70]:
valid_batches.batch_size
valid_batches.samples

batches = MixIterator([train_batches, valid_batches])
batches.next()

(array([[[[-0.77089763, -0.67194068, -0.61144429],
          [-0.77089763, -0.67194068, -0.61144429],
          [-0.77089763, -0.67194068, -0.61144429],
          ..., 
          [-0.77874076, -0.45233279, -0.34477764],
          [-0.77874076, -0.47586221, -0.36046392],
          [-0.77874076, -0.47586221, -0.36046392]],
 
         [[-0.75521135, -0.64841127, -0.58007175],
          [-0.75521135, -0.64841127, -0.58007175],
          [-0.75521135, -0.64841127, -0.58007175],
          ..., 
          [-0.77874076, -0.45233279, -0.34477764],
          [-0.77874076, -0.47586221, -0.36046392],
          [-0.73952508, -0.43664652, -0.32124823]],
 
         [[-0.75521135, -0.62488186, -0.56438547],
          [-0.75521135, -0.62488186, -0.56438547],
          [-0.75521135, -0.62488186, -0.56438547],
          ..., 
          [-0.77874076, -0.45233279, -0.34477764],
          [-0.77874076, -0.47586221, -0.36046392],
          [-0.73952508, -0.43664652, -0.32124823]],
 
         ..., 
         [

In [71]:
batches = MixIterator([train_batches, valid_batches])
d_model.optimizer.lr = 0.001
d_model.fit_generator(batches, batches.steps, epochs=5, validation_data=valid_batches, validation_steps=valid_steps)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f0a5ca61898>

In [72]:
save_model(DATA_DIR, d_model)

In [73]:
test_dir = join(DATA_DIR, 'test')
test_gen = image.ImageDataGenerator(preprocessing_function=xception.preprocess_input)
test_batches = get_batches(test_dir, gen=test_gen, batch_size=BATCH_SIZE, target_size=TARGET_SIZE, shuffle=False)
test_steps = get_steps(test_batches)

Found 10357 images belonging to 1 classes.


In [76]:
test_preds = d_model.predict_generator(test_batches, steps=test_steps)

In [86]:
from os.path import basename, splitext
test_filenames = [basename(f) for f in test_batches.filenames]
test_ids = [splitext(f)[0] for f in test_filenames]

test_preds[:5]

array([[  1.05461140e-06,   2.90331997e-07,   2.12970735e-06,
          4.46580401e-07,   6.94316498e-08,   9.64339051e-06,
          9.79048536e-07,   2.33807938e-07,   2.70102959e-07,
          1.04174649e-06,   1.63911068e-07,   5.88475359e-06,
          2.57698298e-06,   3.40607642e-07,   3.46654389e-07,
          1.70070010e-07,   4.13343258e-07,   9.41586080e-08,
          2.97309128e-07,   1.02109802e-07,   5.64581114e-07,
          3.72415514e-07,   1.62197509e-08,   3.44622919e-07,
          1.95238840e-06,   2.16882384e-07,   1.18625458e-07,
          2.36946676e-07,   1.63276161e-06,   7.77639144e-08,
          1.39571540e-07,   1.86114039e-06,   5.45826481e-07,
          4.76970456e-07,   7.42538120e-08,   2.31275248e-04,
          2.40888289e-06,   3.30002621e-07,   7.99512065e-07,
          8.89042951e-07,   1.00835109e-06,   1.02805508e-07,
          1.19583228e-05,   7.97866790e-07,   2.01207513e-06,
          2.77586363e-08,   4.35983125e-08,   1.63236692e-07,
        

In [89]:
do_clip(test_preds[:5], 0.93)


array([[ 0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.0

In [106]:
raw_labels = pd.read_csv(join(DATA_DIR, 'labels.csv'))
sorted_breeds = list(set(raw_labels.breed))
sorted_breeds.sort()

sorted_breeds

['affenpinscher',
 'afghan_hound',
 'african_hunting_dog',
 'airedale',
 'american_staffordshire_terrier',
 'appenzeller',
 'australian_terrier',
 'basenji',
 'basset',
 'beagle',
 'bedlington_terrier',
 'bernese_mountain_dog',
 'black-and-tan_coonhound',
 'blenheim_spaniel',
 'bloodhound',
 'bluetick',
 'border_collie',
 'border_terrier',
 'borzoi',
 'boston_bull',
 'bouvier_des_flandres',
 'boxer',
 'brabancon_griffon',
 'briard',
 'brittany_spaniel',
 'bull_mastiff',
 'cairn',
 'cardigan',
 'chesapeake_bay_retriever',
 'chihuahua',
 'chow',
 'clumber',
 'cocker_spaniel',
 'collie',
 'curly-coated_retriever',
 'dandie_dinmont',
 'dhole',
 'dingo',
 'doberman',
 'english_foxhound',
 'english_setter',
 'english_springer',
 'entlebucher',
 'eskimo_dog',
 'flat-coated_retriever',
 'french_bulldog',
 'german_shepherd',
 'german_short-haired_pointer',
 'giant_schnauzer',
 'golden_retriever',
 'gordon_setter',
 'great_dane',
 'great_pyrenees',
 'greater_swiss_mountain_dog',
 'groenendael',


In [112]:
result = pd.DataFrame(do_clip(test_preds, 0.93), columns=sorted_breeds)
result.loc[:, 'id'] = pd.Series(test_ids, index=result.index)
result.to_csv(join(DATA_DIR, 'results', 'submission2.csv'), index=False)