In [1]:
%matplotlib inline
%pwd

'/home/gabe/work/fast-ai/nbs'

In [2]:
%%html
<style>
  .end_space {
      min-height: 1000px;
  }
  .container {
      width: 100%;
  }
</style

In [3]:
import sys
import os
import datetime as dt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from importlib import reload
from mpl_toolkits.axes_grid1 import ImageGrid
from tqdm import tqdm
from os import listdir, makedirs
from os.path import join, abspath, exists, isdir, isfile

np.random.seed(seed=2017)
sys.path.append('../src')

In [88]:
import utils.dogbreed
reload(utils.dogbreed)

import utils.utils
reload(utils.utils)

import utils.trainhelper
reload(utils.trainhelper)

import utils.mix_iterator
reload(utils.mix_iterator)

from utils.dogbreed import *
from utils.trainhelper import save_model, read_model, get_classes, get_batches, get_data, un_onehot
from utils.utils import save_array, load_array, read_img, get_steps, do_clip
from utils.mix_iterator import MixIterator

In [5]:
from keras.models import Model
from keras.models import Sequential
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Input
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import GlobalAveragePooling2D
from keras.layers import GlobalMaxPooling2D
from keras.layers import BatchNormalization
from keras.optimizers import Adam, Nadam
from keras.regularizers import l2
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications import xception, inception_v3
from keras.utils import to_categorical as onehot

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, accuracy_score

In [51]:
DATA_DIR = '../data/dogbreed'
SAMPLE_DIR = join(DATA_DIR, 'sample')

BATCH_SIZE = 64
IMG_SIZE = 299
TARGET_SIZE = (IMG_SIZE, IMG_SIZE)
TARGET_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

POOLING = 'avg'
NUM_CLASSES = 120
SEED = 1987

In [127]:
train_dir = join(DATA_DIR, 'train')
valid_dir = join(DATA_DIR, 'valid')
save_dir = join(DATA_DIR, 'imgsave')

train_gen = image.ImageDataGenerator(
    preprocessing_function=xception.preprocess_input,
    rotation_range=10,
    height_shift_range=0.05,
    horizontal_flip=True,
    shear_range=0.1,
    zoom_range=0.1,
    channel_shift_range=10,
    width_shift_range=0.1
)
valid_gen = image.ImageDataGenerator(preprocessing_function=xception.preprocess_input)

train_batches = get_batches(train_dir, gen=train_gen, batch_size=BATCH_SIZE, target_size=TARGET_SIZE, shuffle=True, save_to_dir=save_dir)
valid_batches = get_batches(valid_dir, gen=valid_gen, batch_size=BATCH_SIZE, target_size=TARGET_SIZE, shuffle=False)

train_steps = get_steps(train_batches)
valid_steps = get_steps(valid_batches)

Found 8221 images belonging to 120 classes.
Found 2001 images belonging to 120 classes.


In [129]:
x_model = xception.Xception(weights='imagenet', include_top=False, pooling=POOLING, input_shape=TARGET_SHAPE)

for layer in x_model.layers:
    layer.trainable = False
    
# x = Dense(512, activation='relu')(x_model.output)
# x = BatchNormalization()(x)
# x = Dropout(0.5)(x)
# x = Dense(512, activation='relu')(x)

x = BatchNormalization()(x_model.output)
x = Dropout(0.7)(x)
x = Dense(NUM_CLASSES, activation='softmax')(x)

d_model= Model(x_model.inputs, x, name='xception-extended')
d_model.compile(Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
d_model.fit_generator(train_batches, train_steps, epochs=5, validation_data=valid_batches, validation_steps=valid_steps)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f0a098cdf98>

In [132]:
d_model.optimizer.lr = 0.0001
d_model.fit_generator(train_batches, train_steps, epochs=10, validation_data=valid_batches, validation_steps=valid_steps)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f0a0b44d3c8>

In [133]:
valid_batches.batch_size
valid_batches.samples

batches = MixIterator([train_batches, valid_batches])
batches.next()

(array([[[[ -4.43771601e-01,  -4.29686010e-01,  -6.36167526e-01],
          [ -4.43771601e-01,  -4.29686010e-01,  -6.36167526e-01],
          [ -4.51614738e-01,  -4.37529147e-01,  -6.44010663e-01],
          ..., 
          [ -9.14359808e-01,  -9.15960550e-01,  -1.00000000e+00],
          [ -9.14359808e-01,  -9.15960550e-01,  -1.00000000e+00],
          [ -9.14359808e-01,  -9.15960550e-01,  -1.00000000e+00]],
 
         [[ -4.43771601e-01,  -4.29686010e-01,  -6.36167526e-01],
          [ -4.43771601e-01,  -4.29686010e-01,  -6.36167526e-01],
          [ -4.51614738e-01,  -4.37529147e-01,  -6.44010663e-01],
          ..., 
          [ -9.14359808e-01,  -9.15960550e-01,  -1.00000000e+00],
          [ -9.14359808e-01,  -9.15960550e-01,  -1.00000000e+00],
          [ -9.14359808e-01,  -9.15960550e-01,  -1.00000000e+00]],
 
         [[ -4.43771601e-01,  -4.29686010e-01,  -6.36167526e-01],
          [ -4.43771601e-01,  -4.29686010e-01,  -6.36167526e-01],
          [ -4.43771601e-01,  -4.29686

In [134]:
batches = MixIterator([train_batches, valid_batches])
d_model.optimizer.lr = 0.001
d_model.fit_generator(batches, batches.steps, epochs=10, validation_data=valid_batches, validation_steps=valid_steps)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f0a0b44d5f8>

In [135]:
save_model(DATA_DIR, d_model)

In [136]:
test_dir = join(DATA_DIR, 'test')
test_gen = image.ImageDataGenerator(preprocessing_function=xception.preprocess_input)
test_batches = get_batches(test_dir, gen=test_gen, batch_size=BATCH_SIZE, target_size=TARGET_SIZE, shuffle=False)
test_steps = get_steps(test_batches)

Found 10357 images belonging to 1 classes.


In [137]:
test_preds = d_model.predict_generator(test_batches, steps=test_steps)

In [138]:
from os.path import basename, splitext
test_filenames = [basename(f) for f in test_batches.filenames]
test_ids = [splitext(f)[0] for f in test_filenames]

test_preds[:5]

array([[  7.32356568e-07,   5.60396984e-07,   8.66024766e-06,
          4.47591759e-07,   3.25035944e-06,   1.68472752e-06,
          1.37572924e-05,   5.55590486e-06,   6.36619291e-07,
          5.14192561e-06,   4.52097811e-06,   2.97192787e-06,
          3.11454755e-06,   1.43063903e-06,   1.23223344e-06,
          7.90973331e-07,   1.73490264e-06,   4.96114581e-07,
          1.33603794e-06,   7.65998152e-07,   5.75782178e-06,
          7.48831008e-06,   1.11969734e-06,   5.13862403e-07,
          5.81237964e-06,   3.43993293e-06,   3.19207015e-06,
          1.56473436e-06,   2.73481237e-06,   1.83006819e-06,
          1.65191682e-06,   3.59594196e-05,   2.57856118e-06,
          3.06704635e-07,   4.31889305e-07,   6.80549070e-04,
          1.48308641e-06,   6.37404185e-07,   1.23921825e-06,
          4.36802566e-06,   1.09482721e-06,   1.40691918e-06,
          2.12319583e-06,   8.52903941e-06,   6.33385469e-07,
          7.15151782e-06,   2.49848381e-06,   2.33636638e-06,
        

In [139]:
do_clip(test_preds[:5], 0.93)


array([[ 0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.00777778,  0.00777778,
         0.00777778,  0.00777778,  0.00777778,  0.0

In [140]:
raw_labels = pd.read_csv(join(DATA_DIR, 'labels.csv'))
sorted_breeds = list(set(raw_labels.breed))
sorted_breeds.sort()

sorted_breeds

['affenpinscher',
 'afghan_hound',
 'african_hunting_dog',
 'airedale',
 'american_staffordshire_terrier',
 'appenzeller',
 'australian_terrier',
 'basenji',
 'basset',
 'beagle',
 'bedlington_terrier',
 'bernese_mountain_dog',
 'black-and-tan_coonhound',
 'blenheim_spaniel',
 'bloodhound',
 'bluetick',
 'border_collie',
 'border_terrier',
 'borzoi',
 'boston_bull',
 'bouvier_des_flandres',
 'boxer',
 'brabancon_griffon',
 'briard',
 'brittany_spaniel',
 'bull_mastiff',
 'cairn',
 'cardigan',
 'chesapeake_bay_retriever',
 'chihuahua',
 'chow',
 'clumber',
 'cocker_spaniel',
 'collie',
 'curly-coated_retriever',
 'dandie_dinmont',
 'dhole',
 'dingo',
 'doberman',
 'english_foxhound',
 'english_setter',
 'english_springer',
 'entlebucher',
 'eskimo_dog',
 'flat-coated_retriever',
 'french_bulldog',
 'german_shepherd',
 'german_short-haired_pointer',
 'giant_schnauzer',
 'golden_retriever',
 'gordon_setter',
 'great_dane',
 'great_pyrenees',
 'greater_swiss_mountain_dog',
 'groenendael',


In [141]:
result = pd.DataFrame(do_clip(test_preds, 0.93), columns=sorted_breeds)
result.loc[:, 'id'] = pd.Series(test_ids, index=result.index)
result.to_csv(join(DATA_DIR, 'results', 'submission3.csv'), index=False)