In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from keras.applications import inception_v3
from keras.applications.inception_v3 import InceptionV3
from keras.layers import *
from keras.models import Sequential
from keras.metrics import categorical_crossentropy
from sklearn.model_selection import train_test_split
from os import makedirs
from os.path import expanduser, exists, join

In [2]:
from os.path import expanduser, exists, join
!ls ../input/keras-pretrained-models/   # just to be sure the data is here

cache_dir = expanduser(join('~', '.keras'))
if not exists(cache_dir):
    makedirs(cache_dir)
models_dir = join(cache_dir, 'models')
if not exists(models_dir):
    makedirs(models_dir)
    
!cp ../input/keras-pretrained-models/*notop* ~/.keras/models/
!cp ../input/keras-pretrained-models/imagenet_class_index.json ~/.keras/models/
!cp ../input/keras-pretrained-models/resnet50* ~/.keras/models/

Kuszma.JPG
imagenet_class_index.json
inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5
inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
inception_v3_weights_tf_dim_ordering_tf_kernels.h5
inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
resnet50_weights_tf_dim_ordering_tf_kernels.h5
resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
xception_weights_tf_dim_ordering_tf_kernels.h5
xception_weights_tf_dim_ordering_tf_kernels_notop.h5


In [3]:
# Locations of the competition images (trailing slash is relied on when
# file names are appended to these prefixes).
train_folder, test_folder = (
    '../input/dog-breed-identification/train/',
    '../input/dog-breed-identification/test/',
)

In [4]:
# Load the image-id -> breed table that accompanies the training images.
labels_csv_path = '../input/dog-breed-identification/labels.csv'
labels = pd.read_csv(labels_csv_path)
labels.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [5]:
# Number of distinct breeds in the full label table (before any filtering).
# dropna=False mirrors len(unique()), which would also count a missing value.
num_classes = labels['breed'].nunique(dropna=False)
num_classes

120

In [6]:
# Restrict the problem to the 50 breeds with the most images; the breed names
# are kept in alphabetical order.
breed_counts = labels['breed'].value_counts()
top_breeds = sorted(breed_counts.head(50).index)
keep_rows = labels['breed'].isin(top_breeds)
labels = labels[keep_rows]
top_breeds

['afghan_hound',
 'african_hunting_dog',
 'airedale',
 'australian_terrier',
 'basenji',
 'beagle',
 'bedlington_terrier',
 'bernese_mountain_dog',
 'blenheim_spaniel',
 'bluetick',
 'border_terrier',
 'boston_bull',
 'bouvier_des_flandres',
 'cairn',
 'chow',
 'collie',
 'dandie_dinmont',
 'english_foxhound',
 'entlebucher',
 'great_pyrenees',
 'ibizan_hound',
 'irish_setter',
 'irish_wolfhound',
 'italian_greyhound',
 'japanese_spaniel',
 'kelpie',
 'lakeland_terrier',
 'leonberg',
 'lhasa',
 'maltese_dog',
 'miniature_pinscher',
 'newfoundland',
 'norwegian_elkhound',
 'old_english_sheepdog',
 'papillon',
 'pembroke',
 'pomeranian',
 'pug',
 'rhodesian_ridgeback',
 'saluki',
 'samoyed',
 'schipperke',
 'scottish_deerhound',
 'sealyham_terrier',
 'shih-tzu',
 'siberian_husky',
 'silky_terrier',
 'tibetan_terrier',
 'weimaraner',
 'whippet']

In [7]:
# Sanity check: (rows, columns) of the label table after the breed filter.
labels.shape

(4890, 2)

In [8]:
# Breed name per image; row order follows `labels`, so it lines up with any
# array built by iterating over `labels` in order.
target_labels = labels.breed


In [9]:
# One-hot encode the breed names (one column per breed, sparse-backed) and
# view the result as a plain NumPy array to check its dimensions.
one_hot = pd.get_dummies(data=target_labels, sparse=True)
one_hot_labels = np.asarray(one_hot)
one_hot_labels.shape

(4890, 50)

In [10]:
# Rank breeds by image count and keep the `num_classes` most frequent ones.
# NOTE(review): num_classes was computed (120) before the top-50 filter, so this
# filter currently keeps every remaining row. It must stay that way, because
# `target_labels` was taken from `labels` earlier and has to stay row-aligned
# with the images loaded from the `filename` column.
selected_breed_list = list(
    labels.groupby('breed').count().sort_values(by='id', ascending=False).head(num_classes).index
)
# .copy() yields an independent frame, so adding a column below does not write
# into a slice of the original table (avoids pandas' SettingWithCopyWarning).
labels = labels[labels['breed'].isin(selected_breed_list)].copy()
# Vectorised string concatenation instead of a row-wise apply.
labels['filename'] = train_folder + labels['id'] + '.jpg'
labels.head()

Unnamed: 0,id,breed,filename
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull,../input/dog-breed-identification/train/000bec...
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick,../input/dog-breed-identification/train/00214f...
5,002211c81b498ef88e1b40b9abf84e1d,bedlington_terrier,../input/dog-breed-identification/train/002211...
6,00290d3e1fdd27226ba27a8ce248ce85,bedlington_terrier,../input/dog-breed-identification/train/00290d...
8,003df8b8a8b05244b1d920bb6cf451f9,basenji,../input/dog-breed-identification/train/003df8...


In [11]:
# Decode every training image, resized to 299x299, and stack them into a single
# (n_images, 299, 299, 3) float32 array. Passing dtype to np.array builds the
# float32 array directly; the previous trailing .astype('float32') allocated a
# second full copy of this multi-gigabyte array.
train_data = np.array(
    [img_to_array(load_img(path, target_size=(299, 299))) for path in labels['filename']],
    dtype='float32',
)

In [12]:
# Hold out 20% of the images for validation. The split is stratified on breed
# so both parts keep the same class proportions, and seeded for repeatability.
split_options = dict(
    test_size=0.2,
    stratify=np.array(target_labels),
    random_state=100,
)
x_train, x_validation, y_train, y_validation = train_test_split(
    train_data, target_labels, **split_options
)

In [13]:
# Report the array dimensions of each split.
for caption, split in (('x_train shape = ', x_train),
                       ('x_validation shape = ', x_validation)):
    print(caption, split.shape)

x_train shape =  (3912, 299, 299, 3)
x_validation shape =  (978, 299, 299, 3)


In [14]:
# One-hot encode the targets of both splits against ONE shared, ordered class
# list. Encoding each split independently only yields matching columns when
# every breed happens to occur in both splits; reindexing guarantees identical
# column sets and order (absent breeds become all-zero columns).
class_names = sorted(target_labels.unique())
y_train = (
    pd.get_dummies(y_train.reset_index(drop=True))
    .reindex(columns=class_names, fill_value=0)
)
y_validation = (
    pd.get_dummies(y_validation.reset_index(drop=True))
    .reindex(columns=class_names, fill_value=0)
)

In [15]:
# Preview the one-hot encoded training targets (one column per breed).
y_train.head()

Unnamed: 0,afghan_hound,african_hunting_dog,airedale,australian_terrier,basenji,beagle,bedlington_terrier,bernese_mountain_dog,blenheim_spaniel,bluetick,...,samoyed,schipperke,scottish_deerhound,sealyham_terrier,shih-tzu,siberian_husky,silky_terrier,tibetan_terrier,weimaraner,whippet
0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
# Training input pipeline: scale pixels to [0, 1] and apply random rotation,
# zoom, shift and horizontal-flip augmentation.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    zoom_range=0.3,
    width_shift_range=0.2,
    height_shift_range=0.2,
    # Boolean flag; the previous string 'true' only worked because any
    # non-empty string is truthy.
    horizontal_flip=True,
)
# Shuffle the training stream so batches differ from pass to pass; `seed`
# keeps the shuffling and the random transforms reproducible.
train_generator = train_datagen.flow(x_train, y_train, shuffle=True, batch_size=10, seed=10)

In [17]:
# Validation input pipeline: rescale only, no augmentation.
val_datagen = ImageDataGenerator(rescale=1./255)
# Use val_datagen here. The previous code called train_datagen.flow, which
# applied the random training augmentations to the validation images and left
# val_datagen unused.
val_generator = val_datagen.flow(x_validation, y_validation, shuffle=False, batch_size=10, seed=10)

In [18]:
# ImageNet-pretrained InceptionV3 without its classification head, built for
# 299x299 RGB inputs. `model` is rebound to the full classifier in a later cell.
model = InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(299, 299, 3),
)

In [19]:
# Inspect the output tensor of the headless base network (shape of its feature map).
model.output

<tf.Tensor 'mixed10/Identity:0' shape=(None, 8, 8, 2048) dtype=float32>

In [20]:
from keras.layers import Flatten

In [21]:
# Transfer-learning classifier: a frozen ImageNet-pretrained InceptionV3 base
# followed by a small trainable dense head.
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))
# Freeze the pretrained weights so only the new head is trained.
base_model.trainable = False

model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())   # collapse the spatial feature map to one vector per image
model.add(Dense(1024, activation='relu'))
model.add(Dense(512, activation='relu'))
# One softmax unit per one-hot target column (50 breeds here), derived from the
# targets so the head cannot drift out of sync with the label encoding.
model.add(Dense(y_train.shape[1], activation='softmax'))


In [22]:
# Print the layer stack with output shapes and trainable / non-trainable parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inception_v3 (Model)         (None, 8, 8, 2048)        21802784  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2048)              0         
_________________________________________________________________
dense (Dense)                (None, 1024)              2098176   
_________________________________________________________________
dense_1 (Dense)              (None, 512)               524800    
_________________________________________________________________
dense_2 (Dense)              (None, 50)                25650     
Total params: 24,451,410
Trainable params: 2,648,626
Non-trainable params: 21,802,784
_________________________________________________________________


In [23]:
# Multi-class setup: categorical cross-entropy against the one-hot targets,
# optimised with Adam, tracking accuracy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [24]:
# Train the dense head for 10 epochs.
# steps_per_epoch=175 batches of 10 is a deliberate partial pass over the
# training set per epoch. Validation, however, now runs over the whole
# validation generator each epoch: the previous fixed 44 steps scored only
# 440 of the 978 held-out images.
model.fit_generator(train_generator,
                    steps_per_epoch=175,
                    validation_data=val_generator,
                    validation_steps=len(val_generator),
                    epochs=10,
                    verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f781c0eac90>