## Dog Breed Classifier 
## 전이학습 (Transfer Learning)

In [1]:
# Show the scikit-learn package entry of the active conda environment.
!conda list scikit-learn

# packages in environment at C:\ProgramData\Anaconda3\envs\cvtf2:
#
# Name                    Version                   Build  Channel
scikit-learn              0.23.2           py37h47e9c7a_0  


In [1]:
# Render matplotlib figures inline, directly in the notebook output.
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
from numpy.random import rand

# Raise the column display width so long string cells are not truncated.
pd.options.display.max_colwidth = 800

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import warnings
# NOTE(review): this silences every warning category for the rest of the session.
warnings.filterwarnings('ignore')

In [4]:
from tensorflow.keras import regularizers
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.applications.inception_v3 import InceptionV3

## Load Dataset

In [3]:
# Dataset root, plus the train and test image folders located beneath it.
path = '../dataset/'
train_folder = f'{path}dog-breed-identification/train/'
test_folder = f'{path}dog-breed-identification/test/'

In [5]:
# Load the labels CSV found under the dataset root.
data_labels = pd.read_csv(f'{path}dog-breed-identification/labels.csv')

In [6]:
# Display the labels table (id and breed columns).
data_labels

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever
...,...,...
10217,ffd25009d635cfd16e793503ac5edef0,borzoi
10218,ffd3f636f7f379c51ba3648a9ff8254f,dandie_dinmont
10219,ffe2ca6c940cddfee68fa3cc6c63213f,airedale
10220,ffe5f6d8e2bff356e9482a80a6e29aac,miniature_pinscher


In [7]:
# Preview the first rows of the labels table.
data_labels.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [8]:
# The breed column is the classification target; count its distinct values.
target_labels = data_labels['breed']
len({breed for breed in target_labels})

120

## Prepare Labels

In [10]:
# One-hot encode the breed labels (sparse-backed columns), then convert the
# resulting frame to a NumPy array.
labels_ohe_names = pd.get_dummies(target_labels, sparse=True)
labels_ohe = np.asarray(labels_ohe_names)

In [11]:
# Sanity-check the one-hot matrix: its shape first, then its first two rows.
print(labels_ohe.shape, labels_ohe[:2], sep='\n')

(10222, 120)
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0]]


In [13]:
# Build the on-disk path of every training image as <train_folder><id>.jpg.
# Vectorized string concatenation replaces the row-wise apply(axis=1), which
# called a Python lambda once per row to produce the same strings.
data_labels['image_path'] = train_folder + data_labels['id'] + '.jpg'

In [15]:
# Preview the labels table again, now including the image_path column.
data_labels.head()

Unnamed: 0,id,breed,image_path
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull,dog-breed-identification/train/000bec180eb18c7604dcecc8fe0dba07.jpg
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo,dog-breed-identification/train/001513dfcb2ffafc82cccf4d8bbaba97.jpg
2,001cdf01b096e06d78e9e5112d419397,pekinese,dog-breed-identification/train/001cdf01b096e06d78e9e5112d419397.jpg
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick,dog-breed-identification/train/00214f311d5d2247d5dfe4fe24b2303d.jpg
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever,dog-breed-identification/train/0021f9ceb3235effd7fcde7f7538ed62.jpg


## Prepare Train-Test Datasets

In [16]:
# Load every image listed in data_labels['image_path'], resized to 299x299
# (the input size passed to InceptionV3 later in this notebook), into one
# float32 array of shape (N, 299, 299, 3).
# The array is preallocated and filled in place: building a Python list of
# per-image arrays, stacking it with np.array and then calling .astype (which
# copies by default) keeps several full copies of the dataset alive at once.
image_paths = data_labels['image_path'].values.tolist()
train_data = np.empty((len(image_paths), 299, 299, 3), dtype='float32')
for idx, img in enumerate(image_paths):
    train_data[idx] = img_to_array(load_img(img, target_size=(299, 299)))

In [17]:
# Confirm the loaded image array's shape.
train_data.shape

(10222, 299, 299, 3)

In [18]:
# Hold out 30% of the images as a test set, stratified on the breed labels,
# with a fixed random_state so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    train_data,
    target_labels,
    stratify=np.asarray(target_labels),
    test_size=0.3,
    random_state=55,
)

In [19]:
# Check the sizes of the train and test image arrays.
x_train.shape, x_test.shape

((7155, 299, 299, 3), (3067, 299, 299, 3))

In [20]:
# Carve a validation set (15% of the current training rows) out of the
# training split, again stratified on the breed labels.
# NOTE(review): x_train / y_train are overwritten here, so re-running this
# cell on its own splits the already-reduced training set a second time.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    stratify=np.asarray(y_train),
    test_size=0.15,
    random_state=55,
)

In [21]:
# Check the sizes of the train and validation image arrays.
x_train.shape, x_val.shape

((6081, 299, 299, 3), (1074, 299, 299, 3))

In [22]:
# One-hot encode the targets of the train, validation and test splits.
# Each split is encoded separately, so every frame is reindexed onto the same
# sorted breed list taken from the full label column: a breed that happens to
# be absent from one split still gets an all-zero column, and column k refers
# to the same breed in all three frames.
breed_columns = sorted(target_labels.dropna().unique())
y_train_ohe = pd.get_dummies(y_train.reset_index(drop=True)) \
    .reindex(columns=breed_columns, fill_value=0)
y_val_ohe = pd.get_dummies(y_val.reset_index(drop=True)) \
    .reindex(columns=breed_columns, fill_value=0)
y_test_ohe = pd.get_dummies(y_test.reset_index(drop=True)) \
    .reindex(columns=breed_columns, fill_value=0)

In [23]:
# All three one-hot frames should have the same number of columns.
y_train_ohe.shape, y_val_ohe.shape, y_test_ohe.shape

((6081, 120), (1074, 120), (3067, 120))

## Data (image) Augmentation

In [24]:
# Number of images per batch produced by the data generators.
BATCH_SIZE = 32

In [25]:
# Create the train generator: scale pixel values by 1/255 and augment with
# random rotations (rotation_range=30), width/height shifts (range 0.2) and
# horizontal flips.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=30,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   # Boolean flag: the string 'true' used before only worked
                                   # because any non-empty string (even 'false') is truthy.
                                   horizontal_flip=True)
# NOTE(review): shuffle=False keeps the batch order fixed across epochs — confirm this is intended.
train_generator = train_datagen.flow(x_train, y_train_ohe, shuffle=False, batch_size=BATCH_SIZE, seed=1)

In [27]:
# Validation generator: only the 1/255 rescaling, no augmentation, fixed order.
val_datagen = ImageDataGenerator(rescale=1./255)
val_generator = val_datagen.flow(
    x_val,
    y_val_ohe,
    batch_size=BATCH_SIZE,
    shuffle=False,
    seed=1,
)

## Prepare Deep Learning Classifier
- Load InceptionV3
- Add additional custom layers on top of InceptionV3 to prepare custom classifier

In [28]:
# Load InceptionV3 with ImageNet weights and without its top classification
# layers, for 299x299 RGB inputs.
base_inception = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=(299, 299, 3),
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Print the layer-by-layer summary of the InceptionV3 base.
base_inception.summary()

In [31]:
# Number of one-hot columns, i.e. the number of output classes.
y_train_ohe.shape[1]

120

In [32]:
# Classification head on top of InceptionV3: global average pooling over the
# spatial dimensions, two 512-unit ReLU layers, then a softmax layer with one
# unit per one-hot column.
total_classes = y_train_ohe.shape[1]
out = GlobalAveragePooling2D()(base_inception.output)
out = Dense(512, activation='relu')(out)
out = Dense(512, activation='relu')(out)
predictions = Dense(total_classes, activation='softmax')(out)

In [33]:
# Assemble the full network: InceptionV3 input through to the new softmax output.
model = Model(inputs=base_inception.input, outputs=predictions)

In [None]:
# Print the summary of the combined model.
model.summary()

In [35]:
# Optional step: mark every InceptionV3 layer as non-trainable so that only
# the layers added on top of it are updated during training.
for base_layer in base_inception.layers:
    base_layer.trainable = False

In [36]:
# Compile the model: Adam optimizer with a 1e-4 learning rate, categorical
# cross-entropy loss (the targets are one-hot encoded) and accuracy as the metric.
# `learning_rate` is the TF2 argument name; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Print the summary again after freezing the base layers and compiling.
model.summary()

## Model Training