<a href="https://colab.research.google.com/github/jjone36/Colab/blob/master/98.%20cnn_breed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Reference : [Kaggle kernel](https://www.kaggle.com/orangutan/keras-vgg19-starter)

# Dog Breed Classification

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

## Importing data

In [0]:
# Colab-only step: upload the Kaggle API token file (kaggle.json) from the local
# machine; the cell output shows it being saved into the working directory.
from google.colab import files
uploaded = files.upload()

Saving kaggle.json to kaggle.json


In [0]:
ls -lha kaggle.json

-rw-r--r-- 1 root root 63 Jan 24 08:40 kaggle.json


In [0]:
import json
from getpass import getpass

# Ask for the credentials at run time so they are never stored in the saved
# notebook; the API key is read without being echoed.
user = input('Kaggle username: ')
key = getpass('Kaggle API key: ')

# The Kaggle CLI looks for its token in ~/.kaggle/kaggle.json
# (which is /root/.kaggle/kaggle.json on Colab).
kaggle_dir = os.path.expanduser('~/.kaggle')
os.makedirs(kaggle_dir, exist_ok=True)
kaggle_json = os.path.join(kaggle_dir, 'kaggle.json')

# Create the file with owner-only permissions from the start so the key is
# never readable by other users, and let json handle quoting/escaping.
fd = os.open(kaggle_json, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(fd, 'w') as f:
    json.dump({'username': user, 'key': key}, f)
os.chmod(kaggle_json, 0o600)  # also tightens the mode of a pre-existing file

In [0]:
!kaggle competitions download -c dog-breed-identification

Downloading labels.csv.zip to /content
  0% 0.00/214k [00:00<?, ?B/s]
100% 214k/214k [00:00<00:00, 86.3MB/s]
Downloading sample_submission.csv.zip to /content
  0% 0.00/281k [00:00<?, ?B/s]
100% 281k/281k [00:00<00:00, 88.0MB/s]
Downloading test.zip to /content
 99% 343M/346M [00:02<00:00, 175MB/s]
100% 346M/346M [00:02<00:00, 144MB/s]
Downloading train.zip to /content
 98% 337M/345M [00:02<00:00, 134MB/s]
100% 345M/345M [00:02<00:00, 143MB/s]


In [0]:
pwd

'/content'

In [0]:
# Target image dimensions in pixels. `im_weight` holds the image *width*
# (the name is kept as-is because other cells reference it).
im_height, im_weight = 128, 128
im_size = (im_height, im_weight)

In [0]:
# Importing labels
# Read the label table straight from the zipped CSV; the first row is the header.
labels = pd.read_csv(
    'labels.csv.zip',
    compression='zip',
    sep=',',
    quotechar='"',
    header=0,
)

In [0]:
# Unzipping trainset
from zipfile import ZipFile

# The context manager closes the archive even if extraction fails part-way.
with ZipFile('train.zip', 'r') as zf:
    zf.extractall('')  # '' -> member paths are resolved relative to the working directory

## Preprocessing

In [0]:
labels.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [0]:
# Encoding labels
# One indicator column per distinct breed, one row per labelled image.
label_oh = pd.get_dummies(labels['breed'], prefix_sep='_')

In [0]:
label_oh.head()

Unnamed: 0,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,beagle,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [0]:
# Convert the one-hot DataFrame to a plain NumPy array so rows can be picked by
# integer position; the breed column names are not carried over into the array.
label_oh = np.asarray(label_oh)
label_oh

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [0]:
# Importing trainset 
from tqdm import tqdm 
import cv2

In [0]:
# Build parallel lists: resized image arrays and their one-hot label rows.
X_train = []
y_train = []

# Each row of `labels` is (image id, breed); row i of `label_oh` is the one-hot
# encoding for the same image, so the two are paired by position.
for i, (img_id, breed) in enumerate(tqdm(labels.values)):
    img_path = 'train/{}.jpg'.format(img_id)
    # cv2.imread does not raise on a missing/corrupt file: it returns None.
    # Channel order of the loaded image is BGR.
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError('Could not read image: {}'.format(img_path))
    X_train.append(cv2.resize(img, im_size))  # cv2.resize takes dsize as (width, height)
    y_train.append(label_oh[i])

100%|██████████| 10222/10222 [00:27<00:00, 368.63it/s]


In [0]:
# Stack the per-image one-hot rows into a single uint8 matrix (one row per image).
# The max() below is a quick sanity check that the entries are 0/1 indicators.
y_train_raw = np.array(y_train, dtype=np.uint8)
y_train_raw.max()

1

In [0]:
# Stack the images into one float32 array and scale pixel values by 1/255.
# Note: the images were loaded with cv2.imread, so the channel order is BGR, not RGB.
X_train_raw = np.array(X_train, dtype=np.float32) / 255.0
X_train_raw.shape

(10222, 128, 128, 3)

10,222 training images, each 128 × 128 pixels with 3 colour channels (OpenCV loads them in BGR order, not RGB).

In [0]:
y_train_raw.shape

(10222, 120)

In [0]:
# Number of classes = number of columns in the one-hot label matrix.
n_class = y_train_raw.shape[1]

In [0]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for validation; the fixed seed makes the split repeatable.
# X_train / y_train are rebound here: before this cell they are the Python lists
# built while loading images, afterwards they are the training portions of the split.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_raw,
    y_train_raw,
    test_size=0.3,
    random_state=1,
)

## Modeling

In [0]:
import keras
from keras.applications.vgg19 import VGG19

Using TensorFlow backend.


In [0]:
# VGG19 Application
# Convolutional base only (include_top=False drops the original classifier),
# initialised with ImageNet weights and sized for the resized images.
model = VGG19(
    include_top=False,
    weights='imagenet',
    input_shape=(im_height, im_weight, 3),
)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5


In [0]:
model.output

<tf.Tensor 'block5_pool/MaxPool:0' shape=(?, 4, 4, 512) dtype=float32>

The output is the feature map produced by the max-pooling layer of the 5th (final) convolutional block: 4 × 4 spatial positions with 512 channels each.

In [0]:
from keras.layers import Dense, Flatten

# New classification head: flatten the base model's final feature map into a
# vector, then map it to one softmax probability per class.
X = Flatten()(model.output)
pred = Dense(units=n_class, activation='softmax')(X)

Here we take the output of the VGG19 convolutional base and add new layers on top of it (a flatten layer followed by a softmax dense layer) to make the breed prediction.

In [0]:
from keras.models import Model

# Initialization
# Full network: from VGG19's input tensor through to the new softmax output.
myModel = Model(inputs=model.input, outputs=pred)

In [0]:
# Don't train application model's layers
# Freeze every layer of the VGG19 base so that only the newly added Dense layer
# has trainable weights.
# NOTE(review): Keras expects `trainable` to be set before `compile` for the
# change to take effect — keep this cell ahead of the compile cell.
for layer in model.layers:
  layer.trainable = False

In [0]:
# Compile
myModel.compile(
    loss='categorical_crossentropy',  # pairs with the one-hot targets and softmax output
    optimizer='adam',
    metrics=['accuracy'],
)

In [0]:
# Early Stop
from keras.callbacks import EarlyStopping

# Stop training once validation loss (the callback's default monitor, spelled
# out here) has gone 3 epochs in a row without improving.
stopper = EarlyStopping(monitor='val_loss', patience=3)

In [0]:
# Learning rate reduction
from keras.callbacks import ReduceLROnPlateau

# Halve the learning rate after 2 epochs without improvement in validation
# accuracy, never going below 1e-5.
# NOTE(review): the logged metric key is 'val_acc' in older Keras releases and
# 'val_accuracy' in newer ones — confirm against the installed version.
reducer = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=2,
    min_lr=1e-5,
    verbose=1,
)

In [0]:
# Bundle the callbacks into one list, the form Keras' `fit(callbacks=...)` accepts.
classbacks = [stopper, reducer]

In [0]:
myModel.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 128, 128, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 128, 128, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 128, 128, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 64, 64, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 64, 64, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 64, 64, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 32, 32, 128)       0         
__________

In [0]:
# Fitting
# Pass the callback list defined earlier: without `callbacks=` the early-stopping
# and learning-rate-reduction objects are created but never run during training.
myModel.fit(X_train, y_train,
            epochs = 5,
            batch_size = 30,
            validation_data = (X_val, y_val),
            callbacks = classbacks,
            verbose = 1)

Train on 7155 samples, validate on 3067 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7ff5904390f0>

## Prediction

In [0]:
# Score the model on the held-out validation split; the result lists the loss
# first, followed by the metrics given to compile (accuracy).
myModel.evaluate(X_val, y_val)



[4.158553734795838, 0.1649820671763294]