# Dog Breed Classification Using Tensorflow Keras

## Dataset can be downloaded from here:
#### Kaggle: https://www.kaggle.com/c/dog-breed-identification/data

## Importing All essential libraries

In [1]:
import os 
import numpy as np 
import pandas as pd 
import cv2 
from glob import glob 

In [2]:
import tensorflow as tf 
from tensorflow.keras.layers import * 
from tensorflow.keras.applications import MobileNetV2 
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau 
from tensorflow.keras.optimizers import Adam 

from sklearn.model_selection import train_test_split

In [3]:
path = './'
train_path = os.path.join(path, 'train/*')
test_path = os.path.join(path, 'test/*')
labels_path = os.path.join(path, 'labels.csv')

In [4]:
# Checking the csv file as a Pandas Dataframe
labels_df = pd.read_csv(labels_path)
labels_df.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [5]:
# Exploring breeds column
breed = labels_df["breed"].unique() # stores list of unique breeds
print(breed)

['boston_bull' 'dingo' 'pekinese' 'bluetick' 'golden_retriever'
 'bedlington_terrier' 'borzoi' 'basenji' 'scottish_deerhound'
 'shetland_sheepdog' 'walker_hound' 'maltese_dog' 'norfolk_terrier'
 'african_hunting_dog' 'wire-haired_fox_terrier' 'redbone'
 'lakeland_terrier' 'boxer' 'doberman' 'otterhound' 'standard_schnauzer'
 'irish_water_spaniel' 'black-and-tan_coonhound' 'cairn' 'affenpinscher'
 'labrador_retriever' 'ibizan_hound' 'english_setter' 'weimaraner'
 'giant_schnauzer' 'groenendael' 'dhole' 'toy_poodle' 'border_terrier'
 'tibetan_terrier' 'norwegian_elkhound' 'shih-tzu' 'irish_terrier'
 'kuvasz' 'german_shepherd' 'greater_swiss_mountain_dog' 'basset'
 'australian_terrier' 'schipperke' 'rhodesian_ridgeback' 'irish_setter'
 'appenzeller' 'bloodhound' 'samoyed' 'miniature_schnauzer'
 'brittany_spaniel' 'kelpie' 'papillon' 'border_collie' 'entlebucher'
 'collie' 'malamute' 'welsh_springer_spaniel' 'chihuahua' 'saluki' 'pug'
 'malinois' 'komondor' 'airedale' 'leonberg' 'mexican_h

## Building the model

In [6]:
def build_model(size, num_classes):
    inputs = Input((size, size, 3)) # we use the RGB color scheme 
    backbone = MobileNetV2(input_tensor = inputs, include_top = False, weights = "imagenet")
    backbone.trainable = True 
    x = backbone.output 
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.2)(x)
    x = Dense(1024, activation = "relu")(x)
    x = Dense(num_classes, activation = "softmax")(x)
    
    model = tf.keras.Model(inputs, x)
    return model 

## Image Preprocessing

In [7]:
def read_image(path, size):
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (size, size))
    image = image / 255.0 
    image = image.astype(np.float32)
    return image 

In [8]:
def parse_data(x, y):
    x = x.decode()

    num_class = 120
    size = 224

    image = read_image(x, size)
    label = [0] * num_class
    label[y] = 1
    label = np.array(label)
    label = label.astype(np.int32)

    return image, label

In [9]:
def tf_parse(x, y):
    x, y = tf.numpy_function(parse_data, [x, y], [tf.float32, tf.int32])
    x.set_shape((224, 224, 3))
    y.set_shape((120))
    return x, y

In [10]:
def tf_dataset(x, y, batch=8):
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    dataset = dataset.map(tf_parse)
    dataset = dataset.batch(batch)
    dataset = dataset.repeat()
    return dataset

In [11]:
# So, it's a multiclassification problem 
# We use enumerate() over a dictionary that transcribes 
# each breed it's breedid 
labels = []
breed2id = {name: i for i , name in enumerate(breed)}

ids = glob(train_path) # used to fetch addresses of all images 
# inside the train folder 
# Preprocessing the training data 
for image_id in ids:
    image_id = image_id.split('\\')[-1].split('.')[0]
    # print(image_id) <-- all image ids 
    breed_name = list(labels_df[labels_df.id == image_id]['breed'])[0]
    #print(image_id, '\t', breed_name)
    breed_idx = breed2id[breed_name]
    labels.append(breed_idx)

ids = ids[:1000]
labels = labels[:1000]

## Splitting the dataset

In [12]:
train_x, valid_x = train_test_split(ids, test_size = 0.2, random_state = 42)
train_y, valid_y = train_test_split(labels, test_size = 0.2, random_state = 42)

In [13]:
# Parameters 
# Parameters 
size = 224
num_classes = 120
lr = 1e-4
batch = 16
epochs = 5

In [14]:
# Our model 
model = build_model(size, num_classes)
model.compile(loss = "categorical_crossentropy", optimizer = Adam(lr), metrics = ["acc"])



In [15]:
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
_______________________________________________________________________________________

## Create the dataset

In [16]:
train_dataset = tf_dataset(train_x, train_y, batch = batch)
valid_dataset = tf_dataset(valid_x, valid_y, batch = batch)


# Training

In [17]:
callbacks = [
    ModelCheckpoint("model.h5", verbose = 1, save_best_only = True),
    ReduceLROnPlateau(factor = 0.1, patience = 5, min_lr = 1e-6)
]

train_steps = (len(train_x)//batch) + 1
valid_steps = (len(valid_x)//batch) + 1
model.fit(train_dataset,
        steps_per_epoch=train_steps,
        validation_steps=valid_steps,
        validation_data=valid_dataset,
        epochs=epochs,
        callbacks=callbacks)

Epoch 1/5
Epoch 00001: val_loss improved from inf to 4.56922, saving model to model.h5
Epoch 2/5
Epoch 00002: val_loss improved from 4.56922 to 4.06108, saving model to model.h5
Epoch 3/5
Epoch 00003: val_loss improved from 4.06108 to 3.60281, saving model to model.h5
Epoch 4/5
Epoch 00004: val_loss improved from 3.60281 to 3.34102, saving model to model.h5
Epoch 5/5
 1/51 [..............................] - ETA: 0s - loss: 0.3756 - acc: 1.0000

KeyboardInterrupt: 