<a href="https://colab.research.google.com/github/bukits/dog-breed-identification/blob/main/dog_breed_identification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Fetch the project repository; the train/valid/test image folders used further
# down are read from /content/dog-breed-identification/data.
!git clone https://github.com/bukits/dog-breed-identification.git

Cloning into 'dog-breed-identification'...
remote: Enumerating objects: 20884, done.
remote: Counting objects: 100% (27/27), done.
remote: Compressing objects: 100% (19/19), done.
remote: Total 20884 (delta 9), reused 24 (delta 6), pack-reused 20857
Receiving objects: 100% (20884/20884), 733.82 MiB | 33.52 MiB/s, done.
Resolving deltas: 100% (9/9), done.
Checking out files: 100% (20600/20600), done.


In [None]:
# Mount Google Drive in the Colab runtime. Only the one-off splitting cell reads
# from Drive ('/content/drive/MyDrive/Images'); training uses the cloned repo data.
from google.colab import drive
drive.mount('/content/drive')

In [2]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile
import numpy as np

In [None]:
# Dataset-splitting helper only (one-off preprocessing, do not run)
def split_data(base_folder, train_dir, valid_dir, test_dir, valid_split = 0.2, test_split = 0.1):
  """Copy a class-per-folder image dataset into train/valid/test folders.

  Every sub-directory of ``base_folder`` is treated as one class label. Its
  files are sorted by name and copied, in that order, into
  ``train_dir/<label>``, ``valid_dir/<label>`` and ``test_dir/<label>``:
  the first ``1 - valid_split - test_split`` fraction goes to train, the next
  ``valid_split`` fraction to valid and the remainder to test. The split is
  deterministic (no shuffling) and the source files are left untouched.

  Args:
    base_folder: Directory containing one sub-directory per class.
    train_dir: Destination root for the training images.
    valid_dir: Destination root for the validation images.
    test_dir: Destination root for the test images.
    valid_split: Fraction of each class copied to ``valid_dir``.
    test_split: Fraction of each class copied to ``test_dir``.

  Raises:
    OSError: If ``base_folder`` does not exist.
    ValueError: If a split fraction is negative or the two sum to more than 1.
  """
  if not os.path.exists(base_folder):
    raise OSError('base_folder does not exist')
  if valid_split < 0 or test_split < 0 or valid_split + test_split > 1:
    raise ValueError('valid_split and test_split must be non-negative and sum to at most 1')

  for target_root in (train_dir, valid_dir, test_dir):
    os.makedirs(target_root, exist_ok=True)

  for label in sorted(os.listdir(base_folder)):
    original_path = os.path.join(base_folder, label)
    # Stray files next to the class folders (e.g. .DS_Store, README) are not
    # labels; without this check os.listdir() below fails on them.
    if not os.path.isdir(original_path):
      continue

    train_path = os.path.join(train_dir, label)
    valid_path = os.path.join(valid_dir, label)
    test_path = os.path.join(test_dir, label)
    for target_path in (train_path, valid_path, test_path):
      os.makedirs(target_path, exist_ok=True)

    # Only regular files can be copied with copyfile(); nested folders are skipped.
    images = sorted(
        name for name in os.listdir(original_path)
        if os.path.isfile(os.path.join(original_path, name)))

    # Cut points into the sorted file list: [0, v_point) -> train,
    # [v_point, t_point) -> valid, [t_point, end) -> test.
    v_point = int(len(images)*(1-valid_split-test_split))
    t_point = int(len(images)*(1-test_split))

    subsets = (
        (train_path, images[:v_point]),
        (valid_path, images[v_point:t_point]),
        (test_path, images[t_point:]),
    )
    for target_path, subset in subsets:
      for img in subset:
        copyfile(os.path.join(original_path, img), os.path.join(target_path, img))

In [None]:
# One-off split of the raw images stored on Google Drive (do not run).
# Source folder on the mounted Drive, followed by the three output folders.
data_dir = '/content/drive/MyDrive/Images'
train_dir, valid_dir, test_dir = (
    '/content/data/train',
    '/content/data/valid',
    '/content/data/test',
)

split_data(
    base_folder=data_dir,
    train_dir=train_dir,
    valid_dir=valid_dir,
    test_dir=test_dir,
)

In [42]:
# Pre-split dataset inside the cloned repository (one sub-folder per class).
train_dir = '/content/dog-breed-identification/data/train'
valid_dir = '/content/dog-breed-identification/data/valid'
test_dir = '/content/dog-breed-identification/data/test'

# Training images are augmented on the fly; validation/test images are only rescaled
# so that evaluation runs on unmodified pictures.
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True)

valid_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Every image is resized to this resolution; the model's input_shape depends on it.
img_height = 64
img_width  = 64

# Mini-batch size shared by the training and validation generators.
batch_size = 128

train_generator = train_datagen.flow_from_directory(train_dir, target_size=(img_height,img_width),
                                                    batch_size=batch_size, class_mode='categorical', shuffle=True)
valid_generator = valid_datagen.flow_from_directory(valid_dir, target_size=(img_height, img_width),
                                                   batch_size=batch_size, class_mode='categorical', shuffle=True)
# class_mode='categorical' (instead of None) so that the generator also yields labels:
# model.evaluate() needs targets, and predict() simply ignores them. shuffle=False keeps
# the batches aligned with test_generator.filenames / test_generator.classes.
test_generator = test_datagen.flow_from_directory(test_dir, target_size=(img_height, img_width),
                                                  batch_size=1, class_mode='categorical', shuffle=False)

# Mapping class name -> integer index, and the resulting number of output classes.
labels = train_generator.class_indices
nb_classes = len(labels)

num_train = train_generator.samples
num_valid = valid_generator.samples

Found 14369 images belonging to 120 classes.
Found 4117 images belonging to 120 classes.
Found 2112 images belonging to 120 classes.


In [43]:
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [44]:
# Small CNN: two (5x5 convolution -> 2x2 max-pooling) stages followed by a
# dropout-regularised dense head that ends in a softmax over nb_classes.
model = Sequential([
    Conv2D(32, kernel_size=(5, 5), activation='relu', kernel_initializer='he_normal',
           input_shape=(img_height, img_width, 3)),
    MaxPooling2D(pool_size=(2, 2), strides=2),

    Conv2D(64, kernel_size=(5, 5), activation='relu', kernel_initializer='he_normal'),
    MaxPooling2D(pool_size=(2, 2), strides=2),

    Flatten(),
    Dropout(0.5),

    Dense(128, activation='relu', kernel_initializer='he_normal'),
    Dropout(0.5),
    Dense(nb_classes, activation='softmax'),
])

In [45]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_14 (Conv2D)          (None, 60, 60, 32)        2432      
                                                                 
 max_pooling2d_14 (MaxPoolin  (None, 30, 30, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_15 (Conv2D)          (None, 26, 26, 64)        51264     
                                                                 
 max_pooling2d_15 (MaxPoolin  (None, 13, 13, 64)       0         
 g2D)                                                            
                                                                 
 flatten_7 (Flatten)         (None, 10816)             0         
                                                                 
 dropout_14 (Dropout)        (None, 10816)            

In [46]:
# categorical_crossentropy pairs with the softmax output layer and with the
# class_mode='categorical' labels produced by the train/valid generators.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Stop after 5 epochs without improvement of the monitored quantity (Keras default:
# val_loss) and restore the weights of the best epoch.
es = EarlyStopping(patience=5, verbose=1, restore_best_weights=True)
# Halve the learning rate after 3 epochs without improvement (default monitor: val_loss).
rlrop = ReduceLROnPlateau(factor=0.5, patience=3, verbose=1)

# len(generator) is the exact number of batches per pass. The previous
# int(n/128)+1 asked for one batch too many whenever n was a multiple of 128,
# and silently assumed a batch size of 128.
# The returned History object is kept so the training curves can be inspected later.
history = model.fit(train_generator, validation_data=valid_generator, epochs=100, callbacks=[es, rlrop],
                    steps_per_epoch=len(train_generator), validation_steps=len(valid_generator))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100

In [19]:
# NOTE(review): the model is compiled with metrics=['accuracy'], so evaluate()
# returns [loss, accuracy] rather than a single loss value - confirm before
# treating test_loss as a scalar.
# evaluate() also needs targets, so test_generator has to yield (images, labels)
# batches, i.e. be created with a class_mode other than None.
test_loss = model.evaluate(test_generator)

