<a href="https://colab.research.google.com/github/bukits/dog-breed-identification/blob/main/dog_breed_identification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Clone the project repository into the current working directory.
# Later cells read images from /content/dog-breed-identification/data, so this
# presumably expects the working directory to be /content (Colab default) — TODO confirm.
!git clone https://github.com/bukits/dog-breed-identification.git

In [4]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile, rmtree
import numpy as np

In [5]:
# Data-splitting helper only (do not run as part of the normal notebook flow).
def split_data(base_folder, train_dir, valid_dir, test_dir, valid_split = 0.2, test_split = 0.1):
  """Copy a one-folder-per-class image dataset into train/valid/test folders.

  Every sub-directory of ``base_folder`` is treated as one class label. Its
  files are sorted by name and copied, in that order, into
  ``train_dir/<label>``, ``valid_dir/<label>`` and ``test_dir/<label>``.
  The split is therefore deterministic (no shuffling): the first files go to
  train, the next to valid and the last to test.

  Args:
    base_folder: Directory containing one sub-directory per class.
    train_dir: Destination root for the training partition.
    valid_dir: Destination root for the validation partition.
    test_dir: Destination root for the test partition.
    valid_split: Fraction of each class copied to ``valid_dir``.
    test_split: Fraction of each class copied to ``test_dir``.

  Raises:
    OSError: If ``base_folder`` does not exist.
    ValueError: If a split fraction is negative or the two fractions sum to
      more than 1.
  """
  if not os.path.exists(base_folder):
    raise OSError('base_folder does not exist')

  # Out-of-range fractions would yield negative cut points and silently put
  # images in the wrong partition; the small tolerance absorbs float rounding.
  if valid_split < 0 or test_split < 0 or valid_split + test_split > 1 + 1e-9:
    raise ValueError('valid_split and test_split must be >= 0 and sum to at most 1')

  for split_dir in (train_dir, valid_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

  for label in sorted(os.listdir(base_folder)):
    original_path = os.path.join(base_folder, label)
    # Stray files next to the class folders are not labels; skip them.
    if not os.path.isdir(original_path):
      continue

    train_path = os.path.join(train_dir, label)
    valid_path = os.path.join(valid_dir, label)
    test_path = os.path.join(test_dir, label)
    for class_dir in (train_path, valid_path, test_path):
      os.makedirs(class_dir, exist_ok=True)

    # Only regular files can be copied with copyfile; ignore nested folders.
    images = sorted(
        name for name in os.listdir(original_path)
        if os.path.isfile(os.path.join(original_path, name))
    )
    v_point = int(len(images)*(1-valid_split-test_split))
    t_point = int(len(images)*(1-test_split))

    partitions = (
        (images[:v_point], train_path),
        (images[v_point:t_point], valid_path),
        (images[t_point:], test_path),
    )
    for subset, destination in partitions:
      for img in subset:
        copyfile(os.path.join(original_path, img), os.path.join(destination, img))

In [None]:
# One-off dataset split (do not run).
# data_dir holds the original images (presumably a mounted Google Drive
# folder — verify); the three targets receive the train/valid/test copies.
data_dir = '/content/drive/MyDrive/Images'
train_dir, valid_dir, test_dir = (
    '/content/data/train',
    '/content/data/valid',
    '/content/data/test',
)

split_data(
    base_folder=data_dir,
    train_dir=train_dir,
    valid_dir=valid_dir,
    test_dir=test_dir,
)

In [6]:
# Every generator below resizes images to this height/width.
img_height = 64
img_width  = 64

# Split folders inside the cloned repository.
train_dir = '/content/dog-breed-identification/data/train'
valid_dir = '/content/dog-breed-identification/data/valid'
test_dir = '/content/dog-breed-identification/data/test'

# Training images are scaled by 1/255 and augmented (shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation and test images are only scaled, never augmented.
valid_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    target_size=(img_height, img_width),
    batch_size=20,
    class_mode='categorical',
)
valid_generator = valid_datagen.flow_from_directory(
    directory=valid_dir,
    target_size=(img_height, img_width),
    batch_size=20,
    class_mode='categorical',
)
# The test generator yields one unlabeled image at a time, in a fixed order.
test_generator = test_datagen.flow_from_directory(
    directory=test_dir,
    target_size=(img_height, img_width),
    batch_size=1,
    class_mode=None,
    shuffle=False,
)

# Class-name -> index mapping from the training folder; its size sets the
# width of the network's output layer.
labels = train_generator.class_indices
nb_classes = len(labels)

Found 14369 images belonging to 120 classes.
Found 4117 images belonging to 120 classes.
Found 2112 images belonging to 120 classes.


In [12]:
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Small CNN classifier: two conv/pool/dropout stages followed by a dense head
# with one softmax output per class.
# NOTE(review): a 0.75 drop rate after each conv stage looks unusually high —
# confirm it is intentional.
model = Sequential([
    # Stage 1: 32 filters on the RGB input.
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    MaxPooling2D(pool_size=(2, 2), strides=2),
    Dropout(0.75),

    # Stage 2: 64 filters.
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=2),
    Dropout(0.75),

    # Classification head.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(nb_classes, activation='softmax'),
])

# One-hot labels from the 'categorical' generators pair with categorical cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()

In [16]:
# Early stopping that restores the best weights seen during training.
# NOTE(review): patience=10 is larger than epochs=3 below, so it presumably
# cannot trigger in this run — confirm the intended epoch count.
es = EarlyStopping(patience=10, verbose=1, restore_best_weights=True)

# Each epoch uses 100 training batches and 50 validation batches.
# validation_steps uses integer division: `1000/20` evaluates to the float
# 50.0, whereas Keras documents the step counts as integers.
model.fit(train_generator, validation_data=valid_generator, epochs=3, callbacks=[es],
          steps_per_epoch=100, validation_steps=1000 // 20)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f1ba44f3d50>

In [20]:
# Evaluate the trained model on the labeled validation generator.
# NOTE(review): despite the name, test_loss comes from the validation set
# (test_generator has class_mode=None, i.e. no labels). Because the model is
# compiled with an accuracy metric, the result presumably holds loss and
# accuracy rather than the loss alone — verify.
test_loss = model.evaluate(x=valid_generator)

