# Pets Breeds Classification

## Import TensorFlow and other libraries

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import PIL
from PIL import Image
from pathlib import Path

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

import tensorflow_datasets as tfds

## Download the dataset

The dataset used in this notebook is 'The Oxford-IIIT Pet Dataset', which contains photos of 37 different breeds of cats and dogs, with roughly 200 images for each class.

In [None]:
import pathlib

# Archive holding every image of the Oxford-IIIT Pet Dataset.
dataset_url = "https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz"

# Download and unpack the archive (untar=True), then wrap the returned
# location in a Path so later cells can glob and join on it.
data_dir = pathlib.Path(
    tf.keras.utils.get_file(origin=dataset_url, untar=True)
)

The downloaded dataset should contain 7,390 images:

In [None]:
# Count the .jpg files sitting directly inside data_dir (non-recursive).
image_count = sum(1 for _jpg in data_dir.glob('*.jpg'))
print(str(image_count) + " images successfully downloaded.")

Example of an image in the dataset:

In [None]:
# Collect every top-level .jpg path; the order is whatever glob yields.
images = [jpg_path for jpg_path in data_dir.glob('*.jpg')]

# Leave the opened image as the cell's last expression so the notebook
# renders it inline.
Image.open(str(images[500]))

## Load dataset to a TensorFlow dataset object

So far the dataset is just a set of photos in a folder. In order to train a model, a *tf.data.Dataset* object has to be created.

First, adapt the directory hierarchy to fit the Keras *image_dataset_from_directory* requirements, i.e. one sub-directory per class:

In [None]:
import os

# Class names, spelled exactly like the prefix of the image file names
# (the part before the last underscore, e.g. "Abyssinian_100.jpg").
breed_names = [
    "Abyssinian", "american_bulldog", "american_pit_bull_terrier",
    "basset_hound", "beagle", "Bengal", "Birman", "Bombay", "boxer",
    "British_Shorthair", "chihuahua", "Egyptian_Mau", "english_cocker_spaniel",
    "english_setter", "german_shorthaired", "great_pyrenees", "havanese",
    "japanese_chin", "keeshond", "leonberger", "Maine_Coon",
    "miniature_pinscher", "newfoundland", "Persian", "pomeranian", "pug",
    "Ragdoll", "Russian_Blue", "saint_bernard", "samoyed", "scottish_terrier",
    "shiba_inu", "Siamese", "Sphynx", "staffordshire_bull_terrier",
    "wheaten_terrier", "yorkshire_terrier"
]

# Create one sub-directory per breed. exist_ok=True keeps the cell
# re-runnable after the directories have already been created.
for breed in breed_names:
    data_dir.joinpath(breed).mkdir(exist_ok=True)

# Move "<breed>_<index>.jpg" to "<breed>/<index>.jpg" and delete every other
# regular file found at the top level of data_dir.
# The listing is materialised first so the directory is not mutated while it
# is still being iterated.
for file in list(data_dir.glob("*")):
    if file.suffix == '.jpg':
        # Split on the LAST underscore of the file name only (breed names
        # themselves contain underscores). The destination is built with
        # pathlib so the separator is correct on every OS; a hard-coded
        # backslash only produces a sub-directory path on Windows.
        breed_name, _sep, image_name = file.name.rpartition('_')
        file.replace(data_dir / breed_name / image_name)
    elif not file.is_dir():
        os.remove(file)

Define loader parameters.

In [None]:
# Number of images per batch produced by the dataset loaders.
batch_size = 64

# Every image is resized to a square of this many pixels per side.
img_height, img_width = 224, 224

Use 80% of the images for training and 20% for validation.

In [None]:
# Training subset: the "training" side of a 0.2 validation split over
# data_dir. The validation loader uses the same seed and split fraction.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=data_dir,
    image_size=(img_height, img_width),
    batch_size=batch_size,
    validation_split=0.2,
    subset="training",
    seed=2021,
)

In [None]:
# Validation subset: the "validation" side of a 0.2 validation split over
# data_dir. The training loader uses the same seed and split fraction.
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=data_dir,
    image_size=(img_height, img_width),
    batch_size=batch_size,
    validation_split=0.2,
    subset="validation",
    seed=2021,
)

## Configure the dataset

In [None]:
# Let tf.data pick the prefetch buffer size.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Training pipeline: cache, then shuffle with a 1000-element buffer, then
# prefetch.
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

# Validation pipeline: cache and prefetch only, no shuffling.
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

## Create the model

In [None]:
# Small CNN: pixel rescaling, three Conv2D + MaxPooling2D stages, then a
# dense head with one output unit per breed. The final layer has no
# activation, so it emits raw logits.
model = Sequential()

# Scale pixel values by 1/255; this first layer also declares the input shape.
model.add(layers.experimental.preprocessing.Rescaling(
    1./255, input_shape=(img_height, img_width, 3)))

# Convolutional stages.
model.add(layers.Conv2D(32, 3, padding='same', activation='relu'))
model.add(layers.MaxPooling2D())
model.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
model.add(layers.MaxPooling2D())
model.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
model.add(layers.MaxPooling2D())

# Classification head.
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(len(breed_names)))

## Compile the model

In [None]:
# from_logits=True because the model's last Dense layer has no activation.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview of the compiled model.
model.summary()

## Train the model

In [None]:
# Number of passes over the training dataset.
epochs = 15

# Train on train_ds and evaluate on val_ds each epoch; the value returned by
# fit is kept in `history`.
history = model.fit(
    x=train_ds,
    epochs=epochs,
    validation_data=val_ds,
)