# Dog Race Classification Model

## Package Imports

In [20]:
from os import listdir

import numpy as np

import pandas as pd

from PIL import Image

import tensorflow as tf

## Image Compression

In [21]:
# for dataset in ["train", "test"]:
#     original_directory = "./data/dogs/original/" + dataset + "/"
#     compressed_directory = "./data/dogs/compressed/" + dataset + "/"

#     original_files = listdir(original_directory)

#     for file in original_files:
#         image = Image.open(original_directory + file)

#         ratio = image.size[0] / image.size[1]
#         size = (2 * round(32 * ratio), 64) if ratio > 1 else (64, 2 * round(32 / ratio))
#         image = image.resize(size)

#         left = (size[0] - 64) / 2
#         top = (size[1] - 64) / 2
#         right = (size[0] + 64) / 2
#         bottom = (size[1] + 64) / 2
#         image = image.crop((left, top, right, bottom))

#         image.save(compressed_directory + file)

## Label Loading

In [22]:
labels = pd.read_csv("./data/dogs/labels.csv")
labels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10222 entries, 0 to 10221
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      10222 non-null  object
 1   breed   10222 non-null  object
dtypes: object(2)
memory usage: 159.8+ KB


In [23]:
labels.sample(n=10)

Unnamed: 0,id,breed
6876,aca5fb5688fbbd2949a369534af84c6b,doberman
2971,49505d68a14b91047e77fdb8361bc5d7,african_hunting_dog
5936,95ec164b9719385742168962be4eda40,pembroke
7666,bffc7036e3c90a13e85e540837dfa9df,irish_terrier
9516,ee88ac6bfed3abb8713bbc988b87eb80,boston_bull
4838,7976034449c9a5d65d00d83ef443ebd0,lakeland_terrier
1850,2d96d7258d31f2e7430c91ce4cee2cb6,bull_mastiff
3356,535178fe793bcbd86d5c24ce8209810f,gordon_setter
9378,eb4ddd17cbdda67c5bab81f6407fc1ba,english_springer
3441,5591ad33cb935413bc58d4e37449023c,curly-coated_retriever


In [24]:
unique_labels = sorted(labels["breed"].unique())
unique_labels

['affenpinscher',
 'afghan_hound',
 'african_hunting_dog',
 'airedale',
 'american_staffordshire_terrier',
 'appenzeller',
 'australian_terrier',
 'basenji',
 'basset',
 'beagle',
 'bedlington_terrier',
 'bernese_mountain_dog',
 'black-and-tan_coonhound',
 'blenheim_spaniel',
 'bloodhound',
 'bluetick',
 'border_collie',
 'border_terrier',
 'borzoi',
 'boston_bull',
 'bouvier_des_flandres',
 'boxer',
 'brabancon_griffon',
 'briard',
 'brittany_spaniel',
 'bull_mastiff',
 'cairn',
 'cardigan',
 'chesapeake_bay_retriever',
 'chihuahua',
 'chow',
 'clumber',
 'cocker_spaniel',
 'collie',
 'curly-coated_retriever',
 'dandie_dinmont',
 'dhole',
 'dingo',
 'doberman',
 'english_foxhound',
 'english_setter',
 'english_springer',
 'entlebucher',
 'eskimo_dog',
 'flat-coated_retriever',
 'french_bulldog',
 'german_shepherd',
 'german_short-haired_pointer',
 'giant_schnauzer',
 'golden_retriever',
 'gordon_setter',
 'great_dane',
 'great_pyrenees',
 'greater_swiss_mountain_dog',
 'groenendael',


## Data Preparation

In [27]:
train_directory = "./data/dogs/compressed/train/"
train_files = listdir(train_directory)

X = []
y = []

for file in train_files:
    image_id = file.split(".")[0]

    image_array = np.array(Image.open(train_directory + file))
    X.append(image_array)

    image_label = labels[labels["id"] == image_id]["breed"].to_list()[0]
    image_label_id = unique_labels.index(image_label)
    y.append(image_label_id)

n_train = round(0.8 * len(train_files))

X_train, X_valid = np.array(X[:n_train]), np.array(X[n_train:])
y_train, y_valid = np.array(y[:n_train]), np.array(y[n_train:])

## Data Exploration

In [28]:
print("--------------------------------------------------")
print(f"Shape of X_train: {X_train.shape}")
print(f"Shape of X_valid: {X_valid.shape}")
print(f"Shape of y_train: {y_train.shape}")
print(f"Shape of y_valid: {y_valid.shape}")
print("--------------------------------------------------")

--------------------------------------------------
Shape of X_train: (8178, 64, 64, 3)
Shape of X_valid: (2044, 64, 64, 3)
Shape of y_train: (8178,)
Shape of y_valid: (2044,)
--------------------------------------------------


## Model Definition

In [29]:
model = tf.keras.models.Sequential(
    [
        tf.keras.layers.InputLayer(input_shape=(64, 64, 3)),
        tf.keras.layers.Rescaling(1 / 255),
        tf.keras.layers.Conv2D(16, 3, padding="same", activation="relu"),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(32, 3, padding="same", activation="relu"),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(64, 3, padding="same", activation="relu"),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dense(len(unique_labels)),
    ]
)

## Model Compiling

In [30]:
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_4 (Rescaling)     (None, 64, 64, 3)         0         
                                                                 
 conv2d_10 (Conv2D)          (None, 64, 64, 16)        448       
                                                                 
 max_pooling2d_10 (MaxPooli  (None, 32, 32, 16)        0         
 ng2D)                                                           
                                                                 
 conv2d_11 (Conv2D)          (None, 32, 32, 32)        4640      
                                                                 
 max_pooling2d_11 (MaxPooli  (None, 16, 16, 32)        0         
 ng2D)                                                           
                                                                 
 conv2d_12 (Conv2D)          (None, 16, 16, 64)       

In [31]:
model.fit(X_train, validation_data=X_valid, epochs=10)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
# model.evaluate(X_test, y_test)