# Dog Breed Classification Model

## Package Imports

In [None]:
from os import listdir

import numpy as np

import pandas as pd

from PIL import Image

import tensorflow as tf

## Image Compression

In [None]:
# Shrink every original image to a 64x64 centre crop and write the result,
# under the same file name, to the matching "compressed" folder.
from os import makedirs

for dataset in ["train", "test"]:
    original_directory = "./data/dogs/original/" + dataset + "/"
    compressed_directory = "./data/dogs/compressed/" + dataset + "/"

    # Image.save() does not create missing folders, so make sure the
    # destination exists before the first write.
    makedirs(compressed_directory, exist_ok=True)

    original_files = listdir(original_directory)

    for file in original_files:
        # The context manager releases the source file handle as soon as the
        # resized (in-memory) copy has been produced.
        with Image.open(original_directory + file) as original_image:
            ratio = original_image.size[0] / original_image.size[1]

            # Scale so the shorter side becomes 64 px while keeping the aspect
            # ratio; the longer side is rounded to an even number so the
            # centre crop below lands on whole pixels.
            if ratio > 1:
                size = (2 * round(32 * ratio), 64)
            else:
                size = (64, 2 * round(32 / ratio))

            image = original_image.resize(size)

        # Centre 64x64 window. Both dimensions are even, so integer division
        # is exact and the crop box stays in integer pixel coordinates.
        left = (size[0] - 64) // 2
        top = (size[1] - 64) // 2
        right = (size[0] + 64) // 2
        bottom = (size[1] + 64) // 2
        image = image.crop((left, top, right, bottom))

        image.save(compressed_directory + file)

## Label Loading

In [None]:
# Load the label table from CSV and print a summary of its columns.
# Later cells read the "id" and "breed" columns of this table.
labels = pd.read_csv("./data/dogs/labels.csv")
labels.info()

In [None]:
# Show 10 randomly drawn rows of the label table as a quick visual check.
labels.sample(n=10)

In [None]:
# Sorted list of the distinct values in the "breed" column. A breed's position
# in this list is what the training-data cell uses as its integer class id.
unique_labels = sorted(labels["breed"].unique())
# Bare expression so the notebook displays the list as the cell output.
unique_labels

## Training Data Preprocessing

In [None]:
# Build the training arrays: X_train holds one pixel array per compressed
# training image, y_train holds the matching integer class id (the index of
# the image's breed in `unique_labels`).
train_directory = "./data/dogs/compressed/train/"
# listdir() returns names in arbitrary order; sorting keeps the sample order
# (and therefore X_train / y_train) reproducible between runs.
train_files = sorted(listdir(train_directory))

# Lookup tables built once, instead of scanning the whole label table and
# searching the breed list for every single image.
# keep="first" matches taking the first row found for a given id.
first_rows = labels.drop_duplicates(subset="id", keep="first")
breed_by_id = dict(zip(first_rows["id"], first_rows["breed"]))
label_id_by_breed = {breed: index for index, breed in enumerate(unique_labels)}

X_train = []
y_train = []

for file in train_files:
    # The image id is the file name up to its first dot.
    image_id = file.split(".")[0]

    # Resolve the label first so a file without a label entry fails with a
    # KeyError naming the id, before anything is appended for it.
    image_label = breed_by_id[image_id]
    image_label_id = label_id_by_breed[image_label]

    # np.array() reads the pixel data; the context manager then closes the file.
    with Image.open(train_directory + file) as image:
        image_array = np.array(image)

    X_train.append(image_array)
    y_train.append(image_label_id)

# NOTE(review): stacking into one regular array assumes every image has the
# same size and colour mode - confirm for the dataset in use.
X_train = np.array(X_train)
y_train = np.array(y_train)