# Dog Breed Classification Project (120 Classes)

## Importing Dependencies

In [None]:
# File related dependencies
import os
import shutil

# Data Science dependencies
import pandas as pd
import numpy as np
import tensorflow as tf

In [None]:
# Load the label table that maps each raw training image id to its breed.
df = pd.read_csv('/content/drive/MyDrive/dog_classification_data/raw_data/labels.csv')

In [None]:
# Preview the first rows of the label table (columns: id, breed).
df.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


## Building the file structure

In [None]:
# Create the working directory tree on Drive.
# exist_ok=True keeps this cell idempotent: without it, re-running the notebook
# raises FileExistsError as soon as the first folder already exists.
for working_dir in (
    '/content/drive/MyDrive/dog_classification_data/working/data',
    '/content/drive/MyDrive/dog_classification_data/working/models',
    '/content/drive/MyDrive/dog_classification_data/working/data/dataframes',
    '/content/drive/MyDrive/dog_classification_data/working/data/train',
    '/content/drive/MyDrive/dog_classification_data/working/data/val',
    '/content/drive/MyDrive/dog_classification_data/working/data/test',
):
    os.makedirs(working_dir, exist_ok=True)

In [None]:
# --- Raw inputs --------------------------------------------------------------
RAW_DATA_PATH = '/content/drive/MyDrive/dog_classification_data/raw_data/train'
FINAL_TEST_DIR = '/content/drive/MyDrive/dog_classification_data/raw_data/final_test'
# 'test' sub-folder inside FINAL_TEST_DIR.
DIR = os.path.join(FINAL_TEST_DIR, 'test')

# --- Working data: one folder per split plus the saved split tables -----------
DATA_PATH = '/content/drive/MyDrive/dog_classification_data/working/data'
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    os.path.join(DATA_PATH, split_name) for split_name in ('train', 'val', 'test')
)
DATA_FRAME_DIR = '/content/drive/MyDrive/dog_classification_data/working/data/dataframes'

# --- Saved models: one folder per architecture --------------------------------
MODEL_DIR = '/content/drive/MyDrive/dog_classification_data/working/models'
XCEPTION_DIR, RUNTIME_MODEL_DIR, RESNET_DIR = (
    os.path.join(MODEL_DIR, model_name) for model_name in ('xception', 'runtime', 'resnet50')
)

In [None]:
# Sanity check: display the resolved training directory.
TRAIN_DIR

'/content/drive/MyDrive/dog_classification_data/working/data/train'

## Defining the Labels

In [None]:
# Distinct breed names. np.unique already returns them sorted, so no extra
# sort is needed (the former `np.sort(labels)` call discarded its result and
# therefore had no effect).
labels = np.unique(df['breed'].to_numpy())
labels

array(['affenpinscher', 'afghan_hound', 'african_hunting_dog', 'airedale',
       'american_staffordshire_terrier', 'appenzeller',
       'australian_terrier', 'basenji', 'basset', 'beagle',
       'bedlington_terrier', 'bernese_mountain_dog',
       'black-and-tan_coonhound', 'blenheim_spaniel', 'bloodhound',
       'bluetick', 'border_collie', 'border_terrier', 'borzoi',
       'boston_bull', 'bouvier_des_flandres', 'boxer',
       'brabancon_griffon', 'briard', 'brittany_spaniel', 'bull_mastiff',
       'cairn', 'cardigan', 'chesapeake_bay_retriever', 'chihuahua',
       'chow', 'clumber', 'cocker_spaniel', 'collie',
       'curly-coated_retriever', 'dandie_dinmont', 'dhole', 'dingo',
       'doberman', 'english_foxhound', 'english_setter',
       'english_springer', 'entlebucher', 'eskimo_dog',
       'flat-coated_retriever', 'french_bulldog', 'german_shepherd',
       'german_short-haired_pointer', 'giant_schnauzer',
       'golden_retriever', 'gordon_setter', 'great_dane',
      

## Internal File Structure for each dataset

In [None]:
# One sub-folder per breed inside every split directory.
# exist_ok=True makes the cell safe to re-run once the folders exist.
for label in labels:
    for split_dir in (TRAIN_DIR, VAL_DIR, TEST_DIR):
        os.makedirs(os.path.join(split_dir, label), exist_ok=True)

## Splitting the labeled data into Train, Test and Validation Data

In [None]:
# Load the previously saved train / test / validation splits.
# The CSV files carry an extra 'Unnamed: 0' column (apparently a saved
# DataFrame index). Reading only the two real columns keeps the frames shaped
# as (id, breed), which is what the rest of the notebook expects.
SPLIT_COLUMNS = ['id', 'breed']
df_train = pd.read_csv(os.path.join(DATA_FRAME_DIR, 'df_train.csv'), usecols=SPLIT_COLUMNS)
df_test = pd.read_csv(os.path.join(DATA_FRAME_DIR, 'df_test.csv'), usecols=SPLIT_COLUMNS)
df_val = pd.read_csv(os.path.join(DATA_FRAME_DIR, 'df_val.csv'), usecols=SPLIT_COLUMNS)

In [None]:
# Inspect the training split.
df_train

Unnamed: 0.1,Unnamed: 0,id,breed
0,4528,70f9f88c25bfdaab568342b7163171e4,norwegian_elkhound
1,7065,b14fba6e4c376bbb8b7ca5c803816551,weimaraner
2,4565,72291724d7c5e8df27e615a177dfb151,ibizan_hound
3,821,1435300899e9180c90de6e100643b402,chesapeake_bay_retriever
4,9311,e9b8e25755fcc201168fdf2d299e5350,doberman
...,...,...,...
8173,6312,9f682747d236dbaf8c530933e5dfcb44,bernese_mountain_dog
8174,8288,cf954483c0d2e3004083144ffc432092,basenji
8175,1022,193a03557eb1c0fed9b2ef8f9f9d892f,toy_poodle
8176,7572,bdcb7a341f67363503f974d7596780db,schipperke


In [None]:
# Inspect the validation split.
df_val

Unnamed: 0.1,Unnamed: 0,id,breed
0,3325,52b0bca440f70ab144b1fdce2a8c0148,shetland_sheepdog
1,2836,45d4d86af38c934a078fc85417c95735,chihuahua
2,3922,616ad943c7574a20b90e93fa0a61a21e,toy_terrier
3,218,04fab70baaf7cd53f8bf78225dbf5409,walker_hound
4,9324,ea113a236df0ae1d6875259ed2d2bcde,german_short-haired_pointer
...,...,...,...
1017,8920,dfdb153c5100737c782c5f9578717594,basenji
1018,7754,c21ebf600c0136815b16143e205638ff,shetland_sheepdog
1019,3966,62759db195cd415727d87d344726f5ea,tibetan_mastiff
1020,9646,f2cb6f6dfc542d47bfcea63b0dd78c6e,brittany_spaniel


In [None]:
# Inspect the test split.
df_test

Unnamed: 0.1,Unnamed: 0,id,breed
0,4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever
1,14,0075dc49dab4024d12fafe67074d8a81,norfolk_terrier
2,22,009509be3ca7cce0ff9e37c8b09b1125,otterhound
3,25,00a366d4b4a9bbb6c8a63126697b7656,golden_retriever
4,65,019ff93e03802e661577b5869e099dcb,rhodesian_ridgeback
...,...,...,...
1017,10198,ff6f47aa8e181b6efa4d0be7b09b5628,golden_retriever
1018,10203,ff91c3c095a50d3d7f1ab52b60e93638,affenpinscher
1019,10207,ffa4e1bf959425bad9228b04af40ac76,basenji
1020,10214,ffcb610e811817766085054616551f9c,briard


## Defining a simple function which copies the raw images into the created file structure

In [None]:
def move(df, source=RAW_DATA_PATH, dest=''):
    """Copy every image listed in ``df`` into its breed sub-folder under ``dest``.

    Despite the name, files are copied with ``shutil.copy`` rather than moved,
    so the raw data directory is left untouched.

    Args:
        df: DataFrame with an ``id`` column (image file name without the
            ``.jpg`` extension) and a ``breed`` column (name of the target
            sub-folder). Any other columns are ignored.
        source: Directory that holds the ``<id>.jpg`` files.
        dest: Root directory containing one sub-folder per breed.

    Raises:
        KeyError: If ``df`` lacks an ``id`` or ``breed`` column.
    """
    # Select the columns by name instead of by position: the split frames carry
    # a leading 'Unnamed: 0' column, so positional access (row[0], row[1]) would
    # pick up that integer and the id instead of the id and the breed.
    for image_id, breed in zip(df['id'], df['breed']):
        image_path = os.path.join(source, str(image_id) + '.jpg')
        shutil.copy(image_path, os.path.join(dest, breed))


In [None]:
# Fill the per-breed validation folders from the raw images.
move(df_val, dest=VAL_DIR)

In [None]:
# Fill the per-breed test folders from the raw images.
move(df_test, dest=TEST_DIR)

In [None]:
# Fill the per-breed training folders from the raw images.
move(df_train, dest=TRAIN_DIR)

## Creating Data Generators for Training, Testing and Validation (Xception)

In [None]:
# Training pipeline for Xception: Xception-specific preprocessing plus light
# augmentation (rotation_range=20, zoom_range=0.1).
train_data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.xception.preprocess_input,
    rotation_range=20,
    zoom_range=0.1,
)

# Stream shuffled batches of 32 RGB images, resized to 299x299, with one-hot
# labels taken from the sub-folder names of TRAIN_DIR.
train_generator = train_data_gen.flow_from_directory(
    TRAIN_DIR,
    target_size=(299, 299),
    color_mode='rgb',
    batch_size=32,
    class_mode='categorical',
    shuffle=True,
)

Found 8178 images belonging to 120 classes.


In [None]:
# Validation pipeline for Xception: preprocessing only, no augmentation.
val_data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.xception.preprocess_input,
)
val_generator = val_data_gen.flow_from_directory(
    VAL_DIR,
    target_size=(299, 299),
    color_mode='rgb',
    batch_size=32,
    class_mode='categorical',
    shuffle=True,
)

Found 1022 images belonging to 120 classes.


In [None]:
# Test pipeline for Xception: preprocessing only. Shuffling is disabled so the
# batches are produced in a fixed order.
test_data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.xception.preprocess_input,
)
test_generator = test_data_gen.flow_from_directory(
    TEST_DIR,
    target_size=(299, 299),
    color_mode='rgb',
    batch_size=64,
    class_mode='categorical',
    shuffle=False,
)

Found 1022 images belonging to 120 classes.


## Xception Model (Accuracy: ~86% on the test split)

In [None]:
# Xception backbone without its classification head; frozen so that only the
# new head is trained.
base_model = tf.keras.applications.Xception(include_top=False, input_shape=(299, 299, 3))
base_model.trainable = False

# Classification head: flatten the backbone features and map them to the
# 120 breed classes with a softmax layer.
xception_model = tf.keras.models.Sequential()
xception_model.add(base_model)
xception_model.add(tf.keras.layers.Flatten())
xception_model.add(tf.keras.layers.Dense(120, activation='softmax'))

xception_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Save the model to XCEPTION_DIR only when validation accuracy improves.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    XCEPTION_DIR,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train the Xception head for 40 epochs, checkpointing the best epoch.
xception_model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=40,
    callbacks=[checkpoint],
    verbose=1,
)

In [None]:
# Score the in-memory model (as left by training) on the test split.
# Returns [loss, accuracy].
xception_model.evaluate(test_generator)



[13.620423316955566, 0.8620352149009705]

In [None]:
# Reload the checkpoint written to XCEPTION_DIR (the epoch with the best
# validation accuracy) and score it on the same test split for comparison.
loaded_model = tf.keras.models.load_model(XCEPTION_DIR)
loaded_model.evaluate(test_generator)



[10.981363296508789, 0.8639921545982361]

## Creating Data Generators for Training, Testing and Validation (ResNet50)

In [None]:
# Training pipeline for ResNet50: ResNet50-specific preprocessing plus the same
# light augmentation used for Xception (rotation_range=20, zoom_range=0.1).
train_data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input,
    rotation_range=20,
    zoom_range=0.1,
)

# NOTE(review): images are resized to 244x244 here; the ResNet50 model must be
# built with the same spatial size as this generator.
train_generator = train_data_gen.flow_from_directory(
    TRAIN_DIR,
    target_size=(244, 244),
    color_mode='rgb',
    batch_size=32,
    class_mode='categorical',
    shuffle=True,
)

Found 8178 images belonging to 120 classes.


In [None]:
# Validation pipeline for ResNet50: preprocessing only, no augmentation.
val_data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input,
)
val_generator = val_data_gen.flow_from_directory(
    VAL_DIR,
    target_size=(244, 244),
    color_mode='rgb',
    batch_size=32,
    class_mode='categorical',
    shuffle=True,
)

Found 1022 images belonging to 120 classes.


In [None]:
# Test pipeline for ResNet50: preprocessing only. Shuffling is disabled so the
# batches are produced in a fixed order.
test_data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input,
)
test_generator = test_data_gen.flow_from_directory(
    TEST_DIR,
    target_size=(244, 244),
    color_mode='rgb',
    batch_size=64,
    class_mode='categorical',
    shuffle=False,
)

Found 1022 images belonging to 120 classes.


## ResNet50 (Accuracy:-)

In [None]:
# The ResNet50 generators in this notebook resize every image to 244x244, so the
# backbone has to be built for that exact shape. It used to be (224, 244, 3),
# which does not match the generated batches and is rejected by Keras when
# `fit` receives the first batch.
RESNET_INPUT_SHAPE = (244, 244, 3)

# ImageNet-pretrained ResNet50 without its classification head; frozen so that
# only the new head is trained.
base_model = tf.keras.applications.ResNet50(input_shape=RESNET_INPUT_SHAPE, include_top=False, weights='imagenet')
base_model.trainable = False

# Classification head: flatten the backbone features and map them to the
# 120 breed classes with a softmax layer.
resnet_model = tf.keras.models.Sequential([
    base_model,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(120, activation='softmax')
])

resnet_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [None]:
# Layer-by-layer overview of the frozen ResNet50 backbone.
base_model.summary()

In [None]:
# Overview of the full classifier (backbone + flatten + dense head), including
# trainable vs. non-trainable parameter counts.
resnet_model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 7, 8, 2048)        23587712  
                                                                 
 flatten_4 (Flatten)         (None, 114688)            0         
                                                                 
 dense_5 (Dense)             (None, 120)               13762680  
                                                                 
Total params: 37,350,392
Trainable params: 13,762,680
Non-trainable params: 23,587,712
_________________________________________________________________


In [None]:
# Save the model to RESNET_DIR only when validation accuracy improves.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    RESNET_DIR,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)
# Stop training once validation accuracy has not improved for 5 epochs.
earlystopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=5,
)

In [None]:
# Train the ResNet50 head for up to 10 epochs with checkpointing and early
# stopping; keep the returned History object for later inspection.
history = resnet_model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10,
    callbacks=[checkpoint, earlystopping],
    verbose=1,
)

## Submission

In [None]:
# Load the submission template: one row per final-test image id and one
# probability column per breed.
sample_submission = pd.read_csv('/content/drive/MyDrive/dog_classification_data/raw_data/sample_submission.csv')

In [None]:
# Inspect the first 64 rows of the template.
sample_submission.head(64)

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
1,00102ee9d8eb90812350685311fe5890,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
2,0012a730dfa437f5f3613fb75efcd4ce,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
3,001510bc8570bbeee98c8d80c8a95ec1,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
4,001a5f3114548acdefa3d4da05474c2e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,013c030b78079c77fbe8133d74d58acd,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
60,013c469ce5bd1117b248883dee30a38d,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
61,0143aa47cc7cb5926cc32daf3a6a3b3d,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
62,014da249523b906a840f8c33ae055cf3,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333


In [None]:
# Unlabelled final-test images, prepared for the Xception model (Xception
# preprocessing, 299x299). class_mode=None makes the generator yield images
# only; shuffling is disabled so the batches follow a fixed order.
final_test_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.xception.preprocess_input,
)
final_test_generator = final_test_gen.flow_from_directory(
    FINAL_TEST_DIR,
    target_size=(299, 299),
    color_mode='rgb',
    batch_size=64,
    class_mode=None,
    shuffle=False,
)

Found 10357 images belonging to 1 classes.


In [None]:
# Predict breed probabilities for the final-test images.
# `model` is never defined in this notebook, so the cell failed on a fresh
# kernel. `loaded_model` is the best Xception checkpoint, which matches the
# Xception preprocessing and 299x299 input size of `final_test_generator`.
predictions = loaded_model.predict(final_test_generator)



In [None]:
# Work on a copy: a plain assignment would alias the template, so filling in
# the predictions would also overwrite `sample_submission`.
submission = sample_submission.copy()
submission

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
1,00102ee9d8eb90812350685311fe5890,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
2,0012a730dfa437f5f3613fb75efcd4ce,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
3,001510bc8570bbeee98c8d80c8a95ec1,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
4,001a5f3114548acdefa3d4da05474c2e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10352,ffeda8623d4eee33c6d1156a2ecbfcf8,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
10353,fff1ec9e6e413275984966f745a313b0,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
10354,fff74b59b758bbbf13a5793182a9bbe4,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
10355,fff7d50d848e8014ac1e9172dc6762a3,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333


In [None]:
# Guard against silently misaligned rows: the i-th prediction must belong to
# the i-th id of the submission table.
predicted_ids = [
    os.path.splitext(os.path.basename(file_name))[0]
    for file_name in final_test_generator.filenames
]
assert predicted_ids == submission['id'].tolist(), \
    'prediction rows are not in the same order as the submission ids'
assert predictions.shape == (len(submission), len(labels)), \
    'predictions must have one row per image and one column per breed'

# Write all breed probability columns in one assignment instead of looping over
# a hard-coded range(120). Column i of `predictions` goes to breed labels[i].
# NOTE(review): this assumes the model's class indices follow the sorted order
# of `labels` - confirm against the training generator's class_indices.
submission[list(labels)] = predictions

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,00102ee9d8eb90812350685311fe5890,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0012a730dfa437f5f3613fb75efcd4ce,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,001510bc8570bbeee98c8d80c8a95ec1,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,001a5f3114548acdefa3d4da05474c2e,0.001831,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10352,ffeda8623d4eee33c6d1156a2ecbfcf8,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10353,fff1ec9e6e413275984966f745a313b0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
10354,fff74b59b758bbbf13a5793182a9bbe4,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10355,fff7d50d848e8014ac1e9172dc6762a3,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# index=False keeps the DataFrame row index out of the file, so the CSV holds
# only the `id` column and the breed probability columns. Without it an extra
# unnamed index column is written in front of `id`.
submission.to_csv('/content/drive/MyDrive/dog_classification_data/working/submission0.csv', index=False)