In [1]:
import tensorflow as tf
from tensorflow import keras

train_dataset_path = "../dataset/dog-breed-imagefolder/train"

# Build the training and validation pipelines in a single call.
# Every argument is spelled out, including the ones that equal the Keras defaults,
# because silently applied defaults were a source of errors while debugging.
training_dataset, validation_dataset = keras.utils.image_dataset_from_directory(
    directory=train_dataset_path,
    labels="inferred",          # labels come from the directory structure (set to None for the test images)
    label_mode="categorical",   # one-hot label vectors; required by the categorical cross-entropy loss
    color_mode="rgb",           # 3 channels
    batch_size=32,              # same as the default, stated explicitly
    image_size=(224, 224),      # the default would be (256, 256)
    shuffle=True,               # same as the default, stated explicitly
    seed=10,                    # must be set together with `subset` when using `validation_split`
    validation_split=0.1,
    subset="both",              # return the (training, validation) pair
)


Found 10222 files belonging to 120 classes.
Using 9200 files for training.
Using 1022 files for validation.


## Neural Network - Training from Scratch

In [None]:
"""
    NN: Architecture
    Input Layer  = Shape (224, 224, 3)
    Hidden Layer = 240 Neurons
    Output Layer = 120 Neurons
"""
class simple_fcnn(keras.Model):
    """Fully connected classifier trained from scratch.

    The image batch is flattened, passed through one hidden Dense layer
    (240 units, ReLU) and a softmax output layer with one unit per class.
    """

    def __init__(self):
        super().__init__()

        image_shape = (224, 224, 3)
        class_count = 120

        # Both Dense layers use the same weight / bias initialisation.
        dense_init = dict(kernel_initializer='random_normal', bias_initializer='zeros')

        layer_stack = [
            keras.layers.Input(shape=image_shape),
            keras.layers.Flatten(),
            keras.layers.Dense(units=240, activation="relu", **dense_init),
            keras.layers.Dense(units=class_count, activation="softmax", **dense_init),
        ]
        self.internal_model = keras.models.Sequential(layer_stack)

    def call(self, input_single_batch):
        """Forward one batch through the wrapped Sequential model and return its output."""
        return self.internal_model(input_single_batch)

model_simple_fcnn = simple_fcnn()

# The datasets were loaded with label_mode="categorical" (one-hot labels),
# so the matching loss is categorical cross-entropy.
model_simple_fcnn.compile(
    optimizer=keras.optimizers.Adam(),             # string alias: "adam"
    loss=keras.losses.CategoricalCrossentropy(),   # string alias: "categorical_crossentropy"
    metrics=["accuracy"],
)

In [3]:
# Train the from-scratch baseline for 5 epochs, validating after each epoch.
model_simple_fcnn.fit(training_dataset, validation_data=validation_dataset, epochs=5)



### Bugs while writing end to end pipeline
**IMP: 4 Bugs in the code**
1. `dir = dog-breed-imagefolder`. Pointing the loader at the parent folder led to 2 classes (1st class -> `train`, 2nd class -> `test`) instead of the 120 dog-breed classes. The path must point at the `train` sub-folder.
2. The default value is `label_mode="int"`, which does not convert the class index to a one-hot vector. The categorical loss function needs a one-hot vector per label, not an int: `tf.keras.losses.categorical_crossentropy` vs `tf.keras.losses.sparse_categorical_crossentropy`. I needed to write a custom training loop to debug this problem.
3. `batch_size = 32, shuffle = True`. These default values are applied silently and can cause confusion while debugging with a single-element batch.
4. `validation_split` needs `subset` and `seed` to be set as well (a TensorFlow requirement).


## Transfer Learning

In [3]:
NUM_CLASSES = 120


class transfer_learning_fcnn(keras.Model):
    """Transfer-learning classifier: frozen EfficientNetV2B0 plus a small dense head.

    Architecture:
        Input layer      = shape (224, 224, 3)
        Pretrained model = EfficientNetV2B0 built with include_top=False, not trainable
        Hidden layer     = 240 neurons (ReLU)
        Output layer     = NUM_CLASSES (120) neurons (softmax)
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: its weights are excluded from training.
        frozen_backbone = keras.applications.efficientnet_v2.EfficientNetV2B0(include_top=False)
        frozen_backbone.trainable = False

        # Both Dense layers use the same weight / bias initialisation.
        dense_init = dict(kernel_initializer='random_normal', bias_initializer='zeros')

        layer_stack = [
            keras.layers.Input(shape=(224, 224, 3)),
            frozen_backbone,
            keras.layers.Flatten(),
            keras.layers.Dense(units=240, activation="relu", **dense_init),
            keras.layers.Dense(units=NUM_CLASSES, activation="softmax", **dense_init),
        ]
        self.internal_model = keras.models.Sequential(layer_stack)

    def call(self, input_single_batch):
        """Forward one batch through the wrapped Sequential model and return its output."""
        return self.internal_model(input_single_batch)

model_transfer_learning = transfer_learning_fcnn()

# Same loss / optimizer / metric setup as the from-scratch model.
model_transfer_learning.compile(
    optimizer=keras.optimizers.Adam(),             # string alias: "adam"
    loss=keras.losses.CategoricalCrossentropy(),   # string alias: "categorical_crossentropy"
    metrics=["accuracy"],
)

# A single epoch; kept as the final expression so the cell displays the History object.
model_transfer_learning.fit(training_dataset, validation_data=validation_dataset, epochs=1)





<keras.src.callbacks.History at 0x284d3a210>

## Kaggle Submission

In [23]:
import os
import pandas as pd
import numpy as np
np.set_printoptions(precision=17)

# Column order of the Kaggle submission: image id followed by one probability per breed.
header = "id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,beagle,bedlington_terrier,bernese_mountain_dog,black-and-tan_coonhound,blenheim_spaniel,bloodhound,bluetick,border_collie,border_terrier,borzoi,boston_bull,bouvier_des_flandres,boxer,brabancon_griffon,briard,brittany_spaniel,bull_mastiff,cairn,cardigan,chesapeake_bay_retriever,chihuahua,chow,clumber,cocker_spaniel,collie,curly-coated_retriever,dandie_dinmont,dhole,dingo,doberman,english_foxhound,english_setter,english_springer,entlebucher,eskimo_dog,flat-coated_retriever,french_bulldog,german_shepherd,german_short-haired_pointer,giant_schnauzer,golden_retriever,gordon_setter,great_dane,great_pyrenees,greater_swiss_mountain_dog,groenendael,ibizan_hound,irish_setter,irish_terrier,irish_water_spaniel,irish_wolfhound,italian_greyhound,japanese_spaniel,keeshond,kelpie,kerry_blue_terrier,komondor,kuvasz,labrador_retriever,lakeland_terrier,leonberg,lhasa,malamute,malinois,maltese_dog,mexican_hairless,miniature_pinscher,miniature_poodle,miniature_schnauzer,newfoundland,norfolk_terrier,norwegian_elkhound,norwich_terrier,old_english_sheepdog,otterhound,papillon,pekinese,pembroke,pomeranian,pug,redbone,rhodesian_ridgeback,rottweiler,saint_bernard,saluki,samoyed,schipperke,scotch_terrier,scottish_deerhound,sealyham_terrier,shetland_sheepdog,shih-tzu,siberian_husky,silky_terrier,soft-coated_wheaten_terrier,staffordshire_bullterrier,standard_poodle,standard_schnauzer,sussex_spaniel,tibetan_mastiff,tibetan_terrier,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier"

test_dataset_path = "../dataset/dog-breed-imagefolder/test/"

# The `with` block guarantees the CSV is flushed and closed, even if inference
# fails part-way through the test set.
with open("submission_file.csv", "w") as submission_file:
    submission_file.write(header)

    # Sorted so the row order is the same on every run and every file system.
    for filename in sorted(os.listdir(test_dataset_path)):
        image_path = os.path.join(test_dataset_path, filename)

        # Skip sub-directories and hidden files (e.g. .DS_Store): load_img would fail on them.
        if filename.startswith(".") or not os.path.isfile(image_path):
            continue

        # Load one image at the resolution the model was trained on and
        # add a leading batch dimension of size 1.
        image = tf.keras.utils.load_img(image_path, color_mode='rgb', target_size=(224, 224))
        tensor = tf.keras.utils.img_to_array(image)
        single_batch = tf.expand_dims(tensor, axis=0)

        output_probs = model_transfer_learning(single_batch)

        # The submission id is the file name without its extension, whatever its length.
        image_id = os.path.splitext(filename)[0]

        # One probability per model output, in output order (must match the header columns).
        class_probs = output_probs[0].numpy()
        submission_string = "\n" + image_id + "".join(
            ",{:.17f}".format(class_prob) for class_prob in class_probs
        )
        submission_file.write(submission_string)
    