<a href="https://www.kaggle.com/code/captaindeadpool53/dog-breed-classifier?scriptVersionId=171547743" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [25]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, os.walk('/kaggle/input') can be used to list all files under the input directory

import os


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [26]:
import tensorflow as tf
import matplotlib.pyplot as plt

from PIL import Image

from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, activations,losses


In [None]:
def addAugmentedData(dataset):
    """Return `dataset` followed by a randomly augmented copy of itself.

    Every (image, label) element is passed through a random flip, a random
    rotation (factor 0.2) and a random zoom (factor 0.2); labels are passed
    through untouched. The augmented elements are appended after the
    originals, so the result holds twice as many elements as the input.

    Args:
        dataset: A `tf.data.Dataset` yielding (image, label) pairs.
            Assumes pixel values use the usual [0, 255] range, as produced
            by `load_image` -- TODO confirm for other callers.

    Returns:
        A `tf.data.Dataset` whose images have the same dtype as the input
        images, which is what `concatenate` requires.
    """
    augmentationLayer = Sequential([
        layers.RandomFlip(seed=1111),
        layers.RandomRotation(0.2, seed=1111),
        layers.RandomZoom(0.2, seed=1111)
    ])

    def augment(image, label):
        # The random layers are only active in training mode; be explicit
        # because this call happens outside of `model.fit`.
        augmented = augmentationLayer(image, training=True)

        # The layers return floats in the *input's* value range. Using
        # tf.image.convert_image_dtype here would treat them as [0, 1]
        # floats and rescale by 255, corrupting the pixels, so round
        # (for integer images), clamp and cast back instead.
        if image.dtype.is_integer:
            augmented = tf.round(augmented)
        return tf.saturate_cast(augmented, image.dtype), label

    return dataset.concatenate(dataset.map(augment))


In [None]:
def loadImagesWithClassName(csv_file_path, imagePath, test_fraction=0.2,
                            batch_size=64, seed=1111):
    """Build batched train and test datasets of (image, one-hot label) pairs.

    The CSV must provide an `id` column (image file name without the `.jpg`
    extension) and a `breed` column (class name). Rows are shuffled once, up
    front, and then split, so the two returned datasets never share an image.

    Args:
        csv_file_path: Path of the labels CSV file.
        imagePath: Directory containing the `<id>.jpg` image files.
        test_fraction: Fraction of the rows, in [0, 1), held out for the
            test dataset.
        batch_size: Number of images per batch in both datasets.
        seed: Seed of the one-off shuffle that decides the split. A fixed
            value keeps the split identical between runs; pass None to get
            a different split on every call.

    Returns:
        A `(train_dataset, test_dataset)` tuple of batched
        `tf.data.Dataset` objects.

    Raises:
        ValueError: If `test_fraction` is outside [0, 1) or the CSV has no
            rows.
    """
    if not 0 <= test_fraction < 1:
        raise ValueError(f'test_fraction must be in [0, 1), got {test_fraction!r}')

    labelsDf = pd.read_csv(csv_file_path)
    if labelsDf.empty:
        raise ValueError(f'No labelled images listed in {csv_file_path!r}')

    # Shuffle the CSV rows instead of the decoded images: a tf.data shuffle
    # placed before skip()/take() reshuffles on every epoch, which moves
    # images between the train and test splits, and it would also have to
    # buffer thousands of decoded images in memory.
    labelsDf = labelsDf.sample(frac=1, random_state=seed)

    file_paths = [os.path.join(imagePath, f'{image_id}.jpg') for image_id in labelsDf['id']]

    # Categorical sorts its categories, so a breed's class index does not
    # depend on the row order chosen above.
    breed_categories = pd.Categorical(labelsDf['breed'])
    num_classes = len(breed_categories.categories)
    labels_one_hot = tf.one_hot(breed_categories.codes.astype('int32'), num_classes)

    num_examples = len(file_paths)
    test_size = int(test_fraction * num_examples)
    train_size = num_examples - test_size  # > 0 because test_fraction < 1

    dataset = tf.data.Dataset.from_tensor_slices((file_paths, labels_one_hot))

    test_dataset = (
        dataset.take(test_size)
        .map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch_size)
    )
    train_dataset = (
        dataset.skip(test_size)
        # Only paths and labels are buffered here, so a full-size buffer is
        # cheap; the training order is reshuffled on every epoch.
        .shuffle(buffer_size=train_size)
        .map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch_size)
    )

    return train_dataset, test_dataset

def load_image(file_path, label):
    """Read the JPEG at `file_path` and return it as a 500x500 RGB image.

    The decoded image is cropped and/or padded (not rescaled) to 500x500.
    `label` is returned unchanged so the function can be used directly with
    `Dataset.map` on (path, label) pairs.
    """
    raw_bytes = tf.io.read_file(file_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    return tf.image.resize_with_crop_or_pad(decoded, 500, 500), label

# Build the batched training / test datasets from the competition files.
train, test = loadImagesWithClassName(
    csv_file_path='/kaggle/input/dog-breed-identification/labels.csv',
    imagePath='/kaggle/input/dog-breed-identification/train/',
)

# Let tf.data prepare upcoming batches while the current one is consumed.
train, test = (
    split.prefetch(buffer_size=tf.data.AUTOTUNE) for split in (train, test)
)

In [None]:
# Peek at one training image to sanity-check the input pipeline.
# Only the first batch is used, so pull a single batch instead of
# materialising four batches of decoded images into a list.
firstTake = train.take(1)
images, labels1 = next(iter(firstTake))

firstImage = images[0]
# PIL expects 8-bit pixel data.
numpyImage = firstImage.numpy().astype("uint8")
img = Image.fromarray(numpyImage)
img


In [45]:
def initializeModel(networkArray):
    """Build and compile a Sequential classifier from `networkArray`.

    When a weights file already exists at /kaggle/working/model.h5 it is
    loaded into the freshly built model, and a notice is printed.

    Args:
        networkArray: List of Keras layers making up the network.

    Returns:
        The model, compiled with Adam (learning rate 1e-3), categorical
        cross-entropy loss and the accuracy metric.
    """
    checkpoint_file = '/kaggle/working/model.h5'
    classifier = Sequential(networkArray)

    weights_available = os.path.exists(checkpoint_file)
    if weights_available:
        classifier.load_weights(checkpoint_file)
        print(":::Loaded saved weights:::")

    adam = tf.keras.optimizers.Adam(learning_rate=1e-3)
    classifier.compile(
        optimizer=adam,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

In [None]:
def createAndTrainModel(networkArray, train, test, epochs=120):
    """Build a model from `networkArray` and train it.

    After each epoch in which `val_loss` improves, the model weights are
    written to /kaggle/working/model.h5.

    Args:
        networkArray: List of Keras layers passed to `initializeModel`.
        train: Dataset used for fitting.
        test: Dataset used as validation data during fitting.
        epochs: Number of training epochs (defaults to the previously
            hard-coded 120).

    Returns:
        A `(history, model)` tuple: a DataFrame with one row per epoch and
        one column per recorded metric, and the trained model.
    """
    savePath = '/kaggle/working/model.h5'

    model = initializeModel(networkArray)
    checkpointCallback = tf.keras.callbacks.ModelCheckpoint(
        savePath,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=True,
    )

    history = model.fit(
        train,
        validation_data=test,
        epochs=epochs,
        callbacks=[checkpointCallback],
    )

    return pd.DataFrame(history.history), model

In [43]:
# AlexNet-style architecture (shortened: only three convolutional layers are
# used, the 4th and 5th conv layers of the original AlexNet are left out).
# NOTE(review): Dense(4098) looks like a typo for AlexNet's 4096 -- confirm
# before changing, since previously saved weights depend on this layer size.
network = [
    layers.InputLayer(input_shape=(500, 500, 3)),
    # Scale pixel values by 1/255 inside the model.
    layers.Rescaling(1/255),
    layers.Conv2D(
        96, kernel_size=(11, 11), strides=(4, 4),
        padding='valid', activation='relu',
    ),
    layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='valid'),
    layers.Conv2D(
        256, kernel_size=(5, 5), strides=(1, 1),
        padding='same', activation='relu',
    ),
    layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='valid'),
    layers.Conv2D(
        384, kernel_size=(3, 3), strides=(1, 1),
        padding='same', activation='relu',
    ),
    layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='valid'),
    layers.Flatten(),
    layers.Dense(4098, activation='relu'),
    layers.Dense(2048, activation='relu'),
    layers.Dense(1024, activation='relu'),
    # 120 softmax outputs -- presumably one per breed in labels.csv.
    layers.Dense(units=120, activation='softmax'),
]


In [None]:

# Train the network; `history` is a DataFrame of the per-epoch metrics.
history, model = createAndTrainModel(
    networkArray=network,
    train=train,
    test=test,
)

In [None]:
model.summary()
# The ModelCheckpoint callback used during training already stores the
# weights with the best val_loss in this file. Saving unconditionally here
# would overwrite them with the last-epoch weights, so only write the file
# when no checkpoint exists.
if not os.path.exists('/kaggle/working/model.h5'):
    model.save_weights('/kaggle/working/model.h5')

In [None]:
# Compare training and validation accuracy per epoch.
history.plot(y=['accuracy', 'val_accuracy'])

In [None]:
# Recover the breed names as categorical categories, i.e. in the same order
# as the category codes pd.Categorical assigns to these labels.
labelsDf = pd.read_csv('/kaggle/input/dog-breed-identification/labels.csv')
labels = labelsDf['breed'].tolist()

breeds = pd.Categorical(labels).categories
breeds

In [None]:

# print("Example"+ "     |     "+ "Label"+"     |     "+"Prediction"+ "\n")

# for images, labels in test:
    
#     predictions = model.predict(images)
#     predictions = predictions.argmax(axis=1)
#     predictions = [breeds[i] for i in predictions]
#     labels = [breeds[i] for i in np.array(labels).argmax(axis=1)]
#     for i in range(10):
#         # Get the image and label
#         image = images[i]
#         label = labels[i]
        
#         # Convert the image tensor to a NumPy array
#         image_np = image.numpy().astype("uint8")
#         im=plt.imshow(image_np) 
#         plt.axis('off')
        
#         plt.show()
#         print(str(i+1) + "     |     "+label+"     |     "+predictions[i]+ "\n" )

In [27]:
def load_prediction_image(file_path):
    """Read the JPEG at `file_path` and return it as a 500x500 RGB image.

    The decoded image is cropped and/or padded (not rescaled) to 500x500,
    mirroring the preprocessing done by `load_image` for labelled data.
    """
    raw_bytes = tf.io.read_file(file_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    return tf.image.resize_with_crop_or_pad(decoded, 500, 500)

In [30]:
testImageDir = '/kaggle/input/dog-breed-identification/test/'

# List only the JPEG files that sit directly in the test directory:
# - os.walk would also return files from sub-directories, whose names would
#   then be joined to the wrong (top-level) directory below;
# - any non-JPEG file would make decode_jpeg fail during prediction;
# - sorting makes the row order of the predictions reproducible.
filenameList = sorted(
    fileName
    for fileName in os.listdir(testImageDir)
    if fileName.lower().endswith(('.jpg', '.jpeg'))
    and os.path.isfile(os.path.join(testImageDir, fileName))
)

predictionDataset = tf.data.Dataset.from_tensor_slices(
    [os.path.join(testImageDir, fileName) for fileName in filenameList]
)
predictionDataset = predictionDataset.map(load_prediction_image).batch(64)

In [46]:
# Rebuild the model; initializeModel loads the saved weights if present.
model = initializeModel(network)
predictionResult = model.predict(predictionDataset)

# The submission's `id` column must hold the bare image id, so strip the
# file extension from each file name.
dataframe1 = pd.DataFrame(
    {"id": [os.path.splitext(fileName)[0] for fileName in filenameList]}
)
# One probability column per breed, in the order of `breeds`.
dataframe2 = pd.DataFrame(predictionResult, columns=breeds)

finalPredictionDataFrame = pd.concat([dataframe1, dataframe2], axis=1)
finalPredictionDataFrame.to_csv('submission.csv', index=False)

:::Loaded saved weights:::
