In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Activation, Flatten, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.applications import resnet50
import random 
import matplotlib.pyplot as plt

# Input data files are available in the "../input/" directory (i.e. "/kaggle/input/").
# The os module imported here is used below to walk the dataset folders stored there.
import os

# The parent directory where the images are stored
baseDir = '/kaggle/input/stanford-dogs-dataset-traintest/cropped'
# Root of the training split; every directory below it is treated as one breed
train_root = baseDir + '/train'

# Collect one cleaned breed name per directory found under the training split.
# The position of a name in this list is later used as its numeric class id.
breeds = []
for dirname, subdirs, _ in os.walk(train_root):
    # Visit sub-directories in sorted order so the class ids are reproducible
    # (os.walk otherwise yields them in arbitrary filesystem order)
    subdirs.sort()

    # Skip the split root itself: it is a container, not a breed.
    # os.walk reports full paths, so the root has to be compared as a path.
    if dirname == train_root:
        continue

    folder_name = dirname.split('/')[-1]
    # Keep only letters and underscores; this drops the digits and '-' of the id.
    # load_dataset applies the same cleaning, so the names match when looked up.
    breeds.append(''.join(c for c in folder_name if c == '_' or c.isalpha()))

num_breeds = len(breeds)


In [None]:
def load_dataset(filePath, num_samples = None, target_size = None):
    """Load the images found under ``filePath`` together with their labels.

    The directory tree is walked recursively. The name of the directory that
    contains an image, reduced to its letters and underscores, is used as that
    image's label.

    Parameters
    ----------
    filePath : str
        Root directory to scan.
    num_samples : int or None, optional
        Maximum number of images to load from each directory. ``None``
        (the default) loads every image.
    target_size : tuple or None, optional
        Forwarded unchanged to ``load_img``. ``None`` (the default) keeps the
        size each image has on disk.

    Returns
    -------
    images : list
        One entry per loaded image: the result of ``img_to_array`` divided by 255.
    labels : list of str
        The cleaned directory name for each image, aligned with ``images``.
    """
    images = []
    labels = []
    for dirname, subdirs, filenames in os.walk(filePath):
        # Sorted traversal makes the selected subset reproducible between runs
        subdirs.sort()

        # Label = directory name with everything but letters/underscores removed
        folder_name = dirname.split('/')[-1]
        breed_name = ''.join(c for c in folder_name if c == '_' or c.isalpha())

        # Count images actually loaded, so skipped non-image files do not use up
        # the quota and exactly num_samples images (not num_samples + 1) are kept
        loaded = 0
        for filename in sorted(filenames):
            if num_samples is not None and loaded >= num_samples:
                break
            # Only JPEG files are images here; match the extension case-insensitively
            if os.path.splitext(filename)[1].lower() not in ('.jpg', '.jpeg'):
                continue
            img = load_img(os.path.join(dirname, filename), target_size=target_size)
            # Convert to an array and scale the values by 1/255
            images.append(img_to_array(img) / 255)
            labels.append(breed_name)
            loaded += 1
    return images, labels

In [None]:
# Training split: load a capped sample of images per breed folder, stack the
# image list into a single numpy array and turn each breed name into its
# position in `breeds` (the integer breed id)
images_train, labels_train = load_dataset(os.path.join(baseDir, 'train'), num_samples=30)
images_train = np.asarray(images_train)
labels_train = list(map(breeds.index, labels_train))

# Test split: identical processing
images_test, labels_test = load_dataset(os.path.join(baseDir, 'test'), num_samples=30)
images_test = np.asarray(images_test)
labels_test = list(map(breeds.index, labels_test))

In [None]:
# One-hot encode the integer breed ids.
# num_classes is pinned to num_breeds so that both label matrices have exactly
# one column per breed (the width of the model's softmax layer) even when the
# highest breed id does not occur in a split; without it, to_categorical
# infers the width from the largest label it is given.
labels_train = to_categorical(labels_train, num_classes=num_breeds)
labels_test = to_categorical(labels_test, num_classes=num_breeds)

# Create model from scratch (11.5% Test Accuracy)

In [None]:
def create_model(parameters):
    """Build the (uncompiled) convolutional classifier.

    Parameters
    ----------
    parameters : dict
        'input_shape' : passed as ``input_shape`` to the first convolution.
        'n'           : number of units in the final softmax layer.
        'get_summary' : when exactly ``True``, ``model.summary()`` is called.

    Returns
    -------
    The assembled ``Sequential`` model.
    """
    net = Sequential()

    def add_conv_stage(filters, downsample=True, **conv_kwargs):
        # 3x3 same-padded convolution -> batch norm over axis 3 -> ReLU
        net.add(Conv2D(filters, (3,3), padding='same', **conv_kwargs))
        net.add(BatchNormalization(axis=3))
        net.add(Activation('relu'))
        if downsample:
            # 4x4 max-pooling with stride 4, followed by 20% dropout
            net.add(MaxPooling2D(pool_size=(4,4), strides=(4,4), padding='same'))
            net.add(Dropout(0.2))

    # Three pooled stages; only the first one declares the input shape
    add_conv_stage(16, input_shape=parameters['input_shape'])
    for width in (32, 64):
        add_conv_stage(width)

    # Last convolutional stage has no pooling/dropout
    add_conv_stage(128, downsample=False)

    # Classifier head
    net.add(Flatten())
    net.add(Dropout(0.2))
    net.add(Dense(512, activation='relu'))
    net.add(Dense(parameters['n'], activation='softmax'))

    if parameters['get_summary'] is True:
        net.summary()

    return net

In [None]:
# Instantiate the from-scratch network: 224x224 RGB inputs, one softmax unit
# per breed, and print the layer summary while building it
model = create_model(dict(input_shape=(224,224,3), n=num_breeds, get_summary=True))

In [None]:
# Plot a random training image (manual sanity check).
# randrange excludes its upper bound, so the index is always valid;
# randint(0, len(images_train)) could return len(images_train) and raise IndexError.
index = random.randrange(len(images_train))
plt.imshow(images_train[index])

In [None]:
# Training hyperparameters
num_epochs = 10
batchSize = 64

# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric
model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=['accuracy'])

# Show the shape of the test image array before training starts
print(images_test.shape)

# Train on the training split; the test split is supplied as validation data
model.fit(
    images_train,
    labels_train,
    batch_size=batchSize,
    epochs=num_epochs,
    validation_data=(images_test, labels_test),
    verbose=1,
)

# Using the ResNet-50 Model

In [None]:
# Load the ResNet50 Model 


#