In [1]:
import numpy as np
import pandas as pd
import os, cv2, random, time, shutil, csv
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tqdm import tqdm
np.random.seed(42)
%matplotlib inline 

import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import BatchNormalization, Dense, GlobalAveragePooling2D, Lambda, Dropout, InputLayer, Input
from keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img

Using TensorFlow backend.


In [2]:
image_directory = '/kaggle/input/cat-breed-12/images'

labels = []

for root, dirs, files in os.walk(image_directory):
    for file in files:
        image_name = file
        breed = os.path.basename(root)
        labels.append((image_name, breed))

csv_file = 'label.csv'

with open(csv_file, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['id', 'breed'])
    writer.writerows(labels)

print(f'{len(labels)} labels have been written to {csv_file}.')


2400 labels have been written to label.csv.


In [3]:
def get_num_files(path):

    if not os.path.exists(path):
        return 0
    return sum([len(files) for r, d, files in os.walk(path)])

In [4]:
train_dir = '/kaggle/input/cat-breed-12/images'
data_size = get_num_files(train_dir)
print('Data samples size: ', data_size)

Data samples size:  2400


In [5]:

labels_dataframe = pd.read_csv('/kaggle/working/label.csv')
labels_dataframe.head(5)

Unnamed: 0,id,breed
0,Ragdoll_76.jpg,Ragdoll
1,Ragdoll_78.jpg,Ragdoll
2,Ragdoll_158.jpg,Ragdoll
3,Ragdoll_79.jpg,Ragdoll
4,Ragdoll_122.jpg,Ragdoll


In [6]:
cat_breeds = sorted(list(set(labels_dataframe['breed'])))
n_classes = len(cat_breeds)
print(n_classes)
dog_breeds[:10]

12


['Abyssinian',
 'Bengal',
 'Birman',
 'Bombay',
 'British',
 'Egyptian',
 'Maine',
 'Persian',
 'Ragdoll',
 'Russian']

In [7]:
#Map each label string to an integer label.
class_to_num = dict(zip(cat_breeds, range(n_classes)))
class_to_num

{'Abyssinian': 0,
 'Bengal': 1,
 'Birman': 2,
 'Bombay': 3,
 'British': 4,
 'Egyptian': 5,
 'Maine': 6,
 'Persian': 7,
 'Ragdoll': 8,
 'Russian': 9,
 'Siamese': 10,
 'Sphynx': 11}

In [8]:
def images_to_array(data_dir, labels_dataframe, img_size = (224,224,3)):

    images_names = labels_dataframe['id']
    images_labels = labels_dataframe['breed']
    data_size = len(images_names)

    X = np.zeros([data_size, img_size[0], img_size[1], img_size[2]], dtype=np.uint8)
    y = np.zeros([data_size,1], dtype=np.uint8)

    for i in tqdm(range(data_size)):
        image_name = images_names[i]
        img_dir = os.path.join(data_dir,images_labels[i], image_name)
        img_pixels = load_img(img_dir, target_size=img_size)
        X[i] = img_pixels
        
        image_breed = images_labels[i]
        y[i] = class_to_num[image_breed]
    
    y = to_categorical(y)
   
    ind = np.random.permutation(data_size)
    X = X[ind]
    y = y[ind]
    print('Ouptut Data Size: ', X.shape)
    print('Ouptut Label Size: ', y.shape)
    return X, y



In [9]:
#img_size chosen to be 331 to suit the used architectures.
dir = '/kaggle/input/cat-breed-12/images'
img_size = (224,224,3)
X, y = images_to_array(dir,labels_dataframe, img_size)

100%|██████████| 2400/2400 [00:29<00:00, 81.20it/s]


Ouptut Data Size:  (2400, 224, 224, 3)
Ouptut Label Size:  (2400, 12)


In [10]:
def get_features(model_name, data_preprocessor, input_size, data):

    input_layer = Input(input_size)
    preprocessor = Lambda(data_preprocessor)(input_layer)
    base_model = model_name(weights='imagenet', include_top=False,
                            input_shape=input_size)(preprocessor)
    avg = GlobalAveragePooling2D()(base_model)
    feature_extractor = Model(inputs = input_layer, outputs = avg)

    feature_maps = feature_extractor.predict(data, batch_size=64, verbose=1)
    print('Feature maps shape: ', feature_maps.shape)
    return feature_maps

In [11]:
from keras.applications.inception_v3 import InceptionV3, preprocess_input
inception_preprocessor = preprocess_input
inception_features = get_features(InceptionV3,
                                  inception_preprocessor,
                                  img_size, X)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Feature maps shape:  (2400, 2048)


In [12]:
from keras.applications.xception import Xception, preprocess_input
xception_preprocessor = preprocess_input
xception_features = get_features(Xception,
                                 xception_preprocessor,
                                 img_size, X)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.4/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
Feature maps shape:  (2400, 2048)


In [13]:
from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
inc_resnet_preprocessor = preprocess_input
inc_resnet_features = get_features(InceptionResNetV2,
                                   inc_resnet_preprocessor,
                                   img_size, X)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.7/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
Feature maps shape:  (2400, 1536)


In [15]:
final_features = np.concatenate([inception_features,
                                 xception_features,
                                 inc_resnet_features,], axis=-1)
print('Final feature maps shape', final_features.shape)

Final feature maps shape (2400, 5632)


In [16]:
from keras.callbacks import EarlyStopping
EarlyStop_callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
my_callback=[EarlyStop_callback]

In [17]:
final_features.shape[1:]

(5632,)

In [18]:
dnn = keras.models.Sequential([
    InputLayer(final_features.shape[1:]),
    Dropout(0.7),
    Dense(12, activation='softmax')
])

dnn.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

h = dnn.fit(final_features, y,
            batch_size=128,
            epochs=60,
            validation_split=0.1,
            callbacks=my_callback)

Train on 2160 samples, validate on 240 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60


In [19]:
dnn.save_weights('cat_final_weights_5632.h5')

Inspired by: [https://www.kaggle.com/c/dog-breed-identification/discussion/40779](http://)