# Libraries

In [1]:
import os
import numpy as np
import pandas as pd

import cv2
import matplotlib.pyplot as plt

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

from keras.utils import to_categorical
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop,Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16

# Path

In [2]:
# Root directory of the input files. The trailing slash matters because later
# cells build file paths by plain string concatenation.
path = '../input/'
# List the contents of the input directory.
os.listdir(path)

# Load Data

In [3]:
# Label table (columns 'id' and 'breed' are used below) and the sample
# submission, whose 'id' column is later used to locate the test images.
train_data = pd.read_csv(path+'labels.csv')
samp_subm = pd.read_csv(path+'sample_submission.csv')

# Overview

In [4]:
# Compare the number of label rows with the number of files on disk.
print('Number train samples:', len(train_data))
print('Number train images:', len(os.listdir(path+'train/')))
print('Number test images:', len(os.listdir(path+'test/')))

In [5]:
# Preview the first rows of the label table.
train_data.head()

# EDA

There are 120 breeds. Here you can see the distribution of the top 10: 

In [6]:
# Number of label rows per breed, limited to the ten most frequent breeds.
train_data['breed'].value_counts().head(10)

# Load Single Image
We plot the first image of the train data.

In [7]:
# Read the id and the breed from the same row so that the title shown with
# the image matches the picture. Row 0 is the first image of the train data.
row = 0
df = train_data.loc[row, 'id']
breed = train_data.loc[row, 'breed']
file = df+'.jpg'
img = cv2.imread(path+'train/'+file)
# cv2.imread returns None instead of raising when a file cannot be read.
if img is None:
    raise FileNotFoundError(path+'train/'+file)
print('Shape:', img.shape)

In [8]:
# Show the loaded image with its breed as title. OpenCV delivers BGR channel
# order, so the image is converted to RGB before matplotlib draws it.
fig, ax = plt.subplots(figsize=(7, 7))
ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
for hide_labels in (ax.set_xticklabels, ax.set_yticklabels):
    hide_labels([])
ax.set_title(breed)
plt.show()

# Plot Examples
We plot example images for several breeds from the top 10.

In [9]:
def plot_examples(category = 'scottish_deerhound', n_images = 5):
    """ Plot up to `n_images` training images of a given category

    Parameters
    ----------
    category : str
        Value of the 'breed' column of `train_data` to show.
    n_images : int, default 5
        Maximum number of images to plot. Fewer are shown when the category
        has fewer rows in `train_data`.

    Raises
    ------
    ValueError
        If `n_images` is smaller than 1 or no row of `train_data` has the
        requested breed.
    """

    if n_images < 1:
        raise ValueError('n_images must be at least 1')

    # Take the ids by position so the original index of train_data is irrelevant
    ids = train_data.loc[train_data['breed']==category, 'id'].head(n_images).tolist()
    if not ids:
        raise ValueError('No training images found for breed: '+str(category))

    # 5 inches of width per image reproduces the (25, 20) figure for 5 images;
    # squeeze=False keeps `axs` an array even when only one image is shown
    fig, axs = plt.subplots(1, len(ids), figsize=(5*len(ids), 20), squeeze=False)
    fig.subplots_adjust(hspace = .1, wspace=.1)
    for ax, image_id in zip(axs.ravel(), ids):
        img = cv2.imread(path+'train/'+image_id+'.jpg')
        # OpenCV loads BGR; matplotlib expects RGB
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        ax.set_title(category)
        ax.set_xticklabels([])
        ax.set_yticklabels([])
    plt.show()

In [10]:
# No argument: plots the function's default category.
plot_examples()

In [11]:
# Example images of the breed 'maltese_dog'.
plot_examples(category='maltese_dog')

In [12]:
# Example images of the breed 'pomeranian'.
plot_examples(category='pomeranian')

In [13]:
# Example images of the breed 'afghan_hound'.
plot_examples(category='afghan_hound')

# Image Preprocessing
As we can see, the images come in different formats: landscape or portrait. The neural network needs a standard input size, so we have to prepare the data by cropping each image to a square and rescaling it.

In [14]:
def image_preprocessing(image, image_size):
    """ Image Preprocessing

    Convert a BGR image to grayscale, crop the largest centred square and
    rescale it to `image_size` x `image_size` pixels.

    Parameters
    ----------
    image : numpy.ndarray
        Colour image in BGR channel order, as returned by cv2.imread.
    image_size : int
        Side length in pixels of the returned square image.

    Returns
    -------
    numpy.ndarray
        2-D grayscale image of shape (image_size, image_size).

    Raises
    ------
    ValueError
        If `image` is None, which is what cv2.imread returns for a file it
        cannot read.
    """

    if image is None:
        raise ValueError('image is None - the file could not be read')

    # Load Image
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Crop Image
    # The square side is the short edge itself, so the crop stays exactly
    # square (and non-empty) even when that edge has an odd length.
    n_rows, n_cols = image_gray.shape[:2]
    side = min(n_rows, n_cols)
    top = (n_rows - side)//2
    left = (n_cols - side)//2
    image_cropped = image_gray[top:top+side, left:left+side]

    # Rescale Image
    image_rescale = cv2.resize(image_cropped,
                               dsize=(image_size, image_size),
                               interpolation=cv2.INTER_AREA)
    return image_rescale

def plot_befor_after(image):
    """ Show the original image next to its preprocessed version

    The preprocessed image comes from `image_preprocessing`, called with the
    notebook-level variable `image_size`.
    """

    fig, (ax_original, ax_prepared) = plt.subplots(1, 2, figsize=(15, 10))
    fig.subplots_adjust(hspace = .1, wspace=.1)

    # Left panel: original image, converted from BGR to RGB for display
    ax_original.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    ax_original.set_title('original shape: '+str(image.shape))

    # Right panel: grayscale image after cropping and rescaling
    prepared = image_preprocessing(image, image_size)
    ax_prepared.imshow(prepared, cmap='gray')
    ax_prepared.set_title('rescaled shape: '+str(prepared.shape))

    # Hide the tick labels on both panels
    for ax in (ax_original, ax_prepared):
        ax.set_xticklabels([])
        ax.set_yticklabels([])
    plt.show()

We set the image size:

In [15]:
# Side length in pixels of the square images produced by image_preprocessing.
image_size = 128

Example Landscape:

In [16]:
# Pick one row of the label table and load the matching training image.
row = 6
df, breed = train_data.loc[row, ['id', 'breed']]
file = df + '.jpg'
image = cv2.imread(path + 'train/' + file)
print('Shape:', image.shape)

In [17]:
# Show the image loaded above next to its preprocessed version.
plot_befor_after(image)

Example Portrait:

In [18]:
# Pick one row of the label table and load the matching training image.
row = 10
df, breed = train_data.loc[row, ['id', 'breed']]
file = df + '.jpg'
image = cv2.imread(path + 'train/' + file)
print('Shape:', image.shape)

In [19]:
# Show the image loaded above next to its preprocessed version.
plot_befor_after(image)

# Read All Images

In [20]:
# Side length in pixels used when reading the full data set and as the model
# input size (same value as in the preprocessing examples).
image_size = 128

In [21]:
def prepare_data(path, data, image_size):
    """ Read all images into a numpy array

    Parameters
    ----------
    path : str
        Directory prefix that is concatenated directly with the file name,
        so it has to end with a path separator.
    data : pandas.DataFrame
        Table with an 'id' column; the file '<id>.jpg' is read for every row.
    image_size : int
        Side length in pixels of the preprocessed square images.

    Returns
    -------
    numpy.ndarray
        float32 array of shape (len(data), image_size, image_size) with the
        pixel values divided by 255, ordered like the rows of `data`.

    Raises
    ------
    FileNotFoundError
        If an image file cannot be read.
    """

    X = np.empty((len(data), image_size, image_size), dtype=np.uint8)
    # Fill by row position, not by index label, so `data` may carry any index
    # (for example after filtering or shuffling).
    for position, image_id in enumerate(data['id']):
        file = image_id + '.jpg'
        image = cv2.imread(path+file)
        if image is None:
            # cv2.imread signals an unreadable file by returning None
            raise FileNotFoundError(path+file)
        X[position, :, :] = image_preprocessing(image, image_size)
    return X.astype('float32')/255

In [22]:
# Read and preprocess every training image (ids from the label table) and
# every test image (ids from the sample submission).
X_train = prepare_data(path=path+'train/', data=train_data, image_size=image_size)
X_test = prepare_data(path=path+'test/', data=samp_subm, image_size=image_size)

Encode the labels

In [23]:
# One indicator column per breed, one row per training image.
y_train = pd.get_dummies(train_data['breed'])

# Split Train Data 

In [24]:
# Hold out 30% of the training images for validation; the fixed random_state
# makes the split repeatable.
# NOTE: X_train and y_train are rebound to the training part, so re-running
# this cell without re-running the loading cells splits the data again.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.3,
    random_state=100,
)

In [25]:
# Check the array shapes of both parts after the split.
print('Shape train data:', X_train.shape)
print('Shape val data:', X_val.shape)

# Reshape Data

In [26]:
# Add a trailing channel axis of length 1, matching the
# (image_size, image_size, 1) input shape declared for the first Conv2D layer.
X_train, X_val, X_test = (
    images.reshape(-1, image_size, image_size, 1)
    for images in (X_train, X_val, X_test)
)

# Define Model

In [27]:
# CNN with four Conv2D + MaxPool2D stages and a 120-way softmax output.
# NOTE(review): `model` is assigned again in the following cell before it is
# compiled, so this architecture is not the one that gets trained.
model = Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu',
           input_shape=(image_size, image_size, 1)),
    MaxPool2D((2, 2)),

    Conv2D(32, (3, 3), activation='relu'),
    MaxPool2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPool2D((2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPool2D((2, 2)),

    # Conv2D(128, (3, 3), activation='relu'),
    # MaxPool2D((2, 2)),

    Flatten(),

    Dense(120, activation='softmax'),
])

In [28]:
# CNN that is compiled and trained below: two blocks of two Conv2D layers,
# each block followed by max pooling and dropout, then a 120-way softmax.
model = Sequential([
    # Block 1: 5x5 convolutions with 32 filters
    Conv2D(filters=32, kernel_size=(5, 5), padding='Same', activation='relu',
           input_shape=(image_size, image_size, 1)),
    Conv2D(filters=32, kernel_size=(5, 5), padding='Same', activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.15),

    # Block 2: 3x3 convolutions with 64 filters
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same', activation='relu'),
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same', activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.15),

    # Classifier head
    Flatten(),
    # Dense(256, activation="relu"),
    # Dropout(0.3),
    Dense(120, activation="softmax"),
])

In [29]:

# Categorical cross-entropy matches the one-hot encoded breed labels.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(lr=1e-3),
    metrics=['accuracy'],
)

In [30]:
# Print the summary of the compiled model.
model.summary()

In [33]:
# Training settings handed to model.fit: number of epochs and batch size.
epochs = 25
batch_size = 128

In [34]:
# Train on the training split and pass the validation split to Keras for
# evaluation. The returned History object is what the plots below read.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
)

# Analyse Training

In [35]:
# Training and validation loss per epoch.
loss = history.history['loss']
loss_val = history.history['val_loss']
# Use a separate name for the x axis: rebinding `epochs` here would replace
# the integer epoch count that model.fit reads when the training cell is re-run.
epoch_axis = range(1, len(loss)+1)
plt.plot(epoch_axis, loss, 'bo', label='loss_train')
plt.plot(epoch_axis, loss_val, 'b', label='loss_val')
plt.title('value of the loss function')
plt.xlabel('epochs')
plt.ylabel('value of the loss function')
plt.legend()
plt.grid()
plt.show()

In [36]:
# Training and validation accuracy per epoch.
# Depending on the Keras release the metric is recorded under
# 'acc'/'val_acc' or under 'accuracy'/'val_accuracy'; accept either.
acc = history.history['acc'] if 'acc' in history.history else history.history['accuracy']
acc_val = (history.history['val_acc'] if 'val_acc' in history.history
           else history.history['val_accuracy'])
# Derive the x axis from `acc` itself and keep the integer `epochs` used by
# model.fit untouched, so the training cell can be re-run afterwards.
epoch_axis = range(1, len(acc)+1)
plt.plot(epoch_axis, acc, 'bo', label='accuracy_train')
plt.plot(epoch_axis, acc_val, 'b', label='accuracy_val')
plt.title('accuracy')
plt.xlabel('epochs')
plt.ylabel('value of accuracy')
plt.legend()
plt.grid()
plt.show()

# Predict Test Data

In [37]:
# Model outputs for the test images. These are predictions, not ground-truth
# labels, despite the variable name.
y_test = model.predict(X_test)

In [38]:
# Display the predicted values.
y_test