# Dog Breed Identification

**Overview:**
- What I've learnt?
- Download and Prepare Data
- Visualize/Inspect Data
- Building Model
- Training Model
- Evaluating Model
- Submission

**General Steps:**

1. Download the dataset and move it to the `../datasets` folder.
2. Unpack the zipped files (optionally delete the zip files after unpacking)
3. Visualize/Inspect the dataset.
4. Follow chapter 5 of 'Deep Learning with Python' by François Chollet

### What I've learnt?
- Concepts:
- Code:

### Imports:

In [None]:
import pandas as pd
import numpy as np
import os
import cv2

from helper_scripts import my_func_utils

from sklearn.model_selection import train_test_split

from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Dropout, Flatten, Input, Conv2D, MaxPool2D
from keras.layers.normalization import BatchNormalization
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.optimizers import RMSprop
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical # convert to one-hot-encoding

import matplotlib.pyplot as plt
%matplotlib inline

%reload_ext autoreload
%autoreload 2

# Seed NumPy's global random generator so NumPy-driven randomness repeats between runs.
np.random.seed(1)

### Loading, Testing and Cleaning Data:

In [None]:
# Load the labels CSV that ships with the dataset.
# Forward slashes keep the path portable and avoid the invalid '\d' / '\l'
# escape sequences the backslash spelling depended on; the directory cells
# below already use this form.
mapping = pd.read_csv('../datasets/dog_breed_identification/labels.csv')

In [None]:
#my_func_utils.bin_dataset('..\datasets\dog_breed_identification\copy-train-2', mapping.values, validation_split=0.2)

In [None]:
# Shape of one image as passed to the models; presumably (height, width, channels) — confirm
# against the Keras image_data_format in use.
input_shape = (150,150,3)

### Preparing Data:

In [None]:
# Distinct values of the CSV's second column, sorted by np.unique
# (presumably the breed names — verify against labels.csv).
class_column = mapping.values[:, 1]
labels = np.unique(class_column)

# One output class per distinct label.
num_classes = labels.shape[0]

In [None]:
# Both splits live side by side under the same parent folder.
_split_root = '../datasets/dog_breed_identification/copy-train-2'
train_directory = _split_root + '/train'
val_directory = _split_root + '/val'

### Visualizing Data:

In [None]:
# Show one image from the first class folder as a sanity check on the data.
path = os.path.join(train_directory, labels[0])
fnames = sorted(os.listdir(path))  # sorted so the same file is shown on every run
fpath = os.path.join(path, fnames[0])
print(fpath)

# cv2.imread signals failure by returning None instead of raising.
img = cv2.imread(fpath, cv2.IMREAD_COLOR)
if img is None:
    raise IOError('Could not read image: {}'.format(fpath))

# OpenCV decodes to BGR channel order while matplotlib expects RGB;
# without the conversion the colours are displayed swapped.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), interpolation='none')
plt.xticks([])
plt.yticks([])
plt.title(labels[0])

### Building Model:
**Custom model:** Load a pre-trained model or create your own here!

In [None]:
def custom_model_1(input_shape=None):
    """Return a compiled small convnet, or a saved model if the user asks for one.

    The user is first prompted on stdin; answering exactly 'yes' returns
    ``my_func_utils.load_model()`` and nothing is built. Otherwise a
    Sequential network is created: four Conv2D + MaxPool2D stages
    (32, 64, 128, 128 filters), Flatten, Dense(512), Dense(256) and a softmax
    layer with the module-level ``num_classes`` units. It is compiled with
    RMSprop (lr=1e-4), categorical cross-entropy and the accuracy metric.

    Args:
        input_shape: shape of a single input image, e.g. (150, 150, 3).
            Required when a new model is built.

    Returns:
        The loaded model or the newly compiled one.

    Raises:
        AssertionError: if a new model is requested and input_shape is None.
    """
    if input("If you want to load a model, enter 'yes'.\n") == 'yes':
        return my_func_utils.load_model()

    assert input_shape is not None, 'input_shape is required to build a new model'

    model = Sequential()

    # Convolutional stages; only the first layer needs to know the input shape.
    model.add(Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=input_shape))
    model.add(MaxPool2D((2, 2)))
    for filters in (64, 128, 128):
        model.add(Conv2D(filters, (3, 3), padding='same', activation='relu'))
        model.add(MaxPool2D((2, 2)))

    # Classifier head.
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(optimizer=RMSprop(lr=1e-4),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

In [None]:
# The only builder defined so far is custom_model_1; `custom_model` does not exist.
model = custom_model_1(input_shape)
model.summary()

### Data Preprocessing:

In [None]:
# Folders read by the directory generators below (same split as train_directory/val_directory).
_binned_root = '../datasets/dog_breed_identification/copy-train-2'
train_dir = _binned_root + '/train'
val_dir = _binned_root + '/val'

In [None]:
# IPython help lookup (trailing '?'): shows the documentation of flow_from_directory.
ImageDataGenerator.flow_from_directory?

**Problem #1**: Loading images with/without using generators.

**Solution:**
- Write custom library for flowing data from directory (or)
- Write scripts to allow preprocessing using existing `Keras` implementation.

In [None]:
# No augmentation yet: both generators only multiply pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

# Images are resized to 150x150 while being read from the class sub-folders.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(150, 150),
)

In [None]:
# testing generators: draw a single batch and show the images it contains
data_batch, labels_batch = next(iter(train_generator))
print('data batch shape:', data_batch.shape)
print('labels batch shape:', labels_batch.shape)

fig = plt.figure(figsize=(12,8))
# The 5x8 grid has room for 40 images; also never index past the end of the batch.
for i in range(min(len(data_batch), 40)):
    plt.subplot(5, 8, i + 1)
    plt.imshow(data_batch[i], interpolation='none')
    plt.xticks([])
    plt.yticks([])
    plt.title(i)
# Lay the figure out once, after all subplots exist.
plt.tight_layout()

In [None]:
# Cell output: shapes of the batch drawn from train_generator in the previous cell.
data_batch.shape, labels_batch.shape

In [None]:
# training: 30 epochs of 100 generator batches, validating on 50 batches after each epoch
history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=30,
    validation_data=val_generator,
    validation_steps=50,
)

In [None]:
# save the model trained above to an HDF5 file (baseline run, no augmentation)
model.save('../saved_models/dog_breed_identification_small.h5')

In [None]:
# plot the loss and accuracy curves for training and validation
logs = history.history
# Keras < 2.3 reports metrics=['accuracy'] as 'acc'; later releases keep the
# name exactly as given, so accept whichever key is present.
acc_key = 'acc' if 'acc' in logs else 'accuracy'

fig, ax = plt.subplots(1, 2)
ax[0].plot(logs['loss'], color='b', label="Training loss")
ax[0].plot(logs['val_loss'], color='r', label="Validation loss")
legend = ax[0].legend(loc='best', shadow=True)

ax[1].plot(logs[acc_key], color='b', label="Training accuracy")
ax[1].plot(logs['val_' + acc_key], color='r', label="Validation accuracy")
legend = ax[1].legend(loc='best', shadow=True)

In [None]:
# using data augmentation
# NOTE(review): width_shift_range=0 means no horizontal shifting while
# height_shift_range is 0.2 — confirm the asymmetry is intended.
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

In [None]:
# displaying randomly augmented images
# `train_class_dir` was never defined; use the first class folder of the training split.
train_class_dir = os.path.join(train_dir, labels[0])
fnames = [os.path.join(train_class_dir, fname) for fname in sorted(os.listdir(train_class_dir))]

# Fourth image when available, otherwise the last one in the folder.
img_path = fnames[min(3, len(fnames) - 1)]

img = image.load_img(img_path, target_size=(150, 150))
x = image.img_to_array(img)
x = x.reshape((1,) + x.shape)  # add the leading batch axis that flow() iterates over

# datagen.flow never stops on its own, so cap the loop at four samples.
for i, batch in zip(range(4), datagen.flow(x, batch_size=1)):
    plt.figure(i)
    imgplot = plt.imshow(image.array_to_img(batch[0]))

plt.show()

In [None]:
def custom_model_2(input_shape=None):
    """Return the small convnet with dropout, or a saved model if the user asks for one.

    The user is first prompted on stdin; answering exactly 'yes' returns
    ``my_func_utils.load_model()`` and nothing is built. Otherwise a
    Sequential network is created: four Conv2D + MaxPool2D stages
    (32, 64, 128, 128 filters), Flatten, Dropout(0.5), Dense(512), Dense(256)
    and a softmax layer with the module-level ``num_classes`` units. It is
    compiled with RMSprop (lr=1e-4), categorical cross-entropy and accuracy.

    Args:
        input_shape: shape of a single input image, e.g. (150, 150, 3).
            Required when a new model is built.

    Returns:
        The loaded model or the newly compiled one.

    Raises:
        AssertionError: if a new model is requested and input_shape is None.
    """
    if input("If you want to load a model, enter 'yes'.\n") == 'yes':
        return my_func_utils.load_model()

    assert input_shape is not None, 'input_shape is required to build a new model'

    model = Sequential()

    # Convolutional stages; only the first layer needs to know the input shape.
    model.add(Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=input_shape))
    model.add(MaxPool2D((2, 2)))
    for filters in (64, 128, 128):
        model.add(Conv2D(filters, (3, 3), padding='same', activation='relu'))
        model.add(MaxPool2D((2, 2)))

    # Classifier head; dropout is applied to the flattened features.
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(optimizer=RMSprop(lr=1e-4),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

In [None]:
# training network using data augmentation and dropout
# Build the dropout model first: without this the cell would just keep
# training whatever `model` is left over from the earlier cells.
model = custom_model_2(input_shape)

# NOTE(review): width_shift_range=0 means no horizontal shifting — confirm intended.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=40,
                                   width_shift_range=0,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   fill_mode='nearest',
                                   )

# Validation images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(train_dir,
                                                    target_size=(150, 150),
                                                    )

val_generator = val_datagen.flow_from_directory(val_dir,
                                                target_size=(150, 150),
                                                )

# training
history = model.fit_generator(train_generator,
                              steps_per_epoch=100,
                              epochs=100,
                              validation_data=val_generator,
                              validation_steps=50,
                              )

In [None]:
# save the model trained with augmentation and dropout to an HDF5 file
model.save('../saved_models/dog_breed_identification_small_2.h5')

In [None]:
# plot the loss and accuracy curves for training and validation
logs = history.history
# Keras < 2.3 reports metrics=['accuracy'] as 'acc'; later releases keep the
# name exactly as given, so accept whichever key is present.
acc_key = 'acc' if 'acc' in logs else 'accuracy'

fig, ax = plt.subplots(1, 2)
ax[0].plot(logs['loss'], color='b', label="Training loss")
ax[0].plot(logs['val_loss'], color='r', label="Validation loss")
legend = ax[0].legend(loc='best', shadow=True)

ax[1].plot(logs[acc_key], color='b', label="Training accuracy")
ax[1].plot(logs['val_' + acc_key], color='r', label="Validation accuracy")
legend = ax[1].legend(loc='best', shadow=True)

### Using Pre-trained Convnet:

#### Feature Extraction:

In [None]:
from keras.applications import VGG16

# VGG16 with ImageNet weights and without its top (fully connected) layers,
# configured for 150x150 3-channel inputs.
conv_base = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(150, 150, 3),
)

conv_base.summary()

In [None]:
# fast feature extraction w/o data aug

import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

# NOTE(review): the original cell left these paths blank, which also wiped the
# train_dir/val_dir used by other cells. train/val now point at the split used
# for training above; a sibling 'test' folder is assumed — confirm it exists.
base_dir = '../datasets/dog_breed_identification/copy-train-2'
train_dir = base_dir + '/train'
val_dir = base_dir + '/val'
test_dir = base_dir + '/test'

datagen = ImageDataGenerator(rescale=1./255)


def extract_features(directory, sample_count, batch_size=20):
    """Run images from ``directory`` through ``conv_base`` and collect the outputs.

    Args:
        directory: folder read with ``datagen.flow_from_directory``.
        sample_count: number of images to process.
        batch_size: number of images passed to ``conv_base.predict`` at once.

    Returns:
        A ``(features, labels)`` pair. ``features`` has shape
        ``(sample_count,) + conv_base.output_shape[1:]``; ``labels`` holds the
        one-hot rows produced by the generator, shape
        ``(sample_count, generator.num_classes)``.
    """
    generator = datagen.flow_from_directory(directory,
                                            target_size=(150, 150),
                                            batch_size=batch_size,
                                            class_mode='categorical',
                                            )

    features = np.zeros(shape=(sample_count,) + tuple(conv_base.output_shape[1:]))
    # class_mode='categorical' yields one-hot label rows, so labels must be 2-D.
    labels = np.zeros(shape=(sample_count, generator.num_classes))

    filled = 0
    # The generator loops over the data indefinitely; stop once every row is filled.
    for inputs_batch, labels_batch in generator:
        # A batch may be shorter than batch_size, and the last one may overshoot.
        take = min(len(inputs_batch), sample_count - filled)
        features_batch = conv_base.predict(inputs_batch)
        features[filled:filled + take] = features_batch[:take]
        labels[filled:filled + take] = labels_batch[:take]

        filled += take
        if filled >= sample_count:
            break

    return features, labels


n_samples = 2000

train_features, train_labels = extract_features(train_dir, n_samples)
val_features, val_labels = extract_features(val_dir, n_samples)
test_features, test_labels = extract_features(test_dir, n_samples)

# Flatten each feature map into one vector per image for the dense classifier.
train_features = train_features.reshape(n_samples, -1)
val_features = val_features.reshape(n_samples, -1)
test_features = test_features.reshape(n_samples, -1)

In [None]:
# defining and training densely connected classifier
from keras.models import Sequential, Model
from keras.layers import Dropout, Dense
from keras.optimizers import RMSprop

model = Sequential()
# Input width follows the flattened feature vectors (4 * 4 * 512 for the VGG16 base above).
model.add(Dense(256, activation='relu', input_dim=train_features.shape[1]))
model.add(Dropout(0.5))
model.add(Dense(120, activation='softmax'))

model.compile(optimizer=RMSprop(lr=2e-5),
              loss='categorical_crossentropy',
              metrics=['acc'])

# The extraction cell names the validation arrays val_features / val_labels.
history = model.fit(train_features, train_labels,
                    epochs=10,
                    batch_size=20,
                    validation_data=(val_features, val_labels),
                    )

In [None]:
# save the dense classifier trained on the extracted features to an HDF5 file
model.save('../saved_models/dog_breed_identification_small_3.h5')

In [None]:
# plot the loss and accuracy curves for training and validation
logs = history.history
# Keras < 2.3 reports metrics=['accuracy'] as 'acc'; later releases keep the
# name exactly as given, so accept whichever key is present.
acc_key = 'acc' if 'acc' in logs else 'accuracy'

fig, ax = plt.subplots(1, 2)
ax[0].plot(logs['loss'], color='b', label="Training loss")
ax[0].plot(logs['val_loss'], color='r', label="Validation loss")
legend = ax[0].legend(loc='best', shadow=True)

ax[1].plot(logs[acc_key], color='b', label="Training accuracy")
ax[1].plot(logs['val_' + acc_key], color='r', label="Validation accuracy")
legend = ax[1].legend(loc='best', shadow=True)

In [None]:
# feature extraction with data aug

from keras.models import Sequential, Model
from keras.layers import Flatten, Dense, Dropout

# Stack a small classifier on top of the pretrained convolutional base.
model = Sequential()
model.add(conv_base)
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(120, activation='softmax'))

model.summary()

In [None]:
# Freezing is done through the `trainable` flag; assigning to a misspelt
# attribute would silently leave every weight trainable.
print('The no. of trainable weights before freezing the conv_base:', len(model.trainable_weights))
conv_base.trainable = False
print('The no. of trainable weights after freezing the conv_base:', len(model.trainable_weights))

In [None]:
# train model end to end with a frozen conv_base

from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import RMSprop

# training network using data augmentation and dropout
# NOTE(review): width_shift_range=0 means no horizontal shifting — confirm intended.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=40,
                                   width_shift_range=0,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   fill_mode='nearest',
                                   )

# Validation images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(train_dir,
                                                    target_size=(150, 150),
                                                    )

val_generator = val_datagen.flow_from_directory(val_dir,
                                                target_size=(150, 150),
                                                )

# `optimizer` was never defined at notebook level. RMSprop(lr=2e-5) mirrors the
# dense classifier trained on extracted features — adjust if another rate was meant.
optimizer = RMSprop(lr=2e-5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# training
history = model.fit_generator(train_generator,
                              steps_per_epoch=100,
                              epochs=100,
                              validation_data=val_generator,
                              validation_steps=50,
                              )


In [None]:
# plot the loss and accuracy curves for training and validation
logs = history.history
# Keras < 2.3 reports metrics=['accuracy'] as 'acc'; later releases keep the
# name exactly as given, so accept whichever key is present.
acc_key = 'acc' if 'acc' in logs else 'accuracy'

fig, ax = plt.subplots(1, 2)
ax[0].plot(logs['loss'], color='b', label="Training loss")
ax[0].plot(logs['val_loss'], color='r', label="Validation loss")
legend = ax[0].legend(loc='best', shadow=True)

ax[1].plot(logs[acc_key], color='b', label="Training accuracy")
ax[1].plot(logs['val_' + acc_key], color='r', label="Validation accuracy")
legend = ax[1].legend(loc='best', shadow=True)

#### Fine tuning

In [None]:
# Print the layers of the convolutional base again; presumably to pick the layer where fine-tuning starts.
conv_base.summary()

In [None]:
# freezing all layers up to a specific one
conv_base.trainable = True

# NOTE(review): the original left the layer name empty. 'block5_conv1' (first
# convolution of VGG16's last block) is assumed as the fine-tuning boundary — confirm.
first_trainable_layer = 'block5_conv1'

set_trainable = False
for layer in conv_base.layers:
    # Everything from the boundary layer onwards becomes trainable.
    if layer.name == first_trainable_layer:
        set_trainable = True
    layer.trainable = set_trainable

In [None]:
# Changes to `trainable` only take effect after the model is compiled again.
# `optimizer` was never defined at notebook level; RMSprop(lr=1e-5) is an
# assumed, deliberately small rate for fine-tuning — confirm.
optimizer = RMSprop(lr=1e-5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# training
history = model.fit_generator(train_generator,
                              steps_per_epoch=100,
                              epochs=100,
                              validation_data=val_generator,
                              validation_steps=50,
                              )


In [None]:
# plot the loss and accuracy curves for training and validation
logs = history.history
# Keras < 2.3 reports metrics=['accuracy'] as 'acc'; later releases keep the
# name exactly as given, so accept whichever key is present.
acc_key = 'acc' if 'acc' in logs else 'accuracy'

fig, ax = plt.subplots(1, 2)
ax[0].plot(logs['loss'], color='b', label="Training loss")
ax[0].plot(logs['val_loss'], color='r', label="Validation loss")
legend = ax[0].legend(loc='best', shadow=True)

ax[1].plot(logs[acc_key], color='b', label="Training accuracy")
ax[1].plot(logs['val_' + acc_key], color='r', label="Validation accuracy")
legend = ax[1].legend(loc='best', shadow=True)

In [None]:
# smoothing the plots
def smooth_curves(points, factor = 0.8):
    """Return an exponentially smoothed copy of ``points``.

    The first value is kept as is; every later value becomes
    ``previous_smoothed * factor + point * (1 - factor)``.

    Args:
        points: iterable of numbers, e.g. one metric per epoch.
        factor: weight given to the previous smoothed value; 0 leaves the
            curve unchanged, values near 1 smooth heavily.

    Returns:
        A new list with one smoothed value per input point (empty for empty input).
    """
    smoothed_points = []
    for point in points:
        if smoothed_points:
            previous = smoothed_points[-1]
            smoothed_points.append(previous * factor + point * (1 - factor))
        else:
            # Nothing to blend with yet: the first point seeds the curve.
            smoothed_points.append(point)
    return smoothed_points

# The curves were never pulled out of the training history; do it here.
logs = history.history
# Keras < 2.3 reports metrics=['accuracy'] as 'acc'; later releases keep the given name.
acc_key = 'acc' if 'acc' in logs else 'accuracy'
acc, val_acc = logs[acc_key], logs['val_' + acc_key]
loss, val_loss = logs['loss'], logs['val_loss']
epochs = range(1, len(acc) + 1)

plt.plot(epochs, smooth_curves(acc), 'bo', label='Smoothed Training Curve')
plt.plot(epochs, smooth_curves(val_acc), 'b-', label='Smoothed Validation Curve')
plt.title("Training/validation Accuracy")
plt.legend()
plt.figure()

plt.plot(epochs, smooth_curves(loss), 'bo', label='Smoothed Training Curve')
# This line shows the validation loss, so label it as such.
plt.plot(epochs, smooth_curves(val_loss), 'b-', label='Smoothed Validation Curve')
plt.title("Training/validation Loss")
plt.legend()

plt.show()

In [None]:
# `test_datagen` was never created; test images get the same rescaling as validation images.
test_datagen = ImageDataGenerator(rescale=1./255)

# NOTE(review): test_dir must point at a folder with one sub-folder per class — confirm it is set.
test_generator = test_datagen.flow_from_directory(test_dir,
                                                  target_size=(150, 150),
                                                  )

test_loss, test_acc = model.evaluate_generator(test_generator, steps=50)
print('test acc:', test_acc)

### Visualizing Convnet

##### Intermediate Activations

from keras.models import load_model

# NOTE(review): the path was left empty. The small convnet saved after the
# augmentation + dropout run is assumed to be the model to inspect — confirm.
model = load_model('../saved_models/dog_breed_identification_small_2.h5')
model.summary()

In [None]:
import os

from keras.preprocessing import image
import numpy as np

# NOTE(review): the path was left empty. The first image of the first class
# folder is used as a stand-in — replace with the image you want to inspect.
sample_class_dir = os.path.join(train_directory, labels[0])
img_path = os.path.join(sample_class_dir, sorted(os.listdir(sample_class_dir))[0])

img = image.load_img(img_path, target_size=(150, 150))
img_tensor = image.img_to_array(img)
img_tensor = np.expand_dims(img_tensor, axis=0)  # add the batch axis expected by predict()
img_tensor /= 255.  # same rescaling the generators applied during training

print(img_tensor.shape)

In [None]:
# Display the preprocessed image (entry 0 along the batch axis added above) as a visual check.
plt.imshow(img_tensor[0])
plt.show()

In [None]:
from keras.models import Model

# A model that maps the input image to the outputs of the first eight layers.
layer_outputs = [layer.output for layer in model.layers[:8]]
activation_model = Model(inputs=model.input, outputs=layer_outputs)

# One array per requested layer output.
activations = activation_model.predict(img_tensor)

first_layer_activation = activations[0]
print(first_layer_activation.shape)

import matplotlib.pyplot as plt

# Two individual channels of the first layer's activation.
plt.matshow(first_layer_activation[0, :, :, 4], cmap='viridis')

plt.matshow(first_layer_activation[0, :, :, 7], cmap='viridis')


In [None]:
# visualizing every channel in every intermediate activation
layer_names = [layer.name for layer in model.layers[:8]]

images_per_row = 16

for layer_name, layer_activation in zip(layer_names, activations):
    n_features = layer_activation.shape[-1]  # number of channels in this activation
    size = layer_activation.shape[1]         # assumes square (1, size, size, channels) maps — confirm

    # Tile the channels into a grid that is images_per_row tiles wide.
    n_cols = n_features // images_per_row
    if n_cols == 0:
        continue  # fewer channels than one full row; nothing to tile
    display_grid = np.zeros((size * n_cols, images_per_row * size))

    for col in range(n_cols):
        for row in range(images_per_row):
            # Float copy, so the stored activations are left untouched.
            channel_image = layer_activation[0, :, :, col * images_per_row + row].astype('float64')

            # Centre and scale to unit variance (added here; without it the
            # *64 / +128 mapping has no fixed range), then clip to 0..255.
            channel_image -= channel_image.mean()
            std = channel_image.std()
            if std > 0:  # constant channels would otherwise divide by zero
                channel_image /= std
            channel_image *= 64
            channel_image += 128
            channel_image = np.clip(channel_image, 0, 255).astype('uint8')

            display_grid[col * size : (col + 1) * size,
                         row * size : (row + 1) * size] = channel_image

    # One figure per layer, sized so each grid pixel keeps a constant scale.
    scale = 1. / size
    plt.figure(figsize=(scale * display_grid.shape[1],
                        scale * display_grid.shape[0]))

    plt.title(layer_name)
    plt.grid(False)
    plt.imshow(display_grid, aspect='auto', cmap='viridis')

In [None]:
# Visualizing ConvNet Filters

from keras.applications import VGG16
from keras import backend as K

model = VGG16(weights='imagenet',
              include_top=False,
              )

layer_name = 'block3_conv1'
filter_index = 0

# Loss = mean activation of the chosen filter in the chosen layer.
layer_output = model.get_layer(layer_name).output
loss = K.mean(layer_output[:, :, :, filter_index])

# K.gradients returns a list of tensors; take its single entry
# rather than indexing into the input tensor itself.
grads = K.gradients(loss, model.input)[0]
# Divide by the gradient's RMS; the 1e-5 term guards against division by zero.
grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)

# Callable that returns the loss and gradient values for a given input image.
iterate = K.function([model.input], [loss, grads])

import numpy as np
loss_value, grads_value = iterate([np.zeros((1, 150, 150, 3))])



In [None]:
# loss maximization via stochastic gradient descent
