**Install requirements**

In [1]:
!pip3 install 'keras'



**Import libraries**

In [0]:
import os
import sys
import shutil
import numpy as np
from random import shuffle

from keras import layers
import keras.backend as K
from keras import optimizers
from keras import applications
from keras.models import Model
from keras.utils import plot_model
from keras.models import Sequential
from keras.utils import layer_utils
from keras.preprocessing import image
from keras.callbacks import ModelCheckpoint
from keras.utils.data_utils import get_file
from keras.utils.vis_utils import model_to_dot
from keras.applications.imagenet_utils import preprocess_input
from keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout, GlobalMaxPooling2D, GlobalAveragePooling2D

from keras.applications import vgg16

import pydot
from IPython.display import SVG
from matplotlib.pyplot import imshow
from sklearn.model_selection import train_test_split

# Use channels-last image tensors, matching the (224, 224, 3) shapes used below.
K.set_image_data_format(data_format='channels_last')
# Seed NumPy's global RNG so NumPy-driven randomness repeats between runs.
np.random.seed(seed=1000)

In [0]:
# Remove a leftover 'test' split directory from a previous run. The guard keeps
# this cell from raising FileNotFoundError on a fresh checkout (Restart & Run All).
# NOTE(review): the dataset cell *moves* images out of the clone, so deleting
# 'test' discards those images until the repository is cloned again - confirm
# that this is intended.
if os.path.isdir('test'):
  shutil.rmtree('test')

**Utility Functions**

In [0]:
def create_model(input_shape=(224, 224, 3), num_classes=120):
  """Build a small convolutional classifier and print its summary.

  Layout: an input BatchNormalization layer, then five
  Conv2D(3x3, he_normal, relu) -> MaxPooling2D(2) -> BatchNormalization
  stages with 16, 32, 64, 128 and 256 filters, then global average pooling
  and a softmax Dense output layer.

  Args:
    input_shape: shape of one input image, passed to the first layer.
      Defaults to (224, 224, 3), the value previously hard-coded.
    num_classes: number of units in the softmax output layer. Defaults to
      120, the value previously hard-coded.

  Returns:
    The uncompiled Sequential model.
  """
  model = Sequential()
  # The first layer carries the input shape for the whole stack.
  model.add(BatchNormalization(input_shape=input_shape))

  # The stages differ only in their filter count, so build them in a loop.
  for num_filters in (16, 32, 64, 128, 256):
    model.add(Conv2D(filters=num_filters, kernel_size=3, kernel_initializer='he_normal', activation='relu'))
    model.add(MaxPooling2D(pool_size=2))
    model.add(BatchNormalization())

  model.add(GlobalAveragePooling2D())
  model.add(Dense(num_classes, activation='softmax'))

  model.summary()

  return model

# def alexnet_model():
#   #Instantiate an empty model
#   model = Sequential()

#   # 1st Convolutional Layer
#   model.add(Conv2D(filters=96, input_shape=(224,224,3), kernel_size=(11,11), strides=(4,4), padding='valid'))
#   model.add(Activation('relu'))
#   # Max Pooling
#   model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))

#   # 2nd Convolutional Layer
#   model.add(Conv2D(filters=256, kernel_size=(11,11), strides=(1,1), padding='valid'))
#   model.add(Activation('relu'))
#   # Max Pooling
#   model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))

#   # 3rd Convolutional Layer
#   model.add(Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='valid'))
#   model.add(Activation('relu'))

#   # 4th Convolutional Layer
#   model.add(Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='valid'))
#   model.add(Activation('relu'))

#   # 5th Convolutional Layer
#   model.add(Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), padding='valid'))
#   model.add(Activation('relu'))
#   # Max Pooling
#   model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))

#   # Passing it to a Fully Connected layer
#   model.add(Flatten())
#   # 1st Fully Connected Layer
#   model.add(Dense(4096, input_shape=(224*224*3,)))
#   model.add(Activation('relu'))
#   # Add Dropout to prevent overfitting
#   model.add(Dropout(0.4))

#   # 2nd Fully Connected Layer
#   model.add(Dense(4096))
#   model.add(Activation('relu'))
#   # Add Dropout
#   model.add(Dropout(0.4))

#   # 3rd Fully Connected Layer
#   model.add(Dense(1000))
#   model.add(Activation('relu'))
#   # Add Dropout
#   model.add(Dropout(0.4))

#   # Output Layer
#   model.add(Dense(17))
#   model.add(Activation('softmax'))

#   model.summary()

#   return model

def compile_model(model, _opt='adam', _loss='categorical_crossentropy', _metrics=['accuracy']):
  """Compile `model` in place and return it.

  Args:
    model: object exposing `compile(optimizer, loss, metrics)`.
    _opt: optimizer passed to `compile` (default 'adam').
    _loss: loss passed to `compile` (default 'categorical_crossentropy').
    _metrics: metrics passed to `compile` (default ['accuracy']).

  Returns:
    The same `model` object, after `compile` has been called on it.
  """
  # The default above is a single list shared by every call. Pass a copy so
  # that anything which keeps or mutates the list cannot leak state into
  # later calls. Non-sequence values (e.g. a dict) are passed through as-is.
  metrics = list(_metrics) if isinstance(_metrics, (list, tuple)) else _metrics
  # Keyword arguments keep the call correct regardless of positional order.
  model.compile(optimizer=_opt, loss=_loss, metrics=metrics)
  return model

def set_checkpointer(_filePath):
  """Create a ModelCheckpoint callback that writes to `_filePath`.

  The callback is configured with `verbose=1` and `save_best_only=True`.
  The parent directory of `_filePath` is created if it is missing, because
  nothing else in this notebook creates it (e.g. 'saved_models/').

  Args:
    _filePath: destination path for the saved checkpoint.

  Returns:
    The configured ModelCheckpoint callback.
  """
  target_dir = os.path.dirname(_filePath)
  # A bare file name has no directory component, so there is nothing to create.
  if target_dir:
    os.makedirs(target_dir, exist_ok=True)
  return ModelCheckpoint(filepath=_filePath, verbose=1, save_best_only=True)

def train(model, num_epochs, batch_size, step_size, train_data, train_target, valid_data, valid_target, checkpointer, datagen=None):
  """Fit `model` on in-memory arrays streamed through a data generator.

  Args:
    model: object exposing `fit_generator`.
    num_epochs: number of epochs to train for.
    batch_size: batch size requested from `datagen.flow`.
    step_size: accepted for interface compatibility but unused; the number
      of steps per epoch is derived from the training set size instead.
    train_data: training inputs; must expose `.shape[0]` (number of samples).
    train_target: training labels.
    valid_data: validation inputs.
    valid_target: validation labels.
    checkpointer: callback passed to `fit_generator`.
    datagen: generator object exposing `flow(x, y, batch_size=...)`. When
      omitted, the notebook-level `datagen_train` is used.
  """
  if datagen is None:
    # The body used to read an undefined global `datagen`; the notebook's
    # training generator is named `datagen_train`.
    datagen = datagen_train
  model.fit_generator(datagen.flow(train_data, train_target, batch_size=batch_size),
                      validation_data=(valid_data, valid_target),
                      # Whole batches only: a trailing partial batch is not counted.
                      steps_per_epoch=train_data.shape[0] // batch_size,
                      # Was `epochs=epochs`, which is a NameError.
                      epochs=num_epochs, callbacks=[checkpointer], verbose=1)

def fit_gen(model, _train_set, _steps_per_epoch, _epoch, _valid_set, _valid_steps=800):
  """Run `model.fit_generator` on a training generator and return the model.

  Args:
    model: object exposing `fit_generator`.
    _train_set: generator yielding training batches.
    _steps_per_epoch: forwarded as `steps_per_epoch`.
    _epoch: forwarded as `epochs`.
    _valid_set: forwarded as `validation_data`.
    _valid_steps: forwarded as `validation_steps` (default 800).

  Returns:
    The `model` object that was passed in.
  """
  fit_options = dict(
    steps_per_epoch=_steps_per_epoch,
    epochs=_epoch,
    validation_data=_valid_set,
    validation_steps=_valid_steps,
  )
  model.fit_generator(_train_set, **fit_options)
  return model

# def load_best_model(model, _filePath):
#   model.load_weights(_filePath)

def test(model, test_data, test_target):
  dog_breed_predictions = [np.argmax(model.predict(np.expand_dims(tensor, axis=0))) for tensor in test_data]

  test_accuracy = 100*np.sum(np.array(dog_breed_predictions)==np.argmax(test_target, axis=1))/len(dog_breed_predictions)
  print('Test accuracy: %.4f%%' % test_accuracy)

**Dataset preparation**

In [7]:
# Clone github repository with data
if not os.path.isdir('./mlai/Images'):
  !git clone https://github.com/jmagdeska/mlai.git

DATA_DIR = 'mlai/Images'

if not os.path.isdir('train'):
  os.mkdir('train')
if not os.path.isdir('valid'):
  os.mkdir('valid')
if not os.path.isdir('test'):
  os.mkdir('test')

for path, dirs, files in os.walk(DATA_DIR):
  dirs.sort(key = lambda x: x.lower())
  num_samples = len(files)
  i = 0
  
  l = (int)(0.8*num_samples)
  if l != 0:
    train_len = int(0.8*l)
    valid_len = l - train_len
    test_len = num_samples - l

    label = path.split("/")[2]
    shuffle(files)

    for filename in files: 
      full_path = os.path.join(path, filename)       
      if i < train_len:   
        split = 'train'     
      elif i < (train_len + valid_len):
        split = 'valid'
      else:
        split = 'test'
      
      dir_name = os.path.join(split, label)
      if not os.path.isdir(dir_name):
        os.mkdir(os.path.join(split, label))
      shutil.move(full_path, dir_name)

      i += 1  

Cloning into 'mlai'...
remote: Enumerating objects: 31, done.[K
remote: Counting objects: 100% (31/31), done.[K
remote: Compressing objects: 100% (31/31), done.[K
remote: Total 41143 (delta 10), reused 3 (delta 0), pack-reused 41112[K
Receiving objects: 100% (41143/41143), 1.38 GiB | 52.72 MiB/s, done.
Resolving deltas: 100% (10/10), done.
Checking out files: 100% (20581/20581), done.


**Dataset and Dataloader Preparation**

In [8]:
from keras.preprocessing.image import ImageDataGenerator

TRAIN_DIR, VALID_DIR, TEST_DIR = './train', './valid', './test'

# Evaluation pipeline: pixel values are only rescaled by 1/255.
datagen_test = ImageDataGenerator(rescale=1./255)
# Training pipeline: the same rescaling plus shear, zoom and horizontal-flip augmentation.
datagen_train = ImageDataGenerator(
  rescale=1./255,
  shear_range=0.2,
  zoom_range=0.2,
  horizontal_flip=True,
)

# Both generators read class sub-folders from disk, resize images to 224x224
# and yield batches of 32.
generator_train = datagen_train.flow_from_directory(
  TRAIN_DIR,
  target_size=(224, 224),
  batch_size=32,
)
generator_valid = datagen_test.flow_from_directory(
  VALID_DIR,
  target_size=(224, 224),
  batch_size=32,
)

#generator_test=datagen_test.flow_from_directory(TEST_DIR)

Found 13091 images belonging to 120 classes.
Found 3327 images belonging to 120 classes.


**Main**

In [34]:
# Training configuration.
NUM_CLASSES = 120
NUM_EPOCHS = 30
STEP_SIZE = 200
VALID_STEP_SIZE = 80

######## Custom model #########
# my_model = create_model()
# my_model = compile_model(my_model)
# best_model_path = 'saved_models/weights.bestaugmented.from_scratch.hdf5'
# chPointer = set_checkpointer(best_model_path)
# my_model = fit_gen(my_model, generator_train, STEP_SIZE, NUM_EPOCHS, generator_valid, VALID_STEP_SIZE)

######## pretrained VGG16 model ########
# VGG16 with ImageNet weights and without its top classifier layers.
vgg_model = vgg16.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
# Only the last 4 layers of the base stay trainable; every earlier layer is frozen.
for layer in vgg_model.layers[:-4]:
    layer.trainable = False

# Stack a new classifier head on the VGG base: flatten, one hidden layer
# with dropout, then a softmax over NUM_CLASSES.
my_model = Sequential([
    vgg_model,
    Flatten(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='softmax'),
])

my_model.compile(
    optimizer=optimizers.RMSprop(lr=1e-4),
    loss='categorical_crossentropy',
    metrics=['acc'],
)
my_model = fit_gen(
    my_model,
    generator_train,
    _steps_per_epoch=STEP_SIZE,
    _epoch=NUM_EPOCHS,
    _valid_set=generator_valid,
    _valid_steps=VALID_STEP_SIZE,
)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
