# Model Trainer

## Import dependencies

In [21]:
#Import dependencies
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from pyimagesearch.smallervggnet import SmallerVGGNet
from imutils import paths
import numpy as np
import random
import pickle
import cv2
import os

## Define variables
-  Set epochs
-  Set learning rate
-  Set batch size
-  Create empty lists to hold the image matrices and their labels

In [22]:
# Training hyperparameters: number of epochs, initial learning rate
# handed to the optimizer, and mini-batch size.
EPOCHS, INIT_LR, BS = 20, 1e-3, 32

# Accumulators filled while the dataset is loaded: one image matrix
# and one label per image.
data, labels = [], []

## Use Imutils to grab the image paths from the dataset directory

In [23]:
# Collect every image path found under the "dataset" directory, then
# shuffle the list in place. The fixed seed makes the shuffle repeatable
# for a given listing order.
imagePaths = [imagePath for imagePath in paths.list_images("dataset")]
random.seed(42)
random.shuffle(imagePaths)

In [24]:
# Display the first 10 imagePaths to check that the shuffle and pathing are correct.
# A slice is used instead of a fixed index range so that exactly 10 paths are
# shown (range(0, 9) only covered 9) and so that a dataset with fewer than
# 10 images does not raise an IndexError.
for previewPath in imagePaths[:10]:
    print(previewPath)

dataset\corgi\00000139.jpg
dataset\xoloitzcuintli\00000030.jpg
dataset\xoloitzcuintli\00000212.jpg
dataset\chihuahua\00000190.jpg
dataset\xoloitzcuintli\00000026.jpg
dataset\xoloitzcuintli\00000205.jpg
dataset\corgi\00000087.jpg
dataset\xoloitzcuintli\00000153.jpg
dataset\chihuahua\00000180.jpg


## Populate empty arrays

### Data
1. Iterate over all image paths
2. Read in each image using OpenCV
3. Resize the image to 96x96 using OpenCV
4. Append the image matrix to the data list

### Labels
1. Split the file path on `os.path.sep`, e.g. `dataset\chihuahua\00000180.jpg` becomes `dataset`, `chihuahua`, `00000180.jpg`; the second-to-last component is the breed name
2. Append that breed name to the labels list

In [25]:
# Start from empty lists so that re-running this cell never duplicates
# entries, and so it still works after `data`/`labels` have been replaced
# by NumPy arrays further down the notebook.
data = []
labels = []

# loop over the input images
for imagePath in imagePaths:
    image = cv2.imread(imagePath)
    # cv2.imread returns None (it does not raise) when a file cannot be
    # read; skip such files instead of letting cv2.resize fail on None.
    if image is None:
        print("Skipping unreadable image:", imagePath)
        continue
    image = cv2.resize(image, (96, 96))
    image = img_to_array(image)
    data.append(image)
    # Break the path apart on the OS path separator; the second-to-last
    # component is the directory name, which is the dog breed.
    label = imagePath.split(os.path.sep)[-2]
    # store the dog breed name as the label
    labels.append(label)

## Create Train Test Split data
1. Convert the image list to a NumPy float array and divide it by 255 to scale the integer pixel values to floats in the range 0-1
2. Create a NumPy array for the labels
3. Binarize the labels with `LabelBinarizer` (an alternative to one-hot encoding)
4. Split the data and labels into training and test sets (`X_train`, `X_test`, `y_train`, `y_test`)

In [26]:
# Stack the loaded images into a single float array, then rescale the
# raw pixel intensities by 1/255.
data = np.array(data, dtype="float")
data = data / 255.0

# Binarize the labels; `lb` is kept because it holds the fitted classes.
lb = LabelBinarizer()
labels = lb.fit_transform(np.array(labels))

# Hold out 20% of the samples for testing and train on the remaining 80%.
# The fixed random_state keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=42
)

In [27]:
# Report the shapes of the training images and of their labels.
print("Training Data Info")
for heading, split in (
    ("Training Data Shape:", X_train),
    ("Training Data Labels Shape:", y_train),
):
    print(heading, split.shape)

Training Data Info
Training Data Shape: (559, 96, 96, 3)
Training Data Labels Shape: (559, 3)


In [28]:
# First training image: after the 1/255 rescale its pixels are floats
# rather than the original integer intensities.
X_train[0]

array([[[0.64705882, 0.68627451, 0.7254902 ],
        [0.65490196, 0.69411765, 0.73333333],
        [0.65490196, 0.69411765, 0.73333333],
        ...,
        [0.68627451, 0.73333333, 0.78039216],
        [0.68235294, 0.72941176, 0.77647059],
        [0.68235294, 0.72941176, 0.77647059]],

       [[0.63921569, 0.68627451, 0.7254902 ],
        [0.64313725, 0.69019608, 0.72941176],
        [0.6627451 , 0.70196078, 0.74117647],
        ...,
        [0.68627451, 0.73333333, 0.78039216],
        [0.68627451, 0.73333333, 0.78039216],
        [0.68627451, 0.73333333, 0.78039216]],

       [[0.64313725, 0.69019608, 0.7372549 ],
        [0.64705882, 0.69411765, 0.73333333],
        [0.65882353, 0.70196078, 0.7372549 ],
        ...,
        [0.68627451, 0.73333333, 0.78039216],
        [0.68627451, 0.73333333, 0.78039216],
        [0.68627451, 0.73333333, 0.78039216]],

       ...,

       [[0.00784314, 0.00392157, 0.03921569],
        [0.00392157, 0.00784314, 0.04705882],
        [0.01176471, 0

In [29]:
# Binarized training labels produced by the LabelBinarizer: one row per
# training sample; for this 3-class dataset each row has one column per class.
y_train

array([[0, 0, 1],
       [1, 0, 0],
       [0, 0, 1],
       ...,
       [1, 0, 0],
       [0, 1, 0],
       [1, 0, 0]])

In [30]:
# Binarized labels of the held-out test split, in the same layout as y_train.
y_test

array([[0, 0, 1],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 1, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 1, 0],
       [1, 0, 0],
       [0, 0, 1],
       [0, 0, 1],
       [0, 1, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [0, 1, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0,

## Use image transformations and optimizer to train
1. Use the Keras `ImageDataGenerator` to augment the training images with random rotations, shifts, shear, zoom and horizontal flips
2. Use the Adam optimizer to train the model

In [31]:
# Image generator used for data augmentation during training: each
# option below enables one kind of random transformation.
aug = ImageDataGenerator(
    rotation_range=25,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)

In [32]:
# Announce the step, then build and compile the network.
print(
    "----------------------------------------",
    "----------- Compiling Model: -----------",
    "----------------------------------------",
    sep="\n",
)

# The network takes 96x96 inputs with 3 channels and has one output per
# class known to the fitted label binarizer.
model = SmallerVGGNet.build(
    width=96,
    height=96,
    depth=3,
    classes=len(lb.classes_),
)
# Adam starts at INIT_LR; the decay term is the initial rate divided by the epoch count.
optimize = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimize,
    metrics=["accuracy"],
)

----------------------------------------
----------- Compiling model: -----------
----------------------------------------


In [33]:
# Announce the step, then train the network on augmented batches.
print(
    "----------------------------------------",
    "--------------- Training ---------------",
    "----------------------------------------",
    sep="\n",
)

# Batches come from the augmenting generator; the untouched test split is
# used for validation. `H` keeps the object returned by the training call.
H = model.fit_generator(
    aug.flow(X_train, y_train, batch_size=BS),
    steps_per_epoch=X_train.shape[0] // BS,
    epochs=EPOCHS,
    validation_data=(X_test, y_test),
    verbose=1,
)

----------------------------------------
--------------- Training ---------------
----------------------------------------
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## Save a model file

In [34]:
# Ask for a file name and export the trained model under it so it can be
# used later.
print(
    "----------------------------------------",
    "Please enter a name for the saved model:",
    "----------------------------------------",
    sep="\n",
)
trainedModel = input()
model.save(trainedModel)

----------------------------------------
Please enter a name for the saved model:
----------------------------------------
jupyterModel


## Save a label file

In [36]:
# Save the fitted label binarizer to a file so it can be used later.
print("----------------------------------------")
print("Please enter a name for the saved labels:")
print("----------------------------------------")
labelFile = input()
# The `with` block closes the file even if pickling or writing raises,
# which the previous manual open()/close() pair did not guarantee.
with open(labelFile, "wb") as f:
    f.write(pickle.dumps(lb))

----------------------------------------
Please enter a name for the saved labels:
----------------------------------------
jupyterLabels
