<a href="https://colab.research.google.com/github/jdmiranda/ai_notebooks/blob/master/object_recognition_using_cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Object Recognition using CNN

## import libraries

In [0]:
import os # for file handling
import pandas as pd # for data handling
import numpy as np # for computations
import time # to time runs
from sklearn import metrics # to evaluate classification accuracy
import tensorflow as tf # for neural networks
import matplotlib.pyplot as plt # to display images

## Specify directories and files for input and output

In [0]:
# Directory layout used throughout the notebook. Each path keeps its trailing
# slash because file names are appended to it by plain string concatenation.
dataDir = 'data/' # directory for input files
modelDir = 'model/' # directory for saved models
outputDir = 'output/' # directory for output files

# create directories if needed

In [0]:
# Make sure the model and output folders exist before anything is written.
# dirname() strips the trailing slash from each configured path.
for folder in (modelDir, outputDir):
    os.makedirs(os.path.dirname(folder), exist_ok=True)

## Input files

In [0]:
# CSV file names; each one is appended to dataDir when the data is read.
trainInput = 'hw2q3train.csv' # 60,000 labeled examples for training
testInput = 'hw2q3test.csv' # 10,000 labeled examples for testing
unlabeledInput = 'hw2q3unlabeled.csv' # 50 unlabeled examples for labeling

## Read training, test, and unlabeled data

In [0]:
# Load the three CSV files from dataDir into DataFrames, in the order
# training, test, unlabeled.
train, test, unlabeled = (
    pd.read_csv(dataDir + fileName)
    for fileName in (trainInput, testInput, unlabeledInput)
)

# create input feature matrices and output vectors

In [0]:
# The first column holds the class label; every remaining column is a feature.
# Column names are taken from the training frame and reused for the other two.
cols = list(train)
labelCol, featureCols = cols[0], cols[1:]

trainX = train[featureCols].values
trainY = train[labelCol].values
testX = test[featureCols].values
testY = test[labelCol].values
unlabeledX = unlabeled[featureCols].values

# print shape of matrices

In [0]:
# Report the dimensions of every array built above.
# The loop variables are deliberately not called `m`: a `for` loop variable
# stays defined after the loop, and `m` is the name that holds the model name
# elsewhere in this notebook, so re-running this cell must not overwrite it.
matrices = ['trainX', 'trainY', 'testX', 'testY', 'unlabeledX']
shapes = [arr.shape for arr in [trainX, trainY, testX, testY, unlabeledX]]
print('Shape of:')
for matrixName, matrixShape in zip(matrices, shapes):
    print('\t', matrixName, ':\t', matrixShape)

# Specify model parameters for Convolutional Neural Network

In [0]:
# Hyper-parameters that are passed to cnn() and to model.fit().
m = 'CNN' # model name; also used to build output file names
channels_layer1 = 16 # number of output channels for first convolution layer
channels_layer2 = 32 # number of output channels for second convolution layer
kernel_size = (5,5) # kernel size for both convolution layers
pool_size = (2,2) # max-pooling size after each convolution layer
dropOutRate = 0.3 # rate of the Dropout layer that follows each pooling layer
denseLayerSize = 512 # number of neurons in dense layer
dropOutRateDense = 0.3 # rate of the Dropout layer that follows the dense layer
nClasses = 10 # number of output classes
batchSize = 128 # batch size for training
nEpochs=10 # number of training epochs

modelFile = modelDir + m + '.h5' # path of the saved model file

# When True, the notebook tries to load modelFile before training.
USE_PRETRAINED_MODEL = True # use available pretrained model?

# Define CNN model

In [0]:
def cnn(**kwargs):
    """Build and compile the two-stage convolutional classifier.

    Expected keyword arguments (a missing one raises KeyError):
    - channels_layer1, channels_layer2: output channels of the two
      convolution layers
    - kernel_size: kernel size shared by both convolution layers
    - pool_size: max-pooling window used after each convolution layer
    - dropOutRate: rate of the Dropout layer after each pooling layer
    - denseLayerSize: number of units in the hidden dense layer
    - dropOutRateDense: rate of the Dropout layer after the dense layer
    - nClasses: number of units in the softmax output layer

    The first layer is declared with input_shape=(28, 28, 1).
    Returns the compiled tf.keras Sequential model."""
    layers = tf.keras.layers
    net = tf.keras.models.Sequential()

    # The two convolution stages differ only in their channel count and in
    # the input shape, which is declared on the first layer only.
    stages = (('channels_layer1', {'input_shape': (28, 28, 1)}),
              ('channels_layer2', {}))
    for channelKey, extraArgs in stages:
        # convolution -> max-pooling -> dropout (regularization)
        net.add(layers.Conv2D(kwargs[channelKey],
                              kwargs['kernel_size'],
                              activation='relu',
                              **extraArgs))
        net.add(layers.MaxPooling2D(pool_size=kwargs['pool_size']))
        net.add(layers.Dropout(kwargs['dropOutRate']))

    # flatten the feature volume so it can feed the densely connected layer
    net.add(layers.Flatten())

    # hidden dense layer followed by its own dropout
    net.add(layers.Dense(kwargs['denseLayerSize'], activation='relu'))
    net.add(layers.Dropout(kwargs['dropOutRateDense']))

    # softmax output, one unit per class
    net.add(layers.Dense(kwargs['nClasses'], activation='softmax'))

    net.compile(loss=tf.keras.losses.categorical_crossentropy,
                optimizer=tf.keras.optimizers.Adadelta(),
                metrics=['accuracy'])
    return net

# define function to reshape CNN inputs

In [0]:
def Xform(x):
    """Return x reshaped to (len(x), 28, 28, 1) with every value divided by 255"""
    nImages = len(x)
    images = x.reshape(nImages, 28, 28, 1) # one single-channel 28x28 image per row
    return images / 255

## working model

In [0]:
# Gather the hyper-parameters under the keyword names cnn() looks up,
# then build the working model from them.
modelParams = {
    'channels_layer1': channels_layer1,
    'channels_layer2': channels_layer2,
    'kernel_size': kernel_size,
    'pool_size': pool_size,
    'dropOutRate': dropOutRate,
    'denseLayerSize': denseLayerSize,
    'dropOutRateDense': dropOutRateDense,
    'nClasses': nClasses,
}
model = cnn(**modelParams)

## use available pretrained model

In [0]:
# Optionally replace the current model with one loaded from modelFile.
# Loading is best-effort: if it fails, `model` is left untouched and the
# reason is printed so a missing file can be told apart from other errors.
if USE_PRETRAINED_MODEL:
    try:
        model = tf.keras.models.load_model(modelFile)
    except Exception as err:
        # `except Exception` rather than a bare `except`, so that
        # KeyboardInterrupt / SystemExit still stop the notebook
        print('Pre-trained model not available (%s)' % err)
    else:
        print('Loaded pre-trained model %s' %modelFile)

model.summary() # display model summary

# train CNN model

In [0]:
print("Training classifier: %s" %m)

st = time.time() # clock starts before the inputs are prepared

trainImages = Xform(trainX) # reshaped and scaled training inputs
trainTargets = tf.keras.utils.to_categorical(trainY, nClasses) # one-hot labels
model.fit(trainImages,
          trainTargets,
          batch_size=batchSize,
          epochs=nEpochs,
          validation_data=None)

t = time.time() - st # elapsed seconds for input preparation and fitting
print("Time to train classifier: %4.2f seconds\n" %(t))

## save trained model

In [0]:

# Persist the trained model. The path is rebuilt here from modelDir and m.
modelFile = modelDir + m + '.h5' # name for saved model file
model.save(modelFile) # save TensorFlow model
print('Trained model saved as %s\n' %modelFile)

# banner for the prediction cells that follow
print("Predictions with trained classifier: %s" %m)

##  predict test examples with trained model

In [0]:
# per-class probabilities for every test example
predictedProb = model.predict(Xform(testX))
# index of the largest probability in each row = most likely label
predicted = predictedProb.argmax(axis=1)
# the largest probability in each row = probability of that label
labelProb = predictedProb.max(axis=1)

# evaluate quality of predictions

In [0]:
# fraction of test examples whose predicted label matches the true label
acc = metrics.accuracy_score(y_true=testY, y_pred=predicted)
print("Accuracy with test data: %4.2f%%" %(100*acc))

# save confusion_matrix

In [0]:
# confusion matrix of true labels (first argument) against predicted labels
cm = metrics.confusion_matrix(testY, predicted)
# written to the output directory, with the model name as file-name prefix
cmFile = outputDir+m+'confusionMatrix.csv'
pd.DataFrame(cm).to_csv(cmFile)

# print classification report

In [0]:
# text report comparing the true test labels with the predicted ones
report = metrics.classification_report(testY, predicted)
print(report)

# predict unlabeled examples with trained model

In [0]:
# per-class probabilities for every unlabeled example
predictedUnlabeledProb = model.predict(Xform(unlabeledX))
# index of the largest probability in each row = assigned class
predictedUnlabeled = predictedUnlabeledProb.argmax(axis=1)
# the largest probability in each row = probability of the assigned class
unlabeledProb = predictedUnlabeledProb.max(axis=1)

### display images function

In [0]:
def displayImages(images, labels):
    """Plot images in a grid, five per row, each captioned with its label
    - images: indexable collection of arrays, each reshapeable to 28x28
    - labels: one caption per image; their count decides how many are drawn"""
    perRow = 5
    rowCount = -(-len(labels) // perRow) # ceiling division
    plt.figure(figsize=(2*perRow, 2*rowCount)) # 2x2 inches per image cell
    for position, caption in enumerate(labels, start=1):
        plt.subplot(rowCount, perRow, position) # subplot positions start at 1
        # no ticks and no grid around the image
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        picture = images[position-1].reshape(28, 28)
        plt.imshow(picture, interpolation='nearest')
        plt.xlabel(caption, fontsize=12)
    plt.show()

# Display unlabeled images with assigned classes

In [0]:
# item name for each class index
items = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
         'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
# probability of the assigned class, rounded to two decimals, as text
probs = [str(round(p,2)) for p in unlabeledProb]
# caption for every unlabeled image: "<item name>: <probability>"
labels = [': '.join([items[classIdx], probText])
          for classIdx, probText in zip(predictedUnlabeled, probs)]
displayImages(unlabeledX, labels)


# save predictions for unlabeled examples

In [0]:
# Write the predicted class index of each unlabeled example to the output
# directory, one value per row, without a header row or an index column.
pd.DataFrame(predictedUnlabeled).to_csv(outputDir+'hw2q3unlabeled.csv', 
            header=False, index=False)