# Modeling Pipeline for the 2014 COCO dataset<br>
This notebook loads data from the 2014 COCO training dataset, trains a deep learning model, and evaluates the results. We split the training dataset into training, validation, and testing data since we do not yet require all of the images at this stage in our project development.
This notebook uses the LeNet-5 architecture and follows the data-loading and mask-generation method outlined in [this article](https://towardsdatascience.com/master-the-coco-dataset-for-semantic-image-segmentation-part-1-of-2-732712631047). 
Authors: Péter Hámori, Audrea Huang<br>
Date: 11 April 2021<br>
AIT Deep Learning<br>
Project Milestone 2



### Setup

In [1]:
# Colab only: mount Google Drive at /content/drive so that files stored under
# /content/drive/MyDrive (such as the annotation file used below) can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# visualize outputs
%matplotlib inline
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import skimage.io as io

# data processing
from pycocotools.coco import COCO
import cv2
from skimage.transform import resize
import numpy as np
import pylab
pylab.rcParams['figure.figsize'] = (8.0, 10.0)

In [3]:
import random

# keras
from keras.utils import to_categorical 
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [4]:
# Location of the COCO 2014 training instance annotations on the mounted Drive.
# NOTE(review): this is an absolute, user-specific path; adjust it when running elsewhere.
annFileTrain = '/content/drive/MyDrive/Colab Notebooks/AIT_DeepLearning/coco project/instances_train2014.json'

Load annotations into memory

In [6]:
# start with small dataset consisting of these classes
filterClasses = ['car', 'chair', 'book', 'bottle']

# get class IDs for corresponding filterClasses
# (this `coco` object is the one later passed to getData and get_targets)
coco=COCO(annFileTrain)
catIds = coco.getCatIds(catNms=filterClasses) 

# NOTE(review): with several category IDs, pycocotools' getImgIds returns only the
# images that contain every listed category (an intersection), not images containing
# any of them - confirm against the pycocotools source. imgIds is not referenced by
# any later cell shown in this notebook.
imgIds = coco.getImgIds(catIds=catIds)

loading annotations into memory...
Done (t=24.71s)
creating index...
index created!


Create helper functions

In [7]:
def getClassName(classID, cats):
  '''
  Look up the name of a category by its ID.
  :param  classID (int)       : requested class ID
  :param  cats (list of dicts): category records, each with an 'id' and a 'name' key
  :return: string: name of the first record whose 'id' equals classID,
           or the string "None" if no record matches
  '''
  # Lazy generator: records are inspected in order and scanning stops at the first hit
  matching_names = (cat['name'] for cat in cats if cat['id'] == classID)
  return next(matching_names, "None")

In [8]:
def getNormalMask(imageObj, classes, coco, catIds, input_image_size):
    '''
    Build a class-index mask for one image.

    Each pixel covered by an annotation of interest is set to
    classes.index(class name) + 1; every other pixel is 0. Where annotations
    overlap, the larger class value wins.

    :param imageObj (dict): COCO image record (only its 'id' is read)
    :param classes (list of strings): classes of interest; their order defines the pixel values
    :param coco (COCO): instance annotations
    :param catIds (list of integers): category IDs
    :param input_image_size (tuple): size of the mask; index 0 is the number of rows,
        index 1 the number of columns
    :return: train_mask(ndarray): mask of shape (input_image_size[0], input_image_size[1], 1)
    '''
    rows, cols = input_image_size[0], input_image_size[1]

    annIds = coco.getAnnIds(imageObj['id'], catIds=catIds, iscrowd=None)
    anns = coco.loadAnns(annIds)
    cats = coco.loadCats(catIds)

    train_mask = np.zeros((rows, cols))
    for ann in anns:
        className = getClassName(ann['category_id'], cats)
        pixel_value = classes.index(className)+1
        # cv2.resize takes dsize as (width, height), i.e. (cols, rows), and returns a
        # (rows, cols) array. Nearest-neighbour interpolation keeps the labels discrete:
        # the default bilinear interpolation blends 0 and pixel_value along object
        # borders, producing values that belong to other classes.
        new_mask = cv2.resize(coco.annToMask(ann)*pixel_value, (cols, rows),
                              interpolation=cv2.INTER_NEAREST)
        train_mask = np.maximum(new_mask, train_mask)

    # Add extra dimension for parity with train_img size [X * X * 3]
    train_mask = train_mask.reshape(rows, cols, 1)
    return train_mask

In [9]:
def getBinaryMask(imageObj, coco, catIds, input_image_size):
    '''
    Build a 0/1 mask marking which pixels belong to any class of interest.

    :param imageObj (dict): COCO image record (only its 'id' is read)
    :param coco (COCO): instance annotations
    :param catIds (list of integers): category IDs
    :param input_image_size (tuple): size of the mask; index 0 is the number of rows,
        index 1 the number of columns
    :return: train_mask(ndarray): binary mask of shape
        (input_image_size[0], input_image_size[1], 1)
    '''
    rows, cols = input_image_size[0], input_image_size[1]

    annIds = coco.getAnnIds(imageObj['id'], catIds=catIds, iscrowd=None)
    anns = coco.loadAnns(annIds)

    train_mask = np.zeros((rows, cols))
    for ann in anns:
        # cv2.resize takes dsize as (width, height), i.e. (cols, rows), and returns a
        # (rows, cols) array, matching train_mask even for non-square sizes.
        new_mask = cv2.resize(coco.annToMask(ann), (cols, rows))

        # Threshold because resizing may cause extraneous values
        new_mask = (new_mask >= 0.5).astype(new_mask.dtype)

        train_mask = np.maximum(new_mask, train_mask)

    # Add extra dimension for parity with train_img size [X * X * 3]
    train_mask = train_mask.reshape(rows, cols, 1)
    return train_mask

In [10]:
def visualizeImageAndMask(img, mask):
    '''
    Display an image and its corresponding mask side by side.
    :param  img(ndarray): image shown in the left panel
    :param  mask(ndarray): mask with a trailing channel axis; channel 0 is shown
                           in the right panel
    '''
    fig = plt.figure(figsize=(20, 10))
    outerGrid = gridspec.GridSpec(1, 2, wspace=0.1, hspace=0.1)

    # fig.add_subplot registers each axes with the figure. An axes that is only
    # constructed and never added is not drawn, which is why both panels are
    # created through add_subplot here.
    image_ax = fig.add_subplot(outerGrid[0])
    image_ax.imshow(img)
    image_ax.axis('off')

    mask_ax = fig.add_subplot(outerGrid[1])
    mask_ax.imshow(mask[:,:,0])
    mask_ax.axis('off')

    plt.show()

In [49]:
def getImage(imageObj, input_image_size):
    '''
    Download an image, divide its pixel values by 255 and resize it.
    :param   imageObj (dict)         : COCO image record; its 'coco_url' is read
    :param   input_image_size (tuple): target size; index 0 is the number of rows,
                                       index 1 the number of columns
    :return: image (ndarray): for RGB or single-channel sources, an array of shape
                              (input_image_size[0], input_image_size[1], 3)
    '''
    # Read and normalize an image
    train_img = io.imread(imageObj['coco_url'])/255.0

    # Resize. cv2.resize takes dsize as (width, height), i.e. (cols, rows), so the
    # two entries are swapped to obtain the same (rows, cols) layout as the masks
    # and the model input shape.
    train_img = cv2.resize(train_img, (input_image_size[1], input_image_size[0]))

    # An RGB 3 channel image is returned as is
    if train_img.ndim == 3 and train_img.shape[2] == 3:
        return train_img

    # To handle a black and white image, replicate it along a new last axis
    return np.stack((train_img,)*3, axis=-1)

In [12]:
def getData(number_of_samples, images, classes, coco, input_image_size=(224,224),
            batch_size=4, mode='train', mask_type='binary'):
  '''
  Get images and corresponding masks.

  :param number_of_samples (int): number of leading entries of `images` to load
  :param images (list of dictionaries): images in dataset
  :param classes (list of strings): object classes of interest
  :param coco (COCO): instance annotations
  :param input_image_size (tuple): size forwarded to getImage and the mask helpers
  :param batch_size (int): not used by this function; kept so existing calls still work
  :param mode (string): not used by this function; kept so existing calls still work
  :param mask_type (string): "binary" or "normal"
  :return im: list of images
  :return m: list of masks
  :raises ValueError: if mask_type is neither "binary" nor "normal"
  :raises IndexError: if number_of_samples exceeds len(images)
  '''
  # Fail early with a clear message instead of hitting an unassigned train_mask
  # inside the loop.
  if mask_type not in ("binary", "normal"):
    raise ValueError(
        'mask_type must be "binary" or "normal", got %r' % (mask_type,))

  catIds = coco.getCatIds(catNms=classes)

  im = []
  m = []

  for i in range(number_of_samples):
    imageObj = images[i]

    # Retrieve Image
    train_img = getImage(imageObj, input_image_size)

    # Create Mask
    if mask_type == "binary":
      train_mask = getBinaryMask(imageObj, coco, catIds, input_image_size)
    else:
      train_mask = getNormalMask(imageObj, classes, coco, catIds, input_image_size)

    im.append(train_img)
    m.append(train_mask)

  return im, m


In [13]:
def get_targets(number_of_samples, images, classes, coco, input_image_size=(224,224)):
  '''
  Collect one class label per image.

  The label of an image is the class name of the first annotation returned for
  it among the requested classes.

  :param number_of_samples (int): number of leading entries of `images` to label
  :param images (list of dictionaries): images in dataset
  :param classes (list of strings): object classes of interest
  :param coco (COCO): instance annotations
  :param input_image_size (tuple): not used by this function
  :return y: list of class names, one per image
  '''
  catIds = coco.getCatIds(catNms=classes)
  cats = coco.loadCats(catIds)

  y = []
  for imageObj in (images[i] for i in range(number_of_samples)):
    annIds = coco.getAnnIds(imageObj['id'], catIds=catIds, iscrowd=None)
    names = [getClassName(ann['category_id'], cats) for ann in coco.loadAnns(annIds)]
    # Only the first annotation's class is kept (IndexError if there is none)
    y.append(names[0])
  return y

In [15]:
def filterDataset(annFile, classes=None):
    '''
    Extract images corresponding to the specified classes and remove duplicates.

    :param  annFile (string): path of the annotation file
    :param  classes (list of strings): objects we are interested in detecting;
      None selects every image in the annotation file
    :return: unique_images (list of images): shuffled list with only one instance of
      each image, which may contain multiple objects of interest
    :return: dataset_size (int): number of unique images
    :return: coco (COCO): COCO object for instance annotations
    :raises ValueError: if a class name matches no category in the annotation file
    '''
    # initialize COCO api for instance annotations
    coco = COCO(annFile)

    if classes is not None:
        images = []
        # iterate for each individual class in the list
        for className in classes:
            # Wrap the name in a list: a bare string is itself iterable, so it would
            # be used as the collection to match category names against.
            catIds = coco.getCatIds(catNms=[className])
            if not catIds:
                # An empty category list would not restrict the image query below.
                raise ValueError('Unknown COCO category name: %r' % (className,))
            # get all images containing the given category
            imgIds = coco.getImgIds(catIds=catIds)
            images += coco.loadImgs(imgIds)
    else:
        images = coco.loadImgs(coco.getImgIds())

    # Now, filter out the repeated images. Keying on the image id keeps the first
    # occurrence of each image and avoids comparing every record against every
    # record already kept.
    first_seen = {}
    for image in images:
        first_seen.setdefault(image['id'], image)
    unique_images = list(first_seen.values())

    random.shuffle(unique_images)
    dataset_size = len(unique_images)

    return unique_images, dataset_size, coco

In [16]:
# Classes of interest. Their order matters: getNormalMask encodes a class as
# classes.index(name) + 1 in the mask.
classes = ['car', 'chair', 'book', 'bottle']
# Loads the annotation file and returns the shuffled, de-duplicated images that
# contain at least one of the classes, together with the COCO object.
train_images, train_dataset_size, train_coco = filterDataset(annFileTrain, classes)
# Size forwarded to the image/mask helpers and used for the model input shape
input_image_size = (224,224)
# Mask style intended for getData: 'binary' or 'normal'
mask_type = 'normal'

loading annotations into memory...
Done (t=13.97s)
creating index...
index created!


In [17]:
# Number of leading entries of the shuffled train_images that are downloaded and
# used for training, validation and testing in this notebook.
number_of_samples = 100

In [19]:
# mask_type has to be passed by keyword: the sixth positional parameter of getData
# is batch_size, so a positional mask_type would leave the default 'binary' mask
# in effect.
t_images, t_masks = getData(number_of_samples, train_images, classes, coco, input_image_size, mask_type=mask_type)

In [51]:
# visualizeImageAndMask(t_images[1], t_masks[1])

In [None]:
#t_images[0].shape

In [20]:
def concatenate_image_mask(img, mask):
  '''
  Combine image and mask to feed to model.

  The mask is appended as an extra channel, so every pixel keeps its own colour
  values followed by its own mask value.

  :param img (ndarray): image of shape (H, W, C)
  :param mask (ndarray): mask of shape (H, W, 1) or (H, W)
  :return: ndarray of shape (H, W, C + 1) whose last channel is the mask
  '''
  img = np.asarray(img)
  mask = np.asarray(mask)

  # Accept a mask without channel axis by adding one
  if mask.ndim == img.ndim - 1:
    mask = mask[..., np.newaxis]

  # Concatenating along the channel axis keeps pixels aligned. Flattening both
  # arrays, joining them end to end and reshaping would instead spread the colour
  # values over all four channels and push the mask into the last rows.
  return np.concatenate((img, mask), axis=-1)

In [21]:
#CONCATENATING IMAGES AND MASKS
# One model input per sample: the image joined with the mask at the same index
X = [concatenate_image_mask(t_images[idx], t_masks[idx]) for idx in range(len(t_images))]

In [22]:
#PREPROCESSING INPUT
# getImage() already divides the pixel values by 255, so dividing again here would
# shrink the colour channels to [0, 1/255] and rescale the mask channel as well.
# A new list is built instead of updating X[i] in place, so running this cell
# more than once does not rescale the data again.
X = [resize(np.asarray(sample), input_image_size) for sample in X] #reshaping

In [23]:
# One label per sample: the class name of the first annotation of interest in each image
Y = get_targets(number_of_samples, train_images, classes, coco, input_image_size)

In [24]:
#target classes to integers 
label_to_index = {'car': 0, 'chair': 1, 'book': 2, 'bottle': 3}
for position, label in enumerate(Y):
    # Labels without an entry in the table are left as they are
    Y[position] = label_to_index.get(label, label)

In [25]:
# 4 is the number of target classes (car, chair, book, bottle) and matches the
# size of the model's softmax output layer.
Y = to_categorical(Y, 4)  #one-hot encoding

Split training data into train, validation, and test sets

In [26]:
# TRAIN-VALIDATION-TEST SETS
# First 70% of the samples for training, next 10% for validation, last 20% for testing
t_point = int(0.7*len(X))
v_point = int(0.8*len(X))

X_train, X_val, X_test = X[:t_point], X[t_point:v_point], X[v_point:]
Y_train, Y_val, Y_test = Y[:t_point], Y[t_point:v_point], Y[v_point:]


### Create model

In [27]:
from keras.layers import Conv2D , AveragePooling2D , Dense, Flatten
from keras.models import Sequential

In [28]:
# LeNet-5 layout: two convolution + average-pooling stages followed by three dense
# layers. The input has 3 colour channels plus 1 mask channel; the output has one
# softmax unit per class.
model = Sequential(
    [
        Conv2D(6, (5,5), strides=(1,1), activation='tanh',
               input_shape=(input_image_size[0], input_image_size[1], 3+1)),
        AveragePooling2D(),
        Conv2D(16, (5,5), strides=(1,1), activation='tanh'),
        AveragePooling2D(),
        Flatten(),
        Dense(120, activation='tanh'),
        Dense(84, activation='tanh'),
        Dense(4, activation='softmax'),
    ],
    name="LeNet-5",
)

In [29]:
# Print each layer with its output shape and parameter count
model.summary()

Model: "LeNet-5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 220, 220, 6)       606       
_________________________________________________________________
average_pooling2d (AveragePo (None, 110, 110, 6)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 106, 106, 16)      2416      
_________________________________________________________________
average_pooling2d_1 (Average (None, 53, 53, 16)        0         
_________________________________________________________________
flatten (Flatten)            (None, 44944)             0         
_________________________________________________________________
dense (Dense)                (None, 120)               5393400   
_________________________________________________________________
dense_1 (Dense)              (None, 84)                1016

In [30]:
# categorical_crossentropy pairs with the one-hot targets produced by to_categorical
# and the softmax output layer; accuracy is tracked as an additional metric.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [31]:
# X_train and X_val are Python lists of per-sample arrays, so np.asarray stacks them
# into single batch arrays. The validation data is passed for monitoring only.
model.fit(np.asarray(X_train), Y_train, epochs=30, batch_size=32, validation_data=(np.asarray(X_val), Y_val))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7fc62ad6b350>

### Evaluate

In [32]:
# NOTE(review): because the model was compiled with metrics=['accuracy'], evaluate is
# expected to return [loss, accuracy], so `acc` holds both values rather than the
# accuracy alone - confirm against the Keras documentation.
acc = model.evaluate(np.asarray(X_test), Y_test)

