In [1]:
# import the necessary packages
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Lambda
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
import tensorflow as tf

* Using softmax for the image-class output
* Using sigmoid for the bounding-box coordinates (since the values are between 0 and 1)

In [2]:
class AlexNetObjClassBoud:
    """AlexNet-style convolutional trunk feeding two output heads.

    The trunk output is shared by a classification head (layer name
    ``class_output``) and a bounding-box head (layer name ``bound_output``).
    All members are static; the class only serves as a namespace.
    """

    @staticmethod
    def classBranch(inputs, numCategories, fActivation="softmax"):
        """Attach the class-prediction head to ``inputs``.

        Layers: Dense(512) -> ReLU -> Dense(numCategories) -> ``fActivation``.
        The final activation layer is named ``class_output``.

        Returns the output tensor of the head.
        """
        head = Dense(units=512)(inputs)
        head = Activation("relu")(head)
        head = Dense(numCategories)(head)
        head = Activation(fActivation, name="class_output")(head)
        return head

    @staticmethod
    def boundBranch(inputs, numBoxCord, fActivation="sigmoid"):
        """Attach the bounding-box head to ``inputs``.

        Layers: Dense(128) -> ReLU -> Dense(64) -> ReLU -> Dense(32) -> ReLU
        -> Dense(numBoxCord) -> ``fActivation``. The final activation layer is
        named ``bound_output``.

        Returns the output tensor of the head.
        """
        head = inputs
        # three shrinking hidden layers, each followed by a ReLU
        for hiddenUnits in (128, 64, 32):
            head = Dense(units=hiddenUnits)(head)
            head = Activation("relu")(head)
        head = Dense(numBoxCord)(head)
        head = Activation(fActivation, name="bound_output")(head)
        return head

    @staticmethod
    def build(imWidth, imHeight, numCategories, numBoxCord):
        """Build the two-output model.

        Parameters
        ----------
        imWidth, imHeight : input image size; the input tensor shape is
            ``(imHeight, imWidth, 3)`` (three colour channels, RGB).
        numCategories : number of units of the class head output.
        numBoxCord : number of units of the bounding-box head output.

        Returns
        -------
        A Keras ``Model`` named ``alexnetclassbound`` whose outputs are
        ``[class_output, bound_output]``.
        """
        inputLayer = Input(shape=(imHeight, imWidth, 3))

        # (filters, kernel size, strides, followed by max-pooling?) for the
        # five convolution stages; every convolution uses "valid" padding
        convStages = (
            (96, (11, 11), (4, 4), True),
            (256, (5, 5), (1, 1), True),
            (384, (3, 3), (1, 1), False),
            (384, (3, 3), (1, 1), False),
            (256, (3, 3), (1, 1), True),
        )

        features = inputLayer
        for (numFilters, kernel, stride, poolAfter) in convStages:
            features = Conv2D(filters=numFilters, kernel_size=kernel,
                              strides=stride, padding="valid")(features)
            features = Activation("relu")(features)
            if poolAfter:
                features = MaxPooling2D(pool_size=(3, 3), strides=(2, 2),
                                        padding="valid")(features)
                # the pooled map is passed through a ReLU once more
                features = Activation("relu")(features)

        # fully connected part of the trunk (stages 6-8)
        features = Flatten()(features)
        for denseUnits in (9216, 4096, 4096):
            features = Dense(units=denseUnits)(features)
            features = Activation("relu")(features)

        # the last trunk activation feeds both heads; no 1000-way output
        # layer is added
        imageClassBranch = AlexNetObjClassBoud.classBranch(
            features, numCategories, fActivation="softmax")
        imageBoundBranch = AlexNetObjClassBoud.boundBranch(
            features, numBoxCord, fActivation="sigmoid")

        # final model with two separate outputs
        return Model(inputs=inputLayer,
                     outputs=[imageClassBranch, imageBoundBranch],
                     name="alexnetclassbound")

In [3]:
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")
# import the necessary packages
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import argparse
import random
import pickle
import cv2
import os

In [5]:
# Read the tab-separated annotation table of each split and keep only the
# rows whose 'ImageFound' flag equals 1.
train_data_sub = (
    pd.read_csv("/storage/home/hmm5304/scratch/OpenImagesV5/train_data_sub_down.txt", sep='\t')
    .loc[lambda frame: frame['ImageFound'] == 1]
)
validation_data_sub = (
    pd.read_csv("/storage/home/hmm5304/scratch/OpenImagesV5/validation_data_sub_down.txt", sep='\t')
    .loc[lambda frame: frame['ImageFound'] == 1]
)
test_data_sub = (
    pd.read_csv("/storage/home/hmm5304/scratch/OpenImagesV5/test_data_sub_down.txt", sep='\t')
    .loc[lambda frame: frame['ImageFound'] == 1]
)

In [6]:
# number of training rows per class label
train_data_sub['LabelDescription'].value_counts()

Bus           2826
Airplane      2720
Bird          2708
Truck         2707
Person        2694
Cat           2674
Desk          2672
Motorcycle    2668
Laptop        2661
Chair         2266
Bicycle       1784
Name: LabelDescription, dtype: int64

In [7]:
# number of validation rows per class label
validation_data_sub['LabelDescription'].value_counts()

Person        1157
Airplane       432
Bird           320
Truck          181
Cat            116
Chair           73
Bicycle         55
Motorcycle      52
Bus             49
Desk            16
Laptop          15
Name: LabelDescription, dtype: int64

In [8]:
# number of test rows per class label
test_data_sub['LabelDescription'].value_counts()

Person        2668
Airplane      1314
Bird          1043
Truck          522
Cat            357
Chair          221
Bicycle        203
Bus            164
Motorcycle     126
Laptop          61
Desk            46
Name: LabelDescription, dtype: int64

In [9]:
# preview the first rows of the training annotation table
train_data_sub.head()

Unnamed: 0,ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside,LabelDescription,OriginalURL,Rotation,ImageFound
0,75424f2fadf1c6c9,activemil,/m/04_sv,1,0.074375,0.901875,0.318894,0.971429,0,0,0,0,0,Motorcycle,https://c2.staticflickr.com/1/627/21126611322_...,0.0,1.0
1,a151e217ac86652a,xclick,/m/01g317,1,0.56872,0.883886,0.432432,0.879173,0,0,0,0,0,Person,https://c3.staticflickr.com/6/5074/5844189273_...,0.0,1.0
2,81af87fff7c21972,activemil,/m/0199g,1,0.102927,0.868744,0.02625,0.846875,0,0,0,0,0,Bicycle,https://farm3.staticflickr.com/3709/8906157384...,0.0,1.0
3,f4da1efdefe9a7b6,activemil,/m/01g317,1,0.456481,0.562963,0.692593,0.963889,0,0,0,0,0,Person,https://farm7.staticflickr.com/714/20772066013...,,1.0
4,29af45a96c1b05ce,activemil,/m/07r04,1,0.14625,0.746875,0.270501,0.941427,0,0,0,0,0,Truck,https://c7.staticflickr.com/1/185/455840246_cd...,,1.0


In [10]:
# One-hot encode the class label of every split against the SAME set of
# columns, taken from the training split. Deriving the dummy columns from each
# split on its own leaves a column missing whenever a class does not occur in
# that split, which breaks any later lookup by class name; reindexing adds
# such a column filled with 0 instead.
label_columns = sorted(train_data_sub['LabelDescription'].dropna().unique())

train_data_sub = pd.concat(
    [train_data_sub,
     pd.get_dummies(train_data_sub.LabelDescription).reindex(columns=label_columns, fill_value=0)],
    axis=1)
validation_data_sub = pd.concat(
    [validation_data_sub,
     pd.get_dummies(validation_data_sub.LabelDescription).reindex(columns=label_columns, fill_value=0)],
    axis=1)
test_data_sub = pd.concat(
    [test_data_sub,
     pd.get_dummies(test_data_sub.LabelDescription).reindex(columns=label_columns, fill_value=0)],
    axis=1)

# absolute path of the JPEG that belongs to each annotation row
train_data_sub['imagePath'] = "/storage/home/hmm5304/scratch/OpenImagesV5/data/train/" + train_data_sub['ImageID'] + ".jpg"
validation_data_sub['imagePath'] = "/storage/home/hmm5304/scratch/OpenImagesV5/data/validation/" + validation_data_sub['ImageID'] + ".jpg"
test_data_sub['imagePath'] = "/storage/home/hmm5304/scratch/OpenImagesV5/data/test/" + test_data_sub['ImageID'] + ".jpg"

In [11]:
# Distinct class labels of the training split. This array is later used to
# select the one-hot label columns, so its order fixes the order of the class
# outputs, and its size is passed as the number of categories of the network.
unique_classes = train_data_sub['LabelDescription'].unique()
unique_classes

array(['Motorcycle', 'Person', 'Bicycle', 'Truck', 'Desk', 'Bus', 'Bird',
       'Airplane', 'Chair', 'Cat', 'Laptop'], dtype=object)

In [6]:
# initialize run parameters
# number of epochs passed to model.fit
EPOCHS = 50
# learning rate handed to the Adam optimizer
INIT_LR = 1e-3
# batch size passed to model.fit
BS = 32
# (height, width, channels): index 1 is used as the width and index 0 as the
# height when images are resized and when the network is built
IMAGE_DIMS = (227,227,3)

In [69]:
# Build the image tensor, the one-hot class labels and the bounding boxes of
# the training, validation and test splits with one shared loader.

# bounding-box columns, in the order in which they are stacked into the target
BBOX_COLUMNS = ['XMin', 'XMax', 'YMin', 'YMax']

# value of the 'Rotation' column -> OpenCV rotate code. 270 is expressed as one
# counter-clockwise quarter turn, which is the same pixel permutation as a
# 180 degree turn followed by a clockwise quarter turn. The constants are read
# from `cv2` directly; the `cv2.cv2` alias is not available in every
# opencv-python release.
ROTATION_CODES = {
    90: cv2.ROTATE_90_CLOCKWISE,
    180: cv2.ROTATE_180,
    270: cv2.ROTATE_90_COUNTERCLOCKWISE,
}


def load_split(frame, class_columns, image_dims):
    """Load every image of ``frame`` together with its targets.

    Parameters
    ----------
    frame : DataFrame with the columns 'imagePath', 'Rotation', the columns
        named in ``class_columns`` and the four ``BBOX_COLUMNS``.
    class_columns : names of the one-hot label columns, in output order.
    image_dims : (height, width, channels) of the resized images.

    Returns
    -------
    (pixels, class_labels, boxes) : float32 arrays with one entry per row of
        ``frame`` (every row is processed, including the last one). ``pixels``
        holds RGB images scaled to [0, 1].

    Raises
    ------
    FileNotFoundError : if OpenCV cannot read one of the image files.
    """
    images = []
    paths = frame['imagePath'].tolist()
    rotations = frame['Rotation'].tolist()
    for path, rotation in zip(paths, rotations):
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals a missing/corrupt file by returning None
            raise FileNotFoundError("could not read image: {}".format(path))
        # cv2.resize expects (width, height)
        image = cv2.resize(image, (image_dims[1], image_dims[0]))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # rotate image; any other value (including NaN) leaves it unchanged
        rotate_code = ROTATION_CODES.get(rotation)
        if rotate_code is not None:
            image = cv2.rotate(image, rotate_code)
        images.append(img_to_array(image))

    # scale the raw pixel intensities to the range [0, 1]; float32 is what the
    # cached test arrays use as well and takes half the memory of float64
    pixels = np.array(images, dtype="float32")
    pixels /= 255.0

    # explicit float32 conversion: selecting these columns row by row from a
    # frame with mixed column types yields object arrays, which cannot be
    # saved without pickling and are not a numeric training target
    class_labels = frame[list(class_columns)].to_numpy(dtype="float32")
    boxes = frame[BBOX_COLUMNS].to_numpy(dtype="float32")
    return pixels, class_labels, boxes


# reset_index() is kept so that the frames look the same to later cells as
# before; load_split itself does not depend on the index labels.
train_data_sub = train_data_sub.reset_index()
train_data, train_class, train_bb = load_split(train_data_sub, unique_classes, IMAGE_DIMS)

validation_data_sub = validation_data_sub.reset_index()
validation_data, validation_class, validation_bb = load_split(
    validation_data_sub, unique_classes, IMAGE_DIMS)

test_data_sub = test_data_sub.reset_index()
test_data, test_class, test_bb = load_split(test_data_sub, unique_classes, IMAGE_DIMS)

* Using categorical cross-entropy as the loss for the image class (the labels are one-hot encoded)
* Using mean squared error as the loss for the bounding box, since it is a regression problem

In [20]:
# Build the two-headed network: width and height come from IMAGE_DIMS, the
# class head gets one unit per class and the box head four coordinates.
model = AlexNetObjClassBoud.build(
    IMAGE_DIMS[1], IMAGE_DIMS[0], unique_classes.size, 4)

# One loss and one weight per output layer, keyed by the layer names
# "class_output" and "bound_output".
losses = dict(
    class_output="categorical_crossentropy",
    bound_output="mean_squared_error",
)
lossWeights = dict(class_output=1.0, bound_output=1.0)

# Compile with Adam. The single "accuracy" metric is requested for the whole
# model, so it is reported for both outputs.
opt = Adam(learning_rate=INIT_LR)
model.compile(
    optimizer=opt,
    loss=losses,
    loss_weights=lossWeights,
    metrics=["accuracy"],
)

In [22]:
# print the layer-by-layer overview of the compiled model
model.summary()

Model: "alexnetclassbound"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 227, 227, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_10 (Conv2D)             (None, 55, 55, 96)   34944       ['input_3[0][0]']                
                                                                                                  
 activation_30 (Activation)     (None, 55, 55, 96)   0           ['conv2d_10[0][0]']              
                                                                                                  
 max_pooling2d_6 (MaxPooling2D)  (None, 27, 27, 96)  0           ['activation_30[0

In [18]:
# Train both heads jointly: each output layer gets its own target array,
# keyed by the output layer name.
trainTargets = {"class_output": train_class, "bound_output": train_bb}
validationTargets = {"class_output": validation_class, "bound_output": validation_bb}
model_history = model.fit(
    x=train_data,
    y=trainTargets,
    validation_data=(validation_data, validationTargets),
    epochs=EPOCHS,
    batch_size=BS,
    verbose=1,
)

# Persist the trained model in HDF5 format.
model.save("/storage/home/hmm5304/scratch/OpenImagesV5/alexnet_ObjClassBoud_v2", save_format="h5")

# Persist the per-epoch metrics dictionary so the curves can be re-plotted.
with open('/storage/home/hmm5304/scratch/OpenImagesV5/trainHistoryDictWBS', 'wb') as historyFile:
    pickle.dump(model_history.history, historyFile)

0.001

In [4]:
print("MSG: plot the total loss, class loss, and bound loss...")
# One panel per loss series: total loss, class-head loss, box-head loss.
lossNames = ["loss", "class_output_loss", "bound_output_loss"]
plt.style.use("ggplot")
(fig, ax) = plt.subplots(3, 1, figsize=(13, 13))

epochAxis = np.arange(0, EPOCHS)
for (panel, lossKey) in zip(ax, lossNames):
    # training and validation curve share a panel
    if lossKey == "loss":
        panelTitle = "Total loss"
    else:
        panelTitle = "Loss for {}".format(lossKey)
    valKey = "val_" + lossKey
    panel.set_title(panelTitle)
    panel.set_xlabel("Epoch #")
    panel.set_ylabel("Loss")
    panel.plot(epochAxis, model_history.history[lossKey], label=lossKey)
    panel.plot(epochAxis, model_history.history[valKey], label=valKey)
    panel.legend()

# write the figure to disk and release it
plt.tight_layout()
plt.savefig("/storage/home/hmm5304/scratch/OpenImagesV5/alexnet_ObjClassBoud_v2_losses.png")
plt.close()

In [27]:
# create a figure for the accuracies
# NOTE(review): "bound_output_accuracy" exists only because the model was
# compiled with one global "accuracy" metric; for the box head, trained with
# mean squared error, it is presumably of little diagnostic value -- confirm
# whether it should be plotted at all.
accuracyNames = ["class_output_accuracy", "bound_output_accuracy"]
plt.style.use("ggplot")
# one panel per metric; a fixed count of 3 left an empty third panel in the
# saved figure
(fig, ax) = plt.subplots(len(accuracyNames), 1, figsize=(13, 13))
# loop over the accuracy names
for (i, l) in enumerate(accuracyNames):
    # plot the accuracy for both the training and validation data
    trainCurve = model_history.history[l]
    valCurve = model_history.history["val_" + l]
    # x-axis follows the number of recorded epochs, so the plot still works
    # when fewer than EPOCHS epochs were recorded
    title = "Accuracy for {}".format(l)
    ax[i].set_title(title)
    ax[i].set_xlabel("Epoch #")
    ax[i].set_ylabel("Accuracy")
    ax[i].plot(np.arange(0, len(trainCurve)), trainCurve, label=l)
    ax[i].plot(np.arange(0, len(valCurve)), valCurve, label="val_" + l)
    ax[i].legend()
# save the accuracies figure
plt.tight_layout()
plt.savefig("/storage/home/hmm5304/scratch/OpenImagesV5/alexnet_ObjClassBoud_v2_accuracy.png")
plt.close()

In [25]:
from tensorflow.keras.models import load_model

# Reload the trained network from the path it was saved to; `model` is
# rebound to the loaded instance. The `tf` module is exposed to the loader
# through custom_objects.
model = load_model("/storage/home/hmm5304/scratch/OpenImagesV5/alexnet_ObjClassBoud_v2", custom_objects={"tf": tf})

In [26]:
# Load the cached TEST arrays (images, one-hot class labels, bounding boxes),
# convert each to float32 and print its shape.
# NOTE: allow_pickle=True unpickles the file contents; only use it on files
# from a trusted source.
test_data = np.asarray(
    np.load("/storage/home/hmm5304/scratch/OpenImagesV5/data/numpy_data/test_data.npy", allow_pickle=True)
).astype('float32')
print(test_data.shape)

test_class = np.asarray(
    np.load("/storage/home/hmm5304/scratch/OpenImagesV5/data/numpy_data/test_class.npy", allow_pickle=True)
).astype('float32')
print(test_class.shape)

test_bb = np.asarray(
    np.load("/storage/home/hmm5304/scratch/OpenImagesV5/data/numpy_data/test_bb.npy", allow_pickle=True)
).astype('float32')
print(test_bb.shape)

(6724, 227, 227, 3)
(6724, 11)
(6724, 4)


In [28]:
# Score the model on the test split, then store its raw predictions.
print("MSG: Evaluating model of test images...")
testTargets = {"class_output": test_class, "bound_output": test_bb}
model.evaluate(x=test_data, y=testTargets)

print("MSG: Classifying test images...")
# predict returns one array per output, in the order of the model outputs
predictions = model.predict(test_data)
(classProbability, bboxCoordinates) = predictions

print("MSG: Save test images prediction...")
np.save(
    "/storage/home/hmm5304/scratch/OpenImagesV5/data/numpy_data/classProbability_v2.npy",
    classProbability,
)
np.save(
    "/storage/home/hmm5304/scratch/OpenImagesV5/data/numpy_data/bboxCoordinates_v2.npy",
    bboxCoordinates,
)

MSG: Evaluating model of test images...
MSG: Classifying test images...
MSG: Save test images prediction...
