In [None]:
# Import libraries
import numpy as np 
import tensorflow as tf 
import cv2 as cv 
import random
from google.colab.patches import cv_imshow 
import matplotlib.pyplot as plt

In [None]:
# Generate training images and labels
N=2000  # number of synthetic training samples
H,W=256,256  # image height and width in pixels
# Every image starts out as an all-white canvas (255 in each of the 3 channels).
train_img=np.full((N,H,W,3),255,dtype=np.uint8)
# Per-sample labels, zero-initialised here and written by the generation loop.
train_label_coord=np.zeros((N,2),dtype=np.int32)  # box centre (x, y)
train_label_size=np.zeros((N,2),dtype=np.int32)  # box (width, height)
train_label_class=np.zeros((N,1),dtype=np.int32)  # colour class id

In [None]:
# Draw one randomly placed, randomly sized, randomly coloured filled box on each training image.
# BGR fill colour indexed by class id -- 0: red, 1: green, 2: blue, 3: yellow
box_colors=((0,0,255),(0,255,0),(255,0,0),(0,255,255))
for n in range(N):
    # Random draws happen in a fixed order: centre x, centre y, width, height, then class.
    x=random.randint(0,W-1)
    y=random.randint(0,H-1)
    bw=random.randint(int(W/16),int(W/4))
    bh=random.randint(int(H/16),int(H/4))

    # Pull the centre inward so the whole box fits in the image, then truncate to an int.
    x=int(min(max(x,bw/2),W-1-bw/2))
    y=int(min(max(y,bh/2),H-1-bh/2))

    # Labels are stored in pixels here; normalisation happens in the preprocessing cell.
    train_label_coord[n]=(x,y)
    train_label_size[n]=(bw,bh)
    color_id=random.randint(0,3)
    train_label_class[n]=color_id

    # thickness=-1 asks OpenCV for a filled rectangle.
    half_w,half_h=bw//2,bh//2
    cv.rectangle(train_img[n],(x-half_w,y-half_h),(x+half_w,y+half_h),
                 color=box_colors[color_id],thickness=-1)



In [None]:
# Display some images
# Visual sanity check on the first few generated samples before they are preprocessed.
preview_count=8
for n in range(preview_count):
    sample=train_img[n]
    cv_imshow(sample)

In [None]:
# Preprocess data
# Run the image stack through the VGG16 input preprocessing.
train_img=tf.keras.applications.vgg16.preprocess_input(train_img)

# Normalise box centres: x (column 0) by the image width, y (column 1) by the image height.
coord_scale=np.array([W,H],dtype=np.float32)
train_label_coord=train_label_coord.astype(np.float32)/coord_scale
# Normalise box sizes by a quarter of the image dimension, which is the largest
# box side the generation loop can draw.
size_scale=np.array([W/4,H/4],dtype=np.float32)
train_label_size=train_label_size.astype(np.float32)/size_scale

In [None]:
# Build model
# VGG16 with ImageNet weights and no classifier top serves as the shared feature extractor.
base_model=tf.keras.applications.VGG16(weights='imagenet',include_top=False,input_shape=[H,W,3])
flat_features=tf.keras.layers.Flatten()(base_model.output)

# Three heads read the same flattened features:
#   out_coord -- 2 sigmoid units (box centre)
#   out_size  -- 2 sigmoid units (box size)
#   out_class -- 4-way softmax (colour class)
out_coord=tf.keras.layers.Dense(2,activation='sigmoid',name='out_coord')(flat_features)
out_size=tf.keras.layers.Dense(2,activation='sigmoid',name='out_size')(flat_features)
out_class=tf.keras.layers.Dense(4,activation='softmax',name='out_class')(flat_features)

model=tf.keras.Model(inputs=base_model.input,outputs=[out_coord,out_size,out_class])
model.summary()

In [None]:
# Custom loss functions
def coord_loss_func(y_true,y_pred):
    """Loss for the 'out_coord' head: mean squared error between targets and predictions.

    Args:
        y_true: target values for the head.
        y_pred: values predicted by the head.

    Returns:
        The result of tf.keras.losses.mean_squared_error(y_true, y_pred).
    """
    return tf.keras.losses.mean_squared_error(y_true,y_pred)
def size_loss_func(y_true,y_pred):
    """Loss for the 'out_size' head: mean squared error between targets and predictions.

    Args:
        y_true: target values for the head.
        y_pred: values predicted by the head.

    Returns:
        The result of tf.keras.losses.mean_squared_error(y_true, y_pred).
    """
    return tf.keras.losses.mean_squared_error(y_true,y_pred)
def class_loss_func(y_true,y_pred):
    """Loss for the 'out_class' head: sparse categorical cross-entropy.

    Args:
        y_true: integer class ids (the labels are stored as ints, not one-hot vectors).
        y_pred: per-class values predicted by the head.

    Returns:
        The result of tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred).
    """
    return tf.keras.losses.sparse_categorical_crossentropy(y_true,y_pred)

In [None]:
# Train and save model
# One loss per output head, keyed by the head's layer name. The coordinate loss
# is weighted 4x relative to the size and class losses.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=dict(out_coord=coord_loss_func,out_size=size_loss_func,out_class=class_loss_func),
    loss_weights=dict(out_coord=4,out_size=1,out_class=1),
)

In [None]:
# Print the layer-by-layer summary again, this time for the compiled model.
model.summary()

In [None]:
# Targets are keyed by output-layer name so each head is trained against its own labels.
train_targets=dict(out_coord=train_label_coord,
                   out_size=train_label_size,
                   out_class=train_label_class)
# Train for 20 epochs in batches of 50, with 25% of the data set aside for validation.
history=model.fit(x=train_img,y=train_targets,
                  batch_size=50,epochs=20,validation_split=0.25)
# NOTE(review): newer Keras releases may reject a save path without a '.keras'/'.h5'
# extension -- confirm against the installed version. The prediction cell loads this
# same path, so both must change together.
model.save('model')

In [None]:
# Generate test images and labels
# N is rebound here: every later cell that reads N now sees the test-set size.
N=20
H,W=256,256  # image height and width in pixels
# All-white canvases (255 in each of the 3 channels).
test_img=np.full((N,H,W,3),255,dtype=np.uint8)
# Per-sample ground-truth labels, zero-initialised and written by the generation loop.
test_label_coord=np.zeros((N,2),dtype=np.int32)  # box centre (x, y)
test_label_size=np.zeros((N,2),dtype=np.int32)  # box (width, height)
test_label_class=np.zeros((N,1),dtype=np.int32)  # colour class id

In [None]:
# Draw one randomly placed, randomly sized, randomly coloured filled box on each test image.
# BGR fill colour indexed by class id -- 0: red, 1: green, 2: blue, 3: yellow
box_colors=((0,0,255),(0,255,0),(255,0,0),(0,255,255))
for n in range(N):
    x,y=random.randint(0,W-1),random.randint(0,H-1)
    bw,bh=random.randint(int(W/16),int(W/4)),random.randint(int(H/16),int(H/4))

    # Pull the centre inward so the whole box fits in the image, then truncate to an int.
    x=int(min(max(x,bw/2),W-1-bw/2))
    y=int(min(max(y,bh/2),H-1-bh/2))

    test_label_coord[n,0]=x; test_label_coord[n,1]=y
    test_label_size[n,0]=bw; test_label_size[n,1]=bh
    color_id=random.randint(0,3) # color
    test_label_class[n]=color_id

    # The fill colour comes from this sample's own test label. The earlier code
    # branched on train_label_class[n] for classes 1-3, so the drawn colour could
    # disagree with the stored test label.
    cv.rectangle(test_img[n],(x-int(bw/2),y-int(bh/2)),(x+int(bw/2),y+int(bh/2)),
                 color=box_colors[color_id],thickness=-1)

In [None]:
# Preprocess test images
# The preprocessed stack is kept under a separate name; test_img stays uint8 for display.
test_img_ = tf.keras.applications.vgg16.preprocess_input(test_img)
# Predict object locations in test images
# The model was compiled with custom loss functions, so they are supplied again on load.
model=tf.keras.models.load_model('model',
                                 custom_objects={'coord_loss_func':coord_loss_func,
                                                 'size_loss_func':size_loss_func,
                                                 'class_loss_func':class_loss_func})

# Outputs arrive in the order the model declares them: coord, size, class.
pred_coord,pred_size,pred_class=model.predict(test_img_)
# Undo the training-time normalisation with matching axes: column 0 is x/width
# (divided by W or W/4 in training) and column 1 is y/height (divided by H or H/4).
# The earlier code had W and H swapped, which only worked because H == W.
pred_coord[:,0]=pred_coord[:,0]*W
pred_coord[:,1]=pred_coord[:,1]*H
pred_size[:,0]=pred_size[:,0]*(W/4)
pred_size[:,1]=pred_size[:,1]*(H/4)
# Collapse the softmax scores to a single class id per image.
pred_class=np.argmax(pred_class,axis=1)

In [None]:
# Display prediction results
# BGR colour of the image border indexed by predicted class id --
# 0: red, 1: green, 2: blue, 3: yellow
frame_colors=((0,0,255),(0,255,0),(255,0,0),(0,255,255))
for n in range(N):
    # Truncate the de-normalised predictions to plain Python ints for OpenCV.
    x=int(pred_coord[n,0])
    y=int(pred_coord[n,1])
    bw=int(pred_size[n,0])
    bh=int(pred_size[n,1])
    obj_class=int(pred_class[n])

    # Annotate a copy so test_img stays untouched: re-running the prediction cell
    # then still sees clean inputs, and re-running this cell does not stack overlays.
    canvas=test_img[n].copy()

    # Black 1-px outline: the predicted box.
    cv.rectangle(canvas,(x-int(bw/2),y-int(bh/2)),(x+int(bw/2),y+int(bh/2)),color=(0,0,0),thickness=1)
    # 1-px border around the whole image, coloured by the predicted class.
    cv.rectangle(canvas,(0,0),(W-1,H-1),color=frame_colors[obj_class],thickness=1)
    # Black dot: the predicted box centre.
    cv.circle(canvas,center=(x,y),radius=2,color=(0,0,0),thickness=2)
    cv_imshow(canvas)