In [1]:
!pip install https://github.com/fastai/fastai/archive/master.zip 
!pip install fastai==0.7.0 
!pip install torchtext==0.2.3 
!pip install opencv-python 
!apt update && apt install -y libsm6 libxext6 
!pip3 install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl 
!pip3 install torchvision
!pip install --upgrade git+https://github.com/valeoai/dl_utils.git
!pip install --upgrade imageio
!pip install libsixel-python
!pip install -U pillow
!pip install image

In [2]:
%matplotlib inline 
%reload_ext autoreload 
%autoreload 2

In [3]:
# !pip install keras
# !pip install h5py==3.1.0 numpy==1.19.2 six==1.15.0 typing-extensions==3.7.4 wrapt==1.12.1 botocore==1.20.106 gast==0.4.0 tensorboard==2.6 tensorflow-estimator==2.6 absl-py==0.9 protobuf==3.11.2 scikit-learn==0.24 fsspec==2021.07.0 google-api-python-client==1.12.1
# !pip install --upgrade tensorflow
# !pip install --ignore-installed --upgrade tensorflow-gpu==1.15.0

In [4]:
from pathlib import Path 
import json 
import PIL 
from matplotlib import patches, patheffects
import keras
from keras.models import Sequential, Model 
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.layers import Dropout, Flatten,Dense
from tensorflow.keras.optimizers import Adam

import numpy as np
import os
from matplotlib import image,patches,patheffects
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd

In [5]:
# Root folder of the Pascal VOC 2007 data used by this notebook.
PATH = Path('../input/pascalVOC/VOCdevkit/VOC2007')

# Parse the annotation JSON inside a `with` block so the file handle is closed
# as soon as the content is loaded (a bare `.open()` would leave it open).
with (PATH / 'pascal_train2007.json').open() as annotation_file:
    BD = json.load(annotation_file)

# Names of the top-level sections and of the per-record keys read from the JSON.
IMAGES,ANNOTATIONS,CATEGORIES = ['images', 'annotations', 'categories']
FILE_NAME,ID,IMG_ID,CAT_ID,BBOX = 'file_name','id','image_id','category_id','bbox'

# Lookup tables keyed by the ids stored in the JSON.
data_category = {o[ID]: o['name'] for o in BD[CATEGORIES]}   # category id -> category name
data_filename = {o[ID]: o[FILE_NAME] for o in BD[IMAGES]}     # image id -> file name
data_ids = [o[ID] for o in BD[IMAGES]]                        # image ids, in file order

# Folder the image file names are resolved against when batches are loaded.
IMG_PATH = PATH/'JPEGImages'

In [6]:
import collections

def get_annotation():
    """Group the non-ignored annotations of ``BD`` by image id.

    Each box is re-ordered from the stored layout ``[b0, b1, b2, b3]`` to
    ``[b1, b0, b3 + b1 - 1, b2 + b0 - 1]``.
    NOTE(review): presumably ``[x, y, width, height]`` becomes
    ``[top, left, bottom, right]`` -- confirm against the annotation file.

    Returns:
        defaultdict mapping an image id to a list of ``(box, category_id)``
        tuples, where ``box`` is a 4-element NumPy array. Looking up an
        unknown image id yields (and stores) an empty list.
    """
    per_image = collections.defaultdict(list)
    for record in BD[ANNOTATIONS]:
        # Records flagged 'ignore' are left out entirely.
        if record['ignore']:
            continue
        raw = record[BBOX]
        corners = np.array([raw[1], raw[0], raw[3] + raw[1] - 1, raw[2] + raw[0] - 1])
        per_image[record[IMG_ID]].append((corners, record[CAT_ID]))
    return per_image

# Build the image-id -> [(box, category_id), ...] lookup once for the whole annotation file.
train_annotation = get_annotation()

In [7]:
def show_img(im, figsize=None, ax=None):
    """Draw ``im`` on ``ax`` with a grid overlay and hidden tick labels.

    Args:
        im: image accepted by ``Axes.imshow``.
        figsize: figure size, used only when a new figure has to be created.
        ax: axes to draw on; a new figure/axes pair is created when ``None``.

    Returns:
        The axes the image was drawn on.
    """
    # Test for None explicitly instead of relying on the truthiness of an Axes object.
    if ax is None:
        _, ax = plt.subplots(figsize=figsize)
    ax.imshow(im)
    # Grid lines every 224/4 = 56 pixels on both axes (spacing is hard-coded for 224-pixel images).
    ax.set_xticks(np.arange(0, 224, 224/4))
    ax.set_yticks(np.arange(0, 224, 224/4))
    ax.grid()
    # Keep the grid but hide the numeric tick labels.
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    return ax

def draw_outline(o, lw):
    """Attach a black stroke of width ``lw``, followed by the normal rendering, to artist ``o``."""
    black_stroke = patheffects.Stroke(linewidth=lw, foreground='black')
    o.set_path_effects([black_stroke, patheffects.Normal()])

def draw_rect(ax, b, color='white'):
    """Add an unfilled, outlined rectangle to ``ax``.

    ``b[:2]`` is passed as the rectangle's anchor point and ``b[-2:]`` as its
    two extents (the layout produced by ``bb_hw``).
    """
    anchor_point, extents = b[:2], b[-2:]
    rectangle = patches.Rectangle(anchor_point, *extents, fill=False, edgecolor=color, lw=2)
    draw_outline(ax.add_patch(rectangle), 4)

def draw_text(ax, xy, txt, sz=14, color='white'):
    """Write bold, outlined text ``txt`` on ``ax`` at position ``xy`` (top-aligned, font size ``sz``)."""
    label = ax.text(*xy, txt, verticalalignment='top', color=color, fontsize=sz, weight='bold')
    draw_outline(label, 1)
    
def bb_hw(a):
    """Convert a corner box ``[a0, a1, a2, a3]`` to ``[a1, a0, a3 - a1 + 1, a2 - a0 + 1]``.

    This undoes the re-ordering applied in ``get_annotation``: the first two
    entries are swapped and the last two become inclusive extents.
    """
    first_extent = a[3] - a[1] + 1
    second_extent = a[2] - a[0] + 1
    return np.array([a[1], a[0], first_extent, second_extent])

In [8]:
from keras.layers import Input,GlobalAveragePooling2D, Flatten,Conv2D,Concatenate
from keras.activations import relu, softmax
from keras.models import Model
from keras.layers import BatchNormalization
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D,Activation

# Side length (pixels) of the square network input; also used to scale normalised boxes back to pixels when plotting.
sz = 224

from keras.layers import Input,GlobalAveragePooling2D, Flatten,Conv2D,Concatenate
from keras.activations import relu, softmax
from keras.models import Model
from keras.layers import BatchNormalization
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D,Activation

# NOTE(review): this assignment and the import lines just above it repeat the start of this cell verbatim; harmless but redundant.
sz = 224

def StdConv(kernel_size,stride_size,n_output):
    """Return a callable that applies Conv2D -> ReLU -> BatchNormalization -> Dropout(0.25).

    Args:
        kernel_size: convolution kernel size.
        stride_size: convolution stride.
        n_output: number of convolution filters.

    Returns:
        A function taking a Keras tensor and returning the block's output
        tensor. The convolution uses ``padding='same'``.
    """
    def apply_block(tensor):
        convolved = Conv2D(kernel_size=kernel_size, filters=n_output,
                           strides=stride_size, padding='same')(tensor)
        activated = Activation(relu)(convolved)
        normalised = BatchNormalization()(activated)
        return Dropout(0.25)(normalised)

    return apply_block

def SSD_Model():
    """Build the single-shot detection network on a frozen VGG16 backbone.

    The head is two ``StdConv`` blocks followed by two parallel 3x3
    convolutions: a sigmoid classification branch with 20 channels and a
    linear box-regression branch with 4 channels. Both branches are flattened
    and concatenated (classification first) into one output vector.

    Returns:
        A Keras ``Model`` mapping the VGG16 input to the concatenated vector.
    """
    backbone = VGG16(include_top=False, weights='imagenet', input_shape=(sz,sz,3))
    # Freeze every backbone layer: only the detection head is trainable.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    features = Activation(relu)(backbone.output)
    features = Dropout(0.25, name='VGG_backbone')(features)

    # Per the original author's shape notes, the VGG output is [7*7*512] here.
    features = StdConv(kernel_size=3, stride_size=1, n_output=256)(features)  # [7*7*256]
    features = StdConv(kernel_size=3, stride_size=2, n_output=128)(features)  # [4*4*128]

    # (1) Classification branch: one sigmoid score per class at every grid cell.
    nb_cat = 20
    class_scores = Conv2D(kernel_size=3, filters=nb_cat, strides=1, padding='same')(features)
    class_scores = Activation('sigmoid', name='classif')(class_scores)

    # (2) Box-regression branch: four raw (linear) activations per grid cell.
    box_activations = Conv2D(kernel_size=3, filters=4, strides=1, padding='same')(features)
    box_activations = Activation('linear', name='regression')(box_activations)

    # Concatenate the two flattened branches, classification first.
    flat_scores = Flatten()(class_scores)
    flat_boxes = Flatten()(box_activations)
    merged = Concatenate()([flat_scores, flat_boxes])

    return Model(inputs=backbone.input, outputs=merged)

In [9]:
# Instantiate the detection network defined by SSD_Model.
net = SSD_Model()

In [10]:
# Print the layer-by-layer summary of the model.
net.summary()

In [11]:
# Category names of the annotated objects: one inner list per image, in `data_ids` order.
# Only the class part (p[1]) of each (box, category_id) annotation is used here.
mc = [[data_category[p[1]] for p in train_annotation[o]] for o in data_ids]

# Peek at the first three images.
mc[:3]

In [12]:
# Position in `id2cat` is the contiguous class index used from here on;
# `cat2id` is the reverse lookup (category name -> class index).
id2cat = list(data_category.values())
cat2id = {v:k for k,v in enumerate(id2cat)}

# Same content as `mc`, but with class indices instead of names.
# Images hold different numbers of objects, so the per-image arrays are ragged:
# calling np.array() on them directly raises ValueError on NumPy >= 1.24.
# Fill a 1-D object array element by element instead, which always yields
# one integer array per image.
mcs = np.empty(len(mc), dtype=object)
for img_pos, img_labels in enumerate(mc):
    mcs[img_pos] = np.array([cat2id[p] for p in img_labels])
mcs[:3]

In [13]:
# Do the same for bboxes: all boxes of an image are concatenated into one flat array per image.
mbb = [np.concatenate([p[0] for p in train_annotation[o]]) for o in data_ids]
mbb[:3] # Each group of 4 consecutive values is one bbox

In [14]:
# Serialise each image's box values as one space-separated string, so they fit in a single DataFrame cell.
mbbs = [' '.join(str(p) for p in o) for o in mbb]
mbbs[:3]

In [15]:
# One row per image: file name, space-separated box string, and the array of class indices.
df = pd.DataFrame({'filename': [data_filename[o] for o in data_ids], 'bbox': mbbs, 'class':mcs}, columns=['filename','bbox','class'])

In [16]:
# Preview the first rows of the table.
df.head()

In [17]:
def Split_Train_Valid(df,Split_train_val=0.7,seed=None):
    """Shuffle ``df`` and split it into a training and a validation frame.

    Args:
        df: DataFrame to split; it is not modified.
        Split_train_val: fraction of rows placed in the training frame. The
            cut position is ``int(len(df) * Split_train_val)``.
        seed: optional integer seed. When given, the shuffle uses its own
            ``RandomState`` so the split is reproducible and the global NumPy
            random state is left untouched. When ``None`` (default) the global
            NumPy random state is used, as before.

    Returns:
        ``(df_train, df_valid)``, each re-indexed from 0.
    """
    # Step 1: shuffle the rows.
    if seed is None:
        shuffled_labels = np.random.permutation(df.index)
    else:
        shuffled_labels = np.random.RandomState(seed).permutation(df.index)
    shuffled = df.reindex(shuffled_labels)
    shuffled = shuffled.set_index(np.arange(len(shuffled)))

    # Step 2: cut once, at a position computed a single time.
    n_train = int(len(shuffled)*Split_train_val)
    df_train = shuffled.iloc[:n_train]
    df_valid = shuffled.iloc[n_train:]
    df_train = df_train.set_index(np.arange(len(df_train)))
    df_valid = df_valid.set_index(np.arange(len(df_valid)))

    return df_train,df_valid

# Random split: 70% of the images for training, the rest for validation.
df_train, df_valid = Split_Train_Valid(df,0.7)

In [18]:
from tensorflow.keras.utils import Sequence
class Generator_MultiObject(Sequence):
    """Keras ``Sequence`` yielding ``(X, Y)`` batches built from a DataFrame.

    The DataFrame must provide three columns: ``filename`` (image file inside
    ``folder``), ``class`` (sequence of integer class ids) and ``bbox``
    (space-separated string holding 4 numbers per object).

    ``X`` has shape ``(batch, *dim, 3)``. Each row of ``Y`` is a flattened
    ``(MAX_LABELS, 5)`` table whose rows are ``[class, b0, b1, b2, b3]``,
    zero-padded after the last object. ``b0``/``b2`` are divided by the
    original image height and ``b1``/``b3`` by its width.
    """

    # Maximum number of objects encoded per image; objects beyond it are dropped.
    MAX_LABELS = 20

    def __init__(self, df, folder,preprocess_fct,batch_size=32, dim=(32,32), shuffle=True):
        """Store the configuration and draw the ordering of the first epoch.

        Args:
            df: DataFrame with ``filename``, ``bbox`` and ``class`` columns.
            folder: path object the file names are joined to with ``/``.
            preprocess_fct: callable applied to each resized image array.
            batch_size: number of images per batch.
            dim: target image size as ``(rows, cols)``.
            shuffle: whether to reshuffle the row order at every epoch.
        """
        self.preprocess_fct = preprocess_fct
        self.dim = dim
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.folder = folder

        self.df = df
        self.n = len(df)
        # Only full batches are served; a trailing partial batch is dropped.
        self.nb_iteration = self.n // self.batch_size

        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch."""
        return self.nb_iteration

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, Y)`` pair."""
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        return self.__data_generation(indexes)

    def on_epoch_end(self):
        """Reset the row ordering, shuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.df))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, index):
        """Build the image tensor and the target matrix for the given rows.

        Args:
            index: 0-based row positions forming the batch.

        Returns:
            ``(X, Y)`` with ``X`` of shape ``(len(index), *dim, 3)`` and ``Y``
            of shape ``(len(index), MAX_LABELS * 5)``.
        """
        nb_label_max = self.MAX_LABELS
        X = np.empty((len(index), *self.dim, 3))
        Y = np.zeros((len(index), nb_label_max*5))

        for i, ID in enumerate(index):
            # Positional lookup: `self.indexes` holds positions, so this stays
            # correct even when the DataFrame index is not 0..n-1.
            row = self.df.iloc[ID]

            # Keep at most nb_label_max objects; without the cap, an image with
            # more objects makes the slice assignments below fail on shape.
            c = np.asarray(row['class'])[:nb_label_max]
            nb_label = len(c)

            # 'bbox' is a space-separated string with 4 numbers per object.
            bbox = np.array(str(row['bbox']).split(), dtype=float).reshape(-1, 4)[:nb_label_max]

            # The context manager closes the image file once the pixels are read.
            with Image.open(self.folder/row['filename']) as img:
                width, height = img.size
                # PIL's resize expects (width, height) while `dim` is used as
                # (rows, cols) for X, hence the swap (no effect for square dims).
                # convert('RGB') guarantees the 3 channels X expects.
                pixels = np.asarray(img.convert('RGB').resize((self.dim[1], self.dim[0])))
            X[i,] = self.preprocess_fct(pixels)

            # Express the box coordinates as fractions of the original image
            # size: columns 0 and 2 by the height, columns 1 and 3 by the width.
            bbox_rescaled = bbox / np.array([height, width, height, width], dtype=float)

            # One row per object: class id followed by the 4 rescaled coordinates.
            y = np.zeros((nb_label_max, 1+4))
            y[:nb_label, 0] = c
            y[:nb_label, 1:5] = bbox_rescaled

            # Flatten to a fixed-length vector so every sample has the same target size.
            Y[i] = y.reshape(nb_label_max*5)

        return X, Y

In [19]:
# Batch generators over the two splits: sz x sz inputs, `preprocess_input` applied to each image, 32 images per batch.
train_gen = Generator_MultiObject(df_train, IMG_PATH,preprocess_input,batch_size=32, dim=(sz,sz), shuffle=True)
valid_gen = Generator_MultiObject(df_valid, IMG_PATH,preprocess_input,batch_size=32, dim=(sz,sz), shuffle=True)

In [20]:
# Anchor grid: anc_grid x anc_grid cells, k anchors per cell.
k = 1
anc_grid = 4

# Centre of the first cell along one axis; the last one is its mirror image.
anc_offset = 1/(anc_grid*2)

# Centre coordinates of every cell: the first one changes every anc_grid
# entries (repeat), the second one cycles through the grid positions (tile).
anc_x = np.repeat(np.linspace(anc_offset, 1-anc_offset, anc_grid), anc_grid)
anc_y = np.tile(np.linspace(anc_offset, 1-anc_offset, anc_grid), anc_grid)

# Each anchor row is [centre, centre, size, size]; every anchor is one cell wide and tall.
anc_ctrs = np.tile(np.column_stack((anc_x, anc_y)), (k, 1))
anc_sizes = np.full((anc_grid*anc_grid, 2), 1/anc_grid)
anchors = np.hstack((anc_ctrs, anc_sizes))

In [21]:
# One row per anchor (4x4 grid = 16 rows): two centre coordinates followed by two sizes.
print(anchors)

In [22]:
def hw2corners(ctr, hw):
    """Convert centre/size boxes to corner boxes.

    Args:
        ctr: 2-D array of box centres, one row per box.
        hw: 2-D array of box sizes with the same shape as ``ctr``.

    Returns:
        2-D array whose rows are ``[ctr - hw/2, ctr + hw/2]`` joined column-wise.
    """
    low_corner = ctr - hw/2
    high_corner = ctr + hw/2
    return np.concatenate((low_corner, high_corner), axis=1)

# Same anchors expressed as corner coordinates (centre -/+ half size) instead of centre + size.
anchor_corner = hw2corners(anchors[:,:2], anchors[:,2:])
anchor_corner

In [23]:
# Side length of one grid cell, as a fraction of the image side.
grid_sizes = 1/anc_grid
grid_sizes

In [24]:
def deprocess_img(processed_img):
    """Turn a mean-subtracted, channel-reversed image back into a displayable uint8 image.

    Args:
        processed_img: array of shape ``[1, height, width, channel]`` or
            ``[height, width, channel]``. It is copied, not modified.

    Returns:
        ``uint8`` array of shape ``[height, width, channel]`` with values
        clipped to 0..255.

    Raises:
        AssertionError (or ValueError when assertions are disabled) if the
        input is not 3-D after the optional batch axis has been removed.
    """
    restored = processed_img.copy()
    if restored.ndim == 4:
        # Drop the leading batch axis.
        restored = np.squeeze(restored, 0)
    assert restored.ndim == 3, ("Input to deprocess image must be an image of "
                                "dimension [1, height, width, channel] or [height, width, channel]")
    if restored.ndim != 3:
        raise ValueError("Invalid input to deprocessing image")

    # Perform the inverse of the preprocessing step: add the per-channel
    # offsets back, then reverse the channel order.
    for channel, offset in enumerate((103.939, 116.779, 123.68)):
        restored[:, :, channel] += offset
    restored = restored[:, :, ::-1]

    return np.clip(restored, 0, 255).astype('uint8')

In [25]:
def show_ground_truth(ax, im, bbox, clas=None, prs=None, thresh=0.3):
    """Draw an image with its boxes and, optionally, class names and scores.

    Args:
        ax: axes to draw on (forwarded to ``show_img``).
        im: image to display.
        bbox: iterable of corner boxes; each one is converted with ``bb_hw``.
        clas: optional class index per box; an index equal to
            ``len(id2cat)`` is labelled ``'bg'``.
        prs: optional score per box; boxes scoring at or below ``thresh``
            are not drawn.
        thresh: score threshold, only used for boxes that have a score.
    """
    boxes = [bb_hw(corner_box) for corner_box in bbox]
    n_boxes = len(boxes)
    if prs is None:
        prs = [None] * n_boxes
    if clas is None:
        clas = [None] * n_boxes

    ax = show_img(im, ax=ax)
    for position, (box, label, score) in enumerate(zip(boxes, clas, prs)):
        # Skip boxes whose third component is not above 1 (e.g. zero padding).
        if not (box[2] > 1):
            continue
        # Skip scored boxes that do not exceed the threshold.
        if score is not None and not (score > thresh):
            continue
        draw_rect(ax, box)
        caption = f'{position}: '
        if label is not None:
            caption += 'bg' if label == len(id2cat) else id2cat[label.astype(int)]
        if score is not None:
            caption += f' {score:.2f}'
        draw_text(ax, box[:2], caption)

In [26]:
def get_y(y):
    """Split a padded target table into its real boxes and classes.

    Args:
        y: 2-D array whose rows are ``[class, b0, b1, b2, b3]``.

    Returns:
        ``(bbox, clas)``: columns 1..4 and the integer-cast column 0 of the
        rows where ``b2 - b0 > 0``. All-zero padding rows are dropped by
        that test.
    """
    has_extent = (y[:, 3] - y[:, 1]) > 0
    boxes = y[has_extent, 1:5]
    classes = y[has_extent, 0].astype('int')
    return boxes, classes

In [27]:
# Let's now look at the ground truth y: fetch one batch from the training generator.
x,y = next(iter(train_gen))

# Pick one image of the batch and unpack its target vector into rows of [class, 4 box values].
i=8
ima = x[i]
Y = y[i].reshape(-1,5)
bbox, clas= get_y(Y)

# Boxes are stored as fractions of the image side, hence the multiplication by sz (224) before plotting.
fig, ax = plt.subplots(figsize=(7,7))
show_ground_truth(ax, deprocess_img(ima), bbox*sz, clas, None)

In [28]:
# Overlay the anchor boxes (scaled to pixels) on the same image.
fig, ax = plt.subplots(figsize=(7,7))
show_ground_truth(ax, deprocess_img(ima), anchor_corner*224, None)

In [29]:
# Ground-truth boxes of the selected image (corner coordinates as fractions of the image side).
bbox

In [30]:
# (number of ground-truth boxes, 4)
bbox.shape

In [31]:
# Anchor boxes in the same corner format, for comparison with `bbox`.
anchor_corner

In [32]:
# (number of anchors, 4)
anchor_corner.shape

In [33]:
# Last two coordinates (the "max" corner) of every ground-truth box.
bbox[:,2:]

In [34]:
# Pairwise (GT box, anchor) minimum of the "max" corners: the far corner of each intersection.
# The inserted None axis makes the two arrays broadcast to shape (n_gt, n_anchors, 2).
max_xy = np.minimum(bbox[:,None,2:],anchor_corner[:,2:])
print(max_xy.shape)
max_xy

In [35]:
# Shape of the GT operand once the broadcasting axis is inserted: (n_gt, 1, 2).
bbox[:,None,2:].shape

In [36]:
# Shape of the anchor operand: (n_anchors, 2).
anchor_corner[:, 2:].shape

In [37]:
# Far and near corners of every (GT box, anchor) intersection rectangle.
max_xy = np.minimum(bbox[:,None,2:], anchor_corner[:, 2:])
min_xy = np.maximum(bbox[:,None,:2], anchor_corner[:, :2])

In [38]:
# (n_gt, n_anchors, 2)
min_xy.shape

In [39]:
# (n_gt, n_anchors, 2)
max_xy.shape

In [40]:
# Side lengths of each intersection. Without overlap min_xy > max_xy and the difference
# is negative, so it is clipped at 0 to give an empty intersection.
inter = np.clip((max_xy - min_xy),0,None)
inter

In [41]:
# Intersection area = product of the two side lengths.
inter_scores = inter[:, :, 0] * inter[:, :, 1]

In [42]:
# (n_gt, n_anchors)
inter_scores.shape

In [43]:
def box_sz(b):
    """Return the area of each corner box in the 2-D array ``b`` (one box per row)."""
    first_side = b[:, 2] - b[:, 0]
    second_side = b[:, 3] - b[:, 1]
    return first_side * second_side

def intersect(box_a, box_b):
    """Pairwise intersection area between two sets of corner boxes.

    Args:
        box_a: array of shape ``(n_a, 4)``.
        box_b: array of shape ``(n_b, 4)``.

    Returns:
        Array of shape ``(n_a, n_b)``; entries are 0 where boxes do not overlap.
    """
    # The inserted axis broadcasts every box of `box_a` against every box of `box_b`.
    far_corner = np.minimum(box_a[:, None, 2:], box_b[:, 2:])
    near_corner = np.maximum(box_a[:, None, :2], box_b[:, :2])

    # Negative side lengths mean "no overlap" and are clamped to 0.
    sides = np.clip(far_corner - near_corner, 0, None)
    return sides[..., 0] * sides[..., 1]

def jaccard(box_a, box_b):
    """Pairwise Jaccard index (intersection over union, IoU) between two sets of corner boxes.

    Returns:
        Array of shape ``(len(box_a), len(box_b))``.
    """
    inter = intersect(box_a, box_b)
    area_a = box_sz(box_a)[:, None]   # column vector: one area per box of box_a
    area_b = box_sz(box_b)[None, :]   # row vector: one area per box of box_b
    union = area_a + area_b - inter
    return inter / union

In [44]:
# IoU between every ground-truth box (rows) and every anchor (columns).
overlaps = jaccard(bbox,anchor_corner)
overlaps

In [45]:
# (n_gt, n_anchors)
overlaps.shape

In [46]:
# For each ground-truth box: its best IoU and the anchor that achieves it.
MaxOverlap = np.amax(overlaps,axis=1)
ID_Overlap_Anchor = np.argmax(overlaps,axis=1)

print('Max overlapp: ',MaxOverlap)
print('ID of anchors: ',ID_Overlap_Anchor)

# Draw those best anchors, labelled with the GT class and the IoU (threshold 0 so that all are shown).
fig, ax = plt.subplots(figsize=(7,7))
show_ground_truth(ax, deprocess_img(ima), anchor_corner[ID_Overlap_Anchor,:]*224, clas, MaxOverlap,0)

In [47]:
# The other direction: for each anchor, its best IoU and the ground-truth box that achieves it.
# Note that `MaxOverlap` is rebound here to the per-anchor values.
MaxOverlap = np.amax(overlaps,axis=0)
ID_Overlap_GT = np.argmax(overlaps,axis=0)

print('Max overlapp: ',MaxOverlap)
print('ID of GT: ',ID_Overlap_GT)

In [48]:
def map_to_ground_truth(overlaps, print_it=False):
    """Assign one ground-truth box to every anchor.

    Args:
        overlaps: IoU matrix of shape ``(n_gt, n_anchors)``; it is not modified.
        print_it: accepted for compatibility with existing calls; not used.

    Returns:
        ``(gt_overlap, gt_idx)``, both of length ``n_anchors``: the best IoU
        seen by each anchor and the index of the ground-truth box assigned
        to it.
    """
    # For each GT box, the anchor with which it overlaps the most.
    best_anchor_per_gt = np.argmax(overlaps, axis=1)

    # For each anchor, the best overlap and the GT box that achieves it.
    gt_overlap = np.amax(overlaps, axis=0)
    gt_idx = np.argmax(overlaps, axis=0)

    # Make sure every GT box keeps its best anchor, however small the overlap:
    # the overlap is overwritten with a constant (1.99) larger than any IoU...
    gt_overlap[best_anchor_per_gt] = 1.99
    # ...and the anchor is re-pointed at that GT box. When several GT boxes
    # share a best anchor, the last one wins.
    for gt_id, anchor_id in enumerate(best_anchor_per_gt):
        gt_idx[anchor_id] = gt_id
    return gt_overlap, gt_idx

In [49]:
# Per-anchor best overlap (1.99 marks a forced match) and the index of the GT box assigned to each anchor.
gt_overlap,gt_idx = map_to_ground_truth(overlaps)

print(gt_overlap)
print(gt_idx)

In [50]:
# Class index of each ground-truth box of the selected image.
clas

In [51]:
# Class of the GT box assigned to each anchor (one entry per anchor).
gt_clas = clas[gt_idx]; 

print(gt_clas)

In [52]:
# Anchors whose overlap exceeds the threshold are "positive"; the others are "negative".
thresh = 0.5
pos = gt_overlap > thresh
print(pos)
# np.nonzero returns a tuple of index arrays; `1-pos` flips the boolean mask.
pos_idx = np.nonzero(pos)
neg_idx = np.nonzero(1-pos)
pos_idx

In [53]:
# Negative anchors get the extra index len(id2cat), which is displayed as 'bg' (background).
# This modifies `gt_clas` in place.
gt_clas[neg_idx] = len(id2cat)
[id2cat[o] if o<len(id2cat) else 'bg' for o in gt_clas]

In [54]:
def actn_to_bb(actn, anchors):
    """Convert raw box activations into corner boxes positioned relative to their anchors.

    The activations are squashed with ``tanh`` so that negative offsets are
    possible. The first two values move the anchor centre by at most half a
    grid cell in either direction; the last two scale the anchor size by a
    factor between 0.5 and 1.5.

    Args:
        actn: array of shape ``(n_anchors, 4)`` of raw activations.
        anchors: array of shape ``(n_anchors, 4)``: two centre coordinates
            followed by two sizes per anchor.

    Returns:
        Array of shape ``(n_anchors, 4)`` of corner boxes (see ``hw2corners``).
    """
    squashed = np.tanh(actn)
    offsets, scales = squashed[:, :2], squashed[:, 2:]
    actn_centers = (offsets/2 * grid_sizes) + anchors[:, :2]
    actn_hw = (scales/2 + 1) * anchors[:, 2:]
    return hw2corners(actn_centers, actn_hw)

In [55]:
# Preview: a random 16x4 integer array with values in [0, 224).
np.random.randint(0,224,(16,4))

In [56]:
# We generate a random "activation" of shape 16x4 to exercise actn_to_bb.
b_bbox = np.random.randint(0,224,(16,4));
# Turn the last two columns into a far corner (near corner + random extent), kept within 0..224.
b_bbox[:,2:] = b_bbox[:,2:] + b_bbox[:,:2]
b_bbox[:,2:] = np.clip(b_bbox[:,2:],0,224)
# Rescale to 0..1.
b_bbox = b_bbox / 224

a_ic = actn_to_bb(b_bbox, anchors)

# Note the scaling by 224 for display, as the outputs are in 0..1.
fig, ax = plt.subplots(figsize=(7,7))
show_ground_truth(ax, deprocess_img(ima), a_ic*224, None, None)

In [57]:
# (16, 4): one row of four values per anchor.
b_bbox.shape

In [58]:
# First, load our data
x_batch,y_batch = next(iter(train_gen))

# then, make a prediction
y_pred = net.predict_on_batch(x_batch)

# For the next steps, consider only one sample from the batch.
# Note: `y_pred` is rebound from the whole-batch prediction to that single sample's vector.
id_sample = 2
X = x_batch[id_sample]
y_GT = y_batch[id_sample]
y_pred = y_pred[id_sample]

In [59]:
# GT: the vector size is 20*(1+4) = 100, i.e. [class, 4 box coordinates] for up to 20 objects
print('GT shape',y_GT.shape)

# `y_GT` is rebound to its (20, 5) table form here.
y_GT = y_GT.reshape(-1,5)
gt_box,gt_clas = get_y(y_GT)

print('GT Box shape',gt_box.shape)
print('GT Class shape',gt_clas.shape)

# The prediction: the vector size is [16*20 + 16*4], where 16 is the number of anchors;
# first the per-class scores of every anchor, then its 4 box activations.
print('Prediction tensor shape',y_pred.shape)
pred_clas = np.reshape(y_pred[:16*20],(16,20))
pred_box = np.reshape(y_pred[16*20:],(16,4))
print('Prediction clas shape',pred_clas.shape)
print('Prediction box shape',pred_box.shape)

In [60]:
# Decode the predicted box activations into corner boxes relative to the anchors.
a_ic = actn_to_bb(pred_box, anchors)

In [61]:
# Just have a look at the data: ground truth on the left, prediction on the right.
plt.figure(1,figsize=(16, 12))
ax1=plt.subplot(2, 2, 1)
ax1.set_title('GT')
show_ground_truth(ax1, deprocess_img(X), gt_box*224)
ax2=plt.subplot(2, 2, 2)
ax2.set_title('Prediction')
# NOTE(review): this plots the raw activations `pred_box` (clipped to 0..1), not the decoded
# boxes `a_ic` computed in the previous cell -- confirm this is intended.
show_ground_truth(ax2, deprocess_img(X), np.clip(pred_box,0,1)*224)

In [62]:
# Shape of the random activations generated earlier, for comparison: (16, 4).
b_bbox.shape

In [63]:
# IoU between the sample's ground-truth boxes (rows) and the anchors (columns); rebinds `overlaps`.
overlaps = jaccard(gt_box, anchor_corner)

In [64]:
# For each anchor: the best overlap with the GT and the index of the GT box assigned to it.
# The second argument (print_it) is not used by map_to_ground_truth.
gt_overlap,gt_idx = map_to_ground_truth(overlaps,0) 
# Class of the GT box assigned to each anchor.
sel_gt_clas = gt_clas[gt_idx]

In [65]:
# One class index per anchor.
sel_gt_clas

In [66]:
# Anchors with an overlap above 0.4 (forced matches included, since they are set to 1.99) are positive.
pos = gt_overlap > 0.4
pos_idx = np.nonzero(pos)[0]# idx of anchors
neg_idx = np.nonzero(1-pos)[0]
# We keep neg_idx for a later step, where it is used to tell the loss that there is
# nothing to consider in that cell, meaning it is a background cell.

# GT box assigned to each anchor (one row per anchor).
sel_gt_bbox = gt_box[gt_idx]

In [67]:
# Index of the GT box assigned to each of the 16 anchors (all zeros when the sample has a single GT box).
gt_idx

In [68]:
# (n_gt, 4)
gt_box.shape

In [69]:
# Indexing with gt_idx gives one GT box per anchor: (n_anchors, 4).
gt_box[gt_idx].shape

In [70]:
# (n_anchors,)
gt_overlap.shape

In [71]:
# Boolean mask over the anchors: True where the anchor is positive.
pos

In [72]:
# Indices of the positive anchors.
pos_idx

In [73]:
# (n_anchors, 4)
sel_gt_bbox.shape

In [74]:
# The GT box assigned to each anchor.
sel_gt_bbox

In [75]:
# Localization loss: L1 distance between decoded and assigned GT boxes, summed over the
# 4 coordinates and averaged over the positive anchors only.
loc_loss = np.mean(np.sum(np.abs(a_ic[pos_idx] - sel_gt_bbox[pos_idx]),axis=1))
print('Localization loss is: ',loc_loss)
# Coordinates are fractions of the image side, so multiplying by 224 expresses the error in pixels.
print('It means an average error in pixel of: ',loc_loss*224)

In [76]:
# Just have a look at the data: for the positive anchors only, the assigned GT boxes (left)
# and the decoded predictions (right).
plt.figure(1,figsize=(16, 12))
ax1=plt.subplot(2, 2, 1)
ax1.set_title('GT')
show_ground_truth(ax1, deprocess_img(X), sel_gt_bbox[pos_idx]*224)
ax1=plt.subplot(2, 2, 2)
ax1.set_title('Pred')
show_ground_truth(ax1, deprocess_img(X), a_ic[pos_idx]*224)

In [77]:
# IoU matrix of the current sample (GT boxes x anchors).
overlaps

In [78]:
# The gt_box, in pixels (note that it is stored normalised, hence the multiplication by 224):
gt_box*224

In [79]:
# We will use the Binary Cross Entropy as we did in the previous lesson.
# pos_idx holds the ids of the matched anchors; the labels of all the other anchors must be
# zero because they do not contain any box.
from keras.utils import np_utils
from keras import backend as K

# One-hot labels, one row per anchor; 20 is the number of classes.
label = keras.utils.np_utils.to_categorical(sel_gt_clas, 20)
# Negative (background) anchors get an all-zero label row.
label[neg_idx,:]=0

# Both operands are cast to float64 so that their dtypes match.
clas_loss = K.eval(K.mean(K.binary_crossentropy(K.cast(label,"float64"), K.cast(pred_clas,"float64"))))

print("Binary Cross Entropy loss = ",clas_loss)

In [80]:
# There is a scale difference between the classification loss (around 4) and the
# localization loss (around 0.4), so we put a weight of 10 on the localization loss.
Loss = clas_loss + loc_loss*10
print("The Global Loss is = ",Loss)

In [81]:
from keras import backend as K
from keras.utils import np_utils

def SSD_Loss_1(y_GT,y_pred):
    """Compute the SSD loss of one sample: classification loss + 10 x localization loss.

    Args:
        y_GT: NumPy array reshapeable to ``(-1, 5)``; rows are
            ``[class, 4 box coordinates]``, zero-padded after the last object.
        y_pred: flat NumPy vector holding ``n_anchors * 20`` class scores
            followed by ``n_anchors * 4`` box activations, where ``n_anchors``
            is the number of rows of the module-level ``anchors``.

    Returns:
        ``clas_loss + 10 * loc_loss``.

    Side effects:
        Prints the two loss terms.
    """
    n_classes = 20
    # Derived from the anchor table rather than hard-coded, so the split below
    # follows the anchor grid if it changes.
    n_anchors = len(anchors)

    # Step 1: split the vectors into boxes and classes, for both GT and prediction.
    gt_box, gt_clas = get_y(y_GT.reshape(-1,5))
    pred_clas = y_pred[:n_classes*n_anchors].reshape(-1, n_classes)
    pred_box = y_pred[n_classes*n_anchors:].reshape(-1, 4)

    # Step 2: convert the box activations into boxes.
    a_ic = actn_to_bb(pred_box, anchors)

    # Step 3: overlap between every GT box and every anchor.
    overlaps = jaccard(gt_box, anchor_corner)

    # Step 4: assign a GT box to every anchor. The shared helper also forces
    # each GT box onto its best anchor (overlap set to 1.99).
    gt_overlap, gt_idx = map_to_ground_truth(overlaps)
    sel_gt_clas = gt_clas[gt_idx]
    sel_gt_bbox = gt_box[gt_idx]

    # Step 5: threshold the overlap to keep only the "active" anchors.
    pos = gt_overlap > 0.4
    pos_idx = np.nonzero(pos)[0]
    neg_idx = np.nonzero(~pos)[0]

    # Step 6: localization loss (L1, positive anchors only).
    loc_loss = np.mean(np.sum(np.abs(a_ic[pos_idx] - sel_gt_bbox[pos_idx]),axis=1))

    # Step 7: classification loss (binary cross entropy).
    # Anchors that matched no box get an all-zero label row.
    label = keras.utils.np_utils.to_categorical(sel_gt_clas, n_classes)
    label[neg_idx,:]=0

    # Cast both operands to float64, as the step-by-step cell does, so that
    # labels and predictions always share one dtype.
    clas_loss = K.eval(K.mean(K.binary_crossentropy(K.cast(label,"float64"), K.cast(pred_clas,"float64"))))

    print('localization Loss:',loc_loss)
    print('classification loss:',clas_loss)
    return clas_loss + 10*loc_loss

def SSD_Loss(y_batch,y_pred):
    """Return the sum of ``SSD_Loss_1`` over the paired samples of a batch.

    Args:
        y_batch: iterable of per-sample ground-truth arrays.
        y_pred: iterable of per-sample prediction vectors, in the same order.

    Returns:
        The summed (not averaged) loss; 0 for an empty batch.
    """
    return sum(SSD_Loss_1(sample_gt, sample_pred)
               for sample_gt, sample_pred in zip(y_batch, y_pred))

In [82]:
# Evaluate the loss on the single sample; expand_dims adds the leading batch axis SSD_Loss iterates over.
SSD_Loss(np.expand_dims(y_GT,0),np.expand_dims(y_pred,0))

In [83]:
# Same call as in the previous cell, this time keeping the result.
loss = SSD_Loss(np.expand_dims(y_GT,0),np.expand_dims(y_pred,0))