# Libraries used:

numpy 1.19.5
<br>
tensorflow 2.4.1
<br>
pandas 1.2.4
<br>
matplotlib 3.3.4
<br>
tensorflow_addons 0.12.1 

In [1]:
import numpy as np
import os
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow_addons as tfa

In [2]:
# Seed TensorFlow's global random generator so seeded TF ops repeat between runs.
RANDOM_SEED = 1234
tf.random.set_seed(RANDOM_SEED)

# Data processing and augmentation

In [3]:
# Load the bounding-box annotation table and preview its first five rows
# (DataFrame.head defaults to n=5).
df = pd.read_csv('Train.csv')
df.head()

Unnamed: 0,Image_ID,class,xmin,ymin,width,height
0,ID_007FAIEI,fruit_woodiness,87.0,87.5,228.0,311.0
1,ID_00G8K1V3,fruit_brownspot,97.5,17.5,245.0,354.5
2,ID_00WROUT9,fruit_brownspot,156.5,209.5,248.0,302.5
3,ID_00ZJEEK3,fruit_healthy,125.0,193.0,254.5,217.0
4,ID_018UIENR,fruit_brownspot,79.5,232.5,233.5,182.0


In [4]:
# Build three parallel lists with one entry per annotation row of `df`:
#   fnames  - path of the image file the row refers to
#   classes - one-hot label; every annotated box is an "object" sample, [1, 0]
#   bbox    - float32 box stored as [x_center, y_center, width, height]
#
# os.path.join produces the separator of the running OS (the path used to be
# hard-coded with a Windows '\\'), so the images are also found on Linux/macOS.
fnames = [os.path.join('Train_Images', '{}.jpg'.format(image_id))
          for image_id in df['Image_ID']]

# Explicit int32: NumPy's default integer width depends on the platform, while
# the background labels (tf.constant([[0,1]])) and the py_function output
# signature used further down are int32.
classes = [np.array([1, 0], dtype='int32') for _ in range(len(df))]

# Convert the top-left corner (xmin, ymin) to the box centre for all rows at once.
bbox_centered = np.stack([df['xmin'].to_numpy() + 0.5*df['width'].to_numpy(),
                          df['ymin'].to_numpy() + 0.5*df['height'].to_numpy(),
                          df['width'].to_numpy(),
                          df['height'].to_numpy()], axis=-1).astype('float32')
bbox = list(bbox_centered)
    

In [5]:
# Keep only the images that carry exactly one annotated box.
#
# Rows of the same image are expected to be adjacent in `fnames` (the CSV
# preview is ordered by Image_ID), so every image forms one run of equal names.
fnames_sorted = []   # file name of each run, in order of appearance
fnames_count = []    # number of consecutive rows in that run
first_index = {}     # file name -> index of its first row in `fnames`
for i, fname_i in enumerate(fnames):
    first_index.setdefault(fname_i, i)
    if fnames_sorted and fnames_sorted[-1] == fname_i:
        fnames_count[-1] += 1
    else:
        # A new run starts here. Runs are opened as soon as they are seen
        # (rather than closed when the *next* name shows up), so the last
        # image in the list is no longer silently dropped and an empty
        # `fnames` no longer raises IndexError.
        fnames_sorted.append(fname_i)
        fnames_count.append(1)

fnames_appearing_once = np.array(fnames_sorted)[np.array(fnames_count) == 1]

# Dictionary lookup replaces the repeated linear `fnames.index(...)` scans.
fnames_appearing_once_indexes = [first_index[name] for name in fnames_appearing_once]

bboxes_once = [bbox[index] for index in fnames_appearing_once_indexes]
classes_once = [classes[index] for index in fnames_appearing_once_indexes]

In [6]:
# One tf.data.Dataset per column (file name, one-hot class, box), zipped so that
# each element of `ds` is a (fname, classes, bbox) triple.
fnames_dataset, classes_dataset, bbox_dataset = (
    tf.data.Dataset.from_tensor_slices(column)
    for column in (fnames_appearing_once, classes_once, bboxes_once)
)
ds = tf.data.Dataset.zip((fnames_dataset, classes_dataset, bbox_dataset))

# Number of single-box images; used to size the train/test split.
ds_size = len(fnames_appearing_once)

In [7]:
# Sequential split: the first 80% of the elements train, the remainder tests.
train_ratio = 0.80
n_train = int(ds_size*train_ratio)
ds_train1 = ds.take(n_train)
ds_test1 = ds.skip(n_train)

In [8]:
#background data
@tf.function
def map_fn(fn, arrays, dtype=tf.float32):
    """Apply `fn` row by row across several tensors at once.

    For every index ii along the first axis of `arrays[0]`, calls
    fn(arrays[0][ii], arrays[1][ii], ...) and stacks the results with tf.map_fn.

    Args:
        fn: callable taking one slice of each tensor in `arrays`.
        arrays: sequence of tensors indexed along their first axis.
        dtype: forwarded to tf.map_fn as `fn_output_signature`.

    Returns:
        The tensor produced by tf.map_fn.
    """
    def apply_to_row(ii):
        return fn(*(array[ii] for array in arrays))

    row_count = tf.shape(arrays[0])[0]
    return tf.map_fn(apply_to_row, tf.range(row_count), fn_output_signature=dtype)

@tf.function
def IoU(inputs):
    """Row-wise IoU between two boxes packed side by side.

    Args:
        inputs: tensor with eight columns. Columns 0-3 hold one box and columns
            4-7 the other, each laid out as [x_center, y_center, width, height]
            (the layout produced by the data-preparation cell).

    Returns:
        One value per row: 1 minus the tensorflow_addons GIoULoss in 'iou' mode.
    """
    def to_corners(boxes):
        # [x_center, y_center, w, h] -> [y_min, x_min, y_max, x_max]
        half_w = 0.5*boxes[:,2]
        half_h = 0.5*boxes[:,3]
        return tf.stack([boxes[:,1]-half_h, boxes[:,0]-half_w,
                         boxes[:,1]+half_h, boxes[:,0]+half_w], axis=-1)

    iou_loss = tfa.losses.GIoULoss(mode = 'iou')

    anchor_corners = to_corners(inputs[:,:4])
    target_corners = to_corners(inputs[:,4:])

    # The loss is evaluated one box pair at a time so each row gets its own value.
    return 1-map_fn(iou_loss, (anchor_corners, target_corners))

def background_data(bbox):
    """Propose background boxes in the four corners of a 512x512 frame.

    Four boxes with the same width and height as `bbox` are placed flush against
    the frame corners; only those whose IoU with `bbox` is below 0.1 are kept.

    Args:
        bbox: box as [x_center, y_center, width, height].

    Returns:
        List of kept corner boxes (same layout as `bbox`); may be empty.
    """
    width, height = bbox[2], bbox[3]
    near_x, near_y = 0.5*width, 0.5*height
    far_x, far_y = 512.-near_x, 512.-near_y

    # Same corner order as before: (near, near), (near, far), (far, near), (far, far).
    corner_centres = [(near_x, near_y), (near_x, far_y), (far_x, near_y), (far_x, far_y)]
    corner_bboxes = tf.stack(
        [tf.stack([cx, cy, width, height], axis=-1) for cx, cy in corner_centres],
        axis=0)

    # Pair the original box with each of the four candidates for the IoU test.
    bboxes_original = tf.tile(tf.expand_dims(bbox, axis=0), [4, 1])
    iou_with_original = IoU(tf.concat([bboxes_original, corner_bboxes], axis=-1))

    background_bbox = tf.boolean_mask(corner_bboxes, iou_with_original < 0.1)

    return tf.unstack(background_bbox, axis=0)

In [9]:
def _with_background_samples(dataset):
    """Collect every sample of `dataset` and add background samples next to it.

    For each (fname, classes, bbox) element the original sample is kept, and
    every box returned by background_data(bbox) is appended as an extra sample
    of the same image labelled [0, 1].

    Args:
        dataset: iterable of (fname, classes, bbox) tensors.

    Returns:
        (fnames, classes, bboxes): three parallel lists of tensors.
    """
    out_fnames, out_classes, out_bboxes = [], [], []
    # Loop names are deliberately different from `fnames`/`classes`/`bbox`, so
    # the annotation lists built earlier in the notebook are not overwritten.
    for fname_t, classes_t, bbox_t in dataset:
        out_fnames.append(fname_t)
        out_classes.append(classes_t)
        out_bboxes.append(bbox_t)

        background_bboxes = background_data(bbox_t)
        n_background = len(background_bboxes)
        if n_background != 0:
            out_bboxes.extend(background_bboxes)
            out_fnames.extend(tf.unstack(tf.repeat(fname_t, n_background)))
            # Same dtype as the object labels so the list converts to one tensor.
            background_label = tf.constant([[0, 1]], dtype=classes_t.dtype)
            out_classes.extend(tf.unstack(tf.repeat(background_label, n_background, axis=0)))
    return out_fnames, out_classes, out_bboxes


# NOTE: ds_train1 / ds_test1 are rebound below. Re-running this cell without
# re-running the split cell first would add background boxes a second time.
fnames_bg_train, classes_bg_train, bbox_bg_train = _with_background_samples(ds_train1)

print(len(fnames_bg_train))
fnames_bg_train_dataset = tf.data.Dataset.from_tensor_slices(fnames_bg_train)
classes_bg_train_dataset = tf.data.Dataset.from_tensor_slices(classes_bg_train)
bbox_bg_train_dataset = tf.data.Dataset.from_tensor_slices(bbox_bg_train)

ds_train1 = tf.data.Dataset.zip((fnames_bg_train_dataset, classes_bg_train_dataset,
                                   bbox_bg_train_dataset))


fnames_bg_test, classes_bg_test, bbox_bg_test = _with_background_samples(ds_test1)

print(len(fnames_bg_test))
fnames_bg_test_dataset = tf.data.Dataset.from_tensor_slices(fnames_bg_test)
classes_bg_test_dataset = tf.data.Dataset.from_tensor_slices(classes_bg_test)
bbox_bg_test_dataset = tf.data.Dataset.from_tensor_slices(bbox_bg_test)

ds_test1 = tf.data.Dataset.zip((fnames_bg_test_dataset, classes_bg_test_dataset,
                                   bbox_bg_test_dataset))


6138
1544


Augmentation: rotate images and bounding boxes by 90, 180 and 270 degrees

In [10]:
def bbox_rotation(bbox, deg, img_size=512):
    """Rotate a centre-format box by `deg` quarter turns inside a square image.

    The mapping is meant to follow the image rotation applied in process_img,
    i.e. tfa.image.rotate(image, deg * pi / 2).

    Args:
        bbox: tensor [x_center, y_center, width, height]; must provide .numpy().
        deg: number of quarter turns - 0: none, 1: 90, 2: 180, 3: 270 degrees.
        img_size: side length of the square image in pixels (default 512).

    Returns:
        Tensor [x_center, y_center, width, height] of the rotated box. For
        deg 1 and 3 width and height are swapped. deg 0 returns the values
        unchanged.

    Raises:
        ValueError: if `deg` is not 0, 1, 2 or 3 (previously the NotImplemented
            sentinel was returned, which only failed later and less clearly).
    """
    bbox = bbox.numpy()
    x_center, y_center, width, height = bbox[0], bbox[1], bbox[2], bbox[3]

    if deg == 0:
        return tf.constant(bbox)
    if deg == 1:
        return tf.constant([y_center, img_size-x_center, height, width], dtype='float32')
    if deg == 2:
        return tf.constant([img_size-x_center, img_size-y_center, width, height], dtype='float32')
    if deg == 3:
        return tf.constant([img_size-y_center, x_center, height, width], dtype='float32')

    raise ValueError('deg must be 0, 1, 2 or 3, got {}'.format(deg))



# Side length (pixels) every image is resized to.
IMG_SIZE = 512


def process_img(img, deg):
    """Read a JPEG file and return it as a rotated float32 image.

    Args:
        img: path of the JPEG file.
        deg: number of quarter turns; the image is rotated by deg * pi / 2 radians.

    Returns:
        float32 image tensor resized to IMG_SIZE x IMG_SIZE with 3 channels.
    """
    raw_bytes = tf.io.read_file(img)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    resized = tf.image.resize(as_float, [IMG_SIZE, IMG_SIZE])
    angle = tf.constant(np.pi*0.5*deg)
    return tfa.image.rotate(resized, angle)

def read_images(fnames, classes, bbox, deg):
    """Load one sample: the rotated image, its label, and the matching rotated box.

    Args:
        fnames: path of the image file.
        classes: label tensor, passed through unchanged.
        bbox: box tensor handed to bbox_rotation.
        deg: number of quarter turns applied to both image and box.

    Returns:
        Tuple (image, classes, rotated_bbox).
    """
    rotated_image = process_img(fnames, deg)
    rotated_box = bbox_rotation(bbox, deg)
    return rotated_image, classes, rotated_box

In [11]:
def _load_rotated(dataset, deg):
    """Map a (fname, classes, bbox) dataset to loaded samples rotated by `deg` quarter turns.

    Each element is passed through read_images via tf.py_function and comes out
    as (float32 image, int32 classes, float32 bbox). Element order is preserved
    (deterministic=True) while the loading itself runs in parallel.
    """
    return dataset.map(
        lambda img, classes, bbox: tf.py_function(
            func=read_images,
            inp=[img, classes, bbox, deg],
            Tout=(tf.float32, tf.int32, tf.float32)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)


# Training set: the unrotated samples followed by the 90, 180 and 270 degree copies.
ds_train = _load_rotated(ds_train1, 0.)
ds_train_rotate1 = _load_rotated(ds_train1, 1.)
ds_train_rotate2 = _load_rotated(ds_train1, 2.)
ds_train_rotate3 = _load_rotated(ds_train1, 3.)

ds_train = ds_train.concatenate(ds_train_rotate1)
ds_train = ds_train.concatenate(ds_train_rotate2)
ds_train = ds_train.concatenate(ds_train_rotate3)

# Test set: built the same way.
ds_test = _load_rotated(ds_test1, 0.)
ds_test_rotate1 = _load_rotated(ds_test1, 1.)
ds_test_rotate2 = _load_rotated(ds_test1, 2.)
ds_test_rotate3 = _load_rotated(ds_test1, 3.)

ds_test = ds_test.concatenate(ds_test_rotate1)
ds_test = ds_test.concatenate(ds_test_rotate2)
ds_test = ds_test.concatenate(ds_test_rotate3)

In [12]:
def _as_input_and_targets(image, class_label, box):
    """Regroup (image, classes, bbox) into (input, (classes, bbox))."""
    return image, (class_label, box)


ds_train_final = ds_train.map(_as_input_and_targets)
ds_test_final = ds_test.map(_as_input_and_targets)

# Model building

In [13]:
@tf.function
def BGCN_training_loss(class_pred, class_t):
    """Categorical cross-entropy between predicted and target class vectors.

    Args:
        class_pred: predicted class probabilities.
        class_t: one-hot target classes.

    Returns:
        Loss from tf.keras.losses.CategoricalCrossentropy with default settings.
    """
    # Note the argument order: Keras losses take (y_true, y_pred).
    return tf.keras.losses.CategoricalCrossentropy()(class_t, class_pred)
    

## Background Classifier network

In [14]:
# Functional-API graph of the background classifier: an ImageNet-initialised
# VGG16 feature extractor (no classification head) for 64x64 RGB crops,
# followed by a small dense head.
base_model_bg = tf.keras.applications.VGG16(
                weights='imagenet', 
                input_shape=(64, 64, 3), 
                include_top=False)

# Unused experiment kept for reference (extra conv layer on an earlier VGG block).
#conv1=tf.keras.layers.Conv2D(512, (3,3), activation='relu',padding='same')(base_model.layers[-6].output)

# Head: flatten -> Dense(1024) -> BatchNorm -> Dense(512) -> 2-way softmax.
flatten1_bg=tf.keras.layers.Flatten()(base_model_bg.output)
# Unused experiment kept for reference (BatchNorm directly after the flatten).
#bn1=tf.keras.layers.BatchNormalization()(flatten1)
dense1_bg=tf.keras.layers.Dense(1024, activation='relu')(flatten1_bg)
bn2_bg=tf.keras.layers.BatchNormalization()(dense1_bg)
dense2_bg=tf.keras.layers.Dense(512, activation='relu')(bn2_bg)
       
# Two outputs matching the one-hot labels built earlier:
# [1, 0] for annotated boxes and [0, 1] for background boxes.
box_cls_bg=tf.keras.layers.Dense(2, activation='softmax')(dense2_bg)

# Complete network; wrapped by the BGCN class defined in the next cell.
bgcn_model=tf.keras.models.Model(inputs=base_model_bg.inputs, outputs=box_cls_bg)
        

In [15]:
class BGCN(tf.keras.models.Model):
    """Background classifier network.

    Wraps the module-level `bgcn_model` and scores image crops with its 2-way
    softmax. `call` expects crops that are already cut out; `prediction`,
    `train_step` and `test_step` receive full images plus boxes laid out as
    [x_center, y_center, width, height] and cut the crops themselves.

    The base model is always invoked with training=False, so layers that
    behave differently in training (e.g. its BatchNormalization) run in
    inference mode even during fit().
    NOTE(review): confirm this is intended.
    """

    # Box coordinates are divided by this value to obtain the normalised
    # coordinates expected by tf.image.crop_and_resize.
    _IMAGE_SIZE = 512
    # Height and width (pixels) of the crops fed to the base model.
    _CROP_SIZE = 64

    def __init__(self):
        super(BGCN, self).__init__()

        self.base_model = bgcn_model

        # Freeze the first 14 layers of the shared base model; the remaining
        # layers stay trainable. Note this mutates the module-level
        # `bgcn_model`, which every BGCN instance shares.
        for i in range(14):
            self.base_model.layers[i].trainable=False

    def _crop_boxes(self, images, bbox):
        """Cut box i out of image i and resize every crop to _CROP_SIZE x _CROP_SIZE."""
        # [x_center, y_center, w, h] -> normalised [y_min, x_min, y_max, x_max]
        bbox_cropped_coordinates = tf.stack([bbox[:,1]-0.5*bbox[:,3],
                                             bbox[:,0]-0.5*bbox[:,2],
                                             bbox[:,1]+0.5*bbox[:,3],
                                             bbox[:,0]+0.5*bbox[:,2]], axis=-1)/self._IMAGE_SIZE

        return tf.image.crop_and_resize(
            images, bbox_cropped_coordinates,
            box_indices=tf.range(0, tf.shape(bbox)[0], 1),
            crop_size=[self._CROP_SIZE, self._CROP_SIZE])

    @staticmethod
    def _preprocess(image):
        """Reverse the channel order (last axis) and standardise each image."""
        image = tf.reverse(image, axis=[-1])
        return tf.image.per_image_standardization(image)

    def call(self, image):
        """Classify a batch of already-cropped images; returns class probabilities."""
        return self.base_model(self._preprocess(image), training=False)

    @tf.function
    def prediction(self, image_input, bbox):
        """Classify the region `bbox[i]` of `image_input[i]` for every i.

        Args:
            image_input: batch of full images.
            bbox: one box per image, [x_center, y_center, width, height].

        Returns:
            Class probabilities from the base model, one row per box.
        """
        img_cropped = self._crop_boxes(image_input, bbox)
        return self.base_model(self._preprocess(img_cropped), training=False)

    def train_step(self, data):
        """One optimisation step on a batch of (images, (classes, bbox)).

        Returns:
            Dict mapping each compiled metric name to its current result.
        """
        inputs, targets = data[0], data[1]
        classes, bbox = targets[0], targets[1]

        # Cropping has no trainable parameters, so it stays outside the tape.
        img_cropped = self._crop_boxes(inputs, bbox)

        with tf.GradientTape() as tape:
            class_pred1 = self(img_cropped)
            loss_value = BGCN_training_loss(class_pred1, classes)

        grads = tape.gradient(loss_value, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        self.compiled_metrics.update_state(classes, class_pred1)

        return {m.name: m.result() for m in self.metrics}

    def test_step(self, data):
        """Evaluate one batch of (images, (classes, bbox)) and update the metrics.

        Returns:
            Dict mapping each compiled metric name to its current result.
        """
        inputs, targets = data[0], data[1]
        classes, bbox = targets[0], targets[1]

        class_pred1 = self(self._crop_boxes(inputs, bbox))

        self.compiled_metrics.update_state(classes, class_pred1)

        return {m.name: m.result() for m in self.metrics}
        

In [16]:
# Instantiate the classifier and call it once on a symbolic 64x64 RGB input,
# then print the layer summary of the wrapped network.
bgcn1 = BGCN()
inputs = tf.keras.Input(shape=(64, 64, 3))
bgcn1(inputs)

bgcn1.base_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 64, 64, 3)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 64, 64, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 64, 64, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 32, 32, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 32, 32, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 32, 32, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 16, 16, 128)       0     

# Training:
To train the classifier, uncomment the following block. Running it retrains the network and saves the weights in the 'checkpoints' folder. Whether or not that cell is run, the cell after it loads the weights from the 'checkpoints' folder.

In [17]:
# ds_batched = ds_train_final.batch(100, drop_remainder=True)
# ds_batched_test = ds_test_final.batch(100, drop_remainder=True)

# bgcn1=BGCN()
# bgcn1.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss=None,
#            metrics=[tf.keras.metrics.CategoricalAccuracy()])


# bgcn1.fit(x=ds_batched,  shuffle=True, epochs=1, 
#     validation_data=ds_batched_test, validation_steps=50)


# bgcn1.save_weights('./checkpoints/BGCN_9600_mini_batches_adam_0.0001_1epoch_trainable_vgg')

In [18]:
# Batch both splits in groups of 100, discarding the final incomplete batch.
ds_batched = ds_train_final.batch(batch_size=100, drop_remainder=True)
ds_batched_test = ds_test_final.batch(batch_size=100, drop_remainder=True)

# Fresh classifier, compiled without a Keras loss (the loss is computed inside
# BGCN.train_step), then restored from the saved checkpoint.
bgcn1 = BGCN()
bgcn1.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=None,
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)
bgcn1.load_weights('./checkpoints/BGCN_9600_mini_batches_adam_0.0001_1epoch_trainable_vgg')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x2f5cb1240a0>