# Objective of first fast YOLO inspired network
The first network will generate a square in which we will be able to find the real starfish. This means that I need very good recall without losing too much accuracy...  
I chose a fast network to avoid overfitting.  
**Important: this is not a real fast YOLO implementation; it is only inspired by YOLO.**

# Libraries

In [None]:
# Data analysis and processing
import pandas as pd
import numpy as np

# Tensor processing tool
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow import keras

# Helper Functions

In [None]:
def image_path(video_id, video_frame):
    """Build the path of one training frame inside the Kaggle input folder.

    Args:
        video_id: Identifier inserted into the ``video_<id>`` directory name.
        video_frame: Frame identifier used as the ``.jpg`` file stem.

    Returns:
        The full path string of the frame image.
    """
    video_dir = f"/kaggle/input/tensorflow-great-barrier-reef/train_images/video_{video_id}"
    return f"{video_dir}/{video_frame}.jpg"

In [None]:
def cocoincode(cocobboxdict):
    """Encode a COCO bbox dict as an int32 array ``[x, y, width, height]``.

    Args:
        cocobboxdict: Mapping with the keys 'x', 'y', 'width' and 'height'.

    Returns:
        A ``np.int32`` array holding the four values in that order.
    """
    field_order = ('x', 'y', 'width', 'height')
    values = [cocobboxdict[field] for field in field_order]
    return np.array(values, dtype=np.int32)

In [None]:
def coco2yolo(cocobbox):
    """Convert a COCO box ``[x, y, width, height]`` to a YOLO-style box.

    The first two values of the result are the box center: the COCO ``x``/``y``
    plus half of the width/height, rounded up for odd sizes. Width and height
    are carried over unchanged.

    Args:
        cocobbox: Indexable with at least four entries ``[x, y, width, height]``.

    Returns:
        A ``np.int32`` array ``[x_center, y_center, width, height]``.
    """
    x_left = cocobbox[0]
    y_top = cocobbox[1]
    width = cocobbox[2]
    height = cocobbox[3]

    # Half size rounded up: integer half plus the remainder of the division.
    x_center = x_left + width // 2 + width % 2
    y_center = y_top + height // 2 + height % 2

    return np.array([x_center, y_center, width, height], dtype=np.int32)

In [None]:
def yolo2yolosquared(yolobbox):
    """Turn a YOLO box ``[x, y, width, height]`` into a square ``[x, y, side]``.

    The side of the square is the larger of the box width and height, and the
    first two values are kept as they are.

    Args:
        yolobbox: Indexable with at least four entries ``[x, y, width, height]``.

    Returns:
        A ``np.int32`` array ``[x, y, side]``.
    """
    x_center, y_center = yolobbox[0], yolobbox[1]
    side = max(yolobbox[2], yolobbox[3])
    return np.array([x_center, y_center, side], dtype=np.int32)

In [None]:
def yolosquared2yolo(yolosquaredbbox):
    """Expand a square box ``[x, y, side]`` into ``[x, y, width, height]``.

    Both the width and the height of the result are set to ``side``.

    Args:
        yolosquaredbbox: Indexable with at least three entries ``[x, y, side]``.

    Returns:
        A ``np.int32`` array ``[x, y, side, side]``.
    """
    x_center = yolosquaredbbox[0]
    y_center = yolosquaredbbox[1]
    side = yolosquaredbbox[2]
    return np.array([x_center, y_center, side, side], dtype=np.int32)
    

In [None]:
def yolo2coco(yolobboxes):
    """Placeholder for a YOLO-to-COCO conversion; not implemented yet.

    The body is empty, so calling this currently returns ``None``.
    NOTE(review): presumably meant to be the inverse of ``coco2yolo`` for one
    or several boxes -- confirm the intended input/output format before
    implementing.
    """
    pass

In [None]:
from tensorflow.keras.utils import load_img
from tensorflow.keras.utils import img_to_array

def my_load_image(image_path):
    """Load an image file and return its pixels scaled by 1/255.

    Args:
        image_path: Path of the image file, passed to ``load_img``.

    Returns:
        A ``np.float64`` array holding the ``img_to_array`` result divided
        by 255.
    """
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy (1.24+); ``np.float64`` is the equivalent dtype.
    pixels = img_to_array(load_img(image_path))
    return np.array(pixels / 255, dtype=np.float64)

# Training Functions and classes

## YOLO Squared Transformer

In [None]:
class YOLOSquaredTransformer:
    """Convert COCO annotations to the "YOLO squared" grid encoding.

    The image is split into an ``S x S`` grid. Every grid cell can hold up to
    ``B`` square boxes, each stored as ``['prob', 'x_center', 'y_center',
    'side']``: the center is expressed relative to the cell origin in units of
    the cell size, and the side is divided by the bigger image side.
    """

    def __init__(self, S, B, img_width, img_height):
        """Store the grid configuration and derive the cell size.

        Args:
            S: Number of grid cells along each image axis.
            B: Maximum number of boxes stored per grid cell.
            img_width: Image width in pixels.
            img_height: Image height in pixels.
        """
        self.S = S
        self.B = B
        self.img_width = img_width
        self.img_height = img_height

        # Cell size in pixels, rounded up so that S cells cover the image.
        self.l_x = img_width // S + (1 if img_width % S != 0 else 0)
        self.l_y = img_height // S + (1 if img_height % S != 0 else 0)

        self.img_big_side = max([img_width, img_height])

    @staticmethod
    def _cell_index(coord, cell_size):
        """Return the grid index of the cell that owns ``coord``.

        A coordinate lying exactly on a cell boundary belongs to the cell
        before it. The result is clamped at 0 so that a coordinate of 0 does
        not yield -1, which would silently wrap to the last cell when used as
        an array index.
        """
        index = coord // cell_size - (1 if coord % cell_size == 0 else 0)
        return max(index, 0)

    def batch_from_cocodictlist(self, cocodictlistbatch):
        """Encode a batch of COCO annotation lists as a YOLO squared array.

        Args:
            cocodictlistbatch: One list per image; each list contains dicts
                with the keys 'x', 'y', 'width' and 'height' (top-left corner
                plus size).

        Returns:
            A ``np.float64`` array of shape ``(batch_size, S, S, B, 4)`` whose
            last axis is ``['prob', 'x_center', 'y_center', 'side']``
            (normalized). Boxes falling outside the grid, or exceeding the
            ``B`` slots of their cell, are dropped.
        """
        batch_size = len(cocodictlistbatch)

        # Use the instance configuration rather than notebook-level globals.
        bboxes_in_square = np.zeros((batch_size, self.S, self.S), dtype=np.int32)
        output = np.zeros((batch_size, self.S, self.S, self.B, 4), dtype=np.float64)

        for batch_pos, cocodictlist in enumerate(cocodictlistbatch):
            for cocodict in cocodictlist:
                coco = cocoincode(cocodict)
                yolo = coco2yolo(coco)
                yolosquared = yolo2yolosquared(yolo)

                S_x = self._cell_index(yolosquared[0], self.l_x)
                S_y = self._cell_index(yolosquared[1], self.l_y)

                if S_x >= self.S or S_y >= self.S:
                    continue

                n = bboxes_in_square[batch_pos, S_y, S_x]
                if n >= self.B:
                    # The cell is already full; extra boxes are ignored.
                    continue

                output[batch_pos, S_y, S_x, n, 0] = 1
                output[batch_pos, S_y, S_x, n, 1] = (yolosquared[0] - S_x * self.l_x) / self.l_x
                output[batch_pos, S_y, S_x, n, 2] = (yolosquared[1] - S_y * self.l_y) / self.l_y
                output[batch_pos, S_y, S_x, n, 3] = yolosquared[2] / self.img_big_side
                bboxes_in_square[batch_pos, S_y, S_x] = n + 1

        return output

    def cocolistbatch_from_yolosquaredbatch(self, y, cut_prob):
        """Placeholder for the decoding step; not implemented yet.

        Intended behavior (from the original notes): take a batch of YOLO
        squared tensors of shape ``(batch_size, S, S, B, 4)`` with last axis
        ``['prob', 'x_center', 'y_center', 'side']`` (normalized) and return,
        per image, the list of COCO values ``['prob', 'x_lefttop',
        'y_lefttop', 'width', 'height']`` whose ``prob`` is bigger than
        ``cut_prob``. Currently returns ``None``.
        """
        pass

    def iou(self, y_true, y_pred):
        """Compute the IoU between matching true and predicted square boxes.

        Args:
            y_true: Tensor of shape ``(batch_size, S, S, B, 4)`` with last
                axis ``['prob', 'x', 'y', 'side']`` (normalized).
            y_pred: Tensor with the same layout as ``y_true``.

        Returns:
            Tensor of shape ``(batch_size, S, S, B, 1)`` holding the IoU of
            each box slot; slots whose union area is 0 get an IoU of 0.
        """
        # The box fields live on the LAST axis of the rank-5 tensors, so the
        # slices use ``...``; ``[:, :, :, a:b]`` would slice the B axis.
        l_true = y_true[..., 3:4] * self.img_big_side
        l_pred = y_pred[..., 3:4] * self.img_big_side

        x_true = y_true[..., 1:2] * self.l_x
        x_pred = y_pred[..., 1:2] * self.l_x

        yc_true = y_true[..., 2:3] * self.l_y
        yc_pred = y_pred[..., 2:3] * self.l_y

        dx = tf.math.abs(x_true - x_pred)
        dy = tf.math.abs(yc_true - yc_pred)

        # Length spanned by both squares along each axis: the distance between
        # their outermost edges, never smaller than either side.
        span_x = tf.math.maximum(tf.math.maximum(l_true / 2 + l_pred / 2 + dx, l_true), l_pred)
        span_y = tf.math.maximum(tf.math.maximum(l_true / 2 + l_pred / 2 + dy, l_true), l_pred)

        # Overlap per axis is (sum of sides - span), clipped at 0 by relu.
        # The intersection is the x-overlap times the y-overlap.
        overlap_x = tf.nn.relu(l_true + l_pred - span_x)
        overlap_y = tf.nn.relu(l_true + l_pred - span_y)
        I_area = overlap_x * overlap_y

        U_area = tf.pow(l_true, 2) + tf.pow(l_pred, 2) - I_area

        # divide_no_nan yields 0 instead of NaN when the union area is 0.
        return tf.math.divide_no_nan(I_area, U_area)

## Batch generator

In [None]:
from tensorflow.keras.utils import Sequence
from json import loads

class YoloSquaredSequence(Sequence):
    """Keras ``Sequence`` that serves (images, YOLO squared labels) batches.

    Each batch is a consecutive slice of ``train_df``; images are read from
    the 'image_path' column and labels are built from the 'annotations'
    column through the given transformer.
    """

    def __init__(self, train_df, batch_size, transformer):
        """Keep the data frame, the batch size and the label transformer."""
        self.transformer = transformer
        self.batch_size = batch_size
        self.train_df = train_df

    def __len__(self):
        """Return the number of full batches available in the data frame."""
        n_rows = len(self.train_df)
        return n_rows // self.batch_size

    def __getitem__(self, idx):
        """Return the ``(X, y)`` pair of batch number ``idx``."""
        start = idx * self.batch_size
        stop = (idx + 1) * self.batch_size
        batch_df = self.train_df[start:stop]

        return self.get_x(batch_df), self.get_y(batch_df)

    def get_x(self, batch_df):
        """Load every image listed in the batch and stack them in an array."""
        images = [my_load_image(path) for path in batch_df['image_path']]
        return np.array(images)

    def get_y(self, batch_df):
        """Parse the annotation strings of the batch and encode them."""
        cocodictlistbatch = []
        for annotations in batch_df['annotations']:
            # Annotations use single quotes; swap them so json can parse them.
            as_json = annotations.replace("'", '"')
            cocodictlistbatch.append(loads(as_json))

        return self.transformer.batch_from_cocodictlist(cocodictlistbatch)

## Loss Function

In [None]:
from tensorflow.keras.losses import Loss

class SquaredYoloLoss(Loss):
    """YOLO-inspired loss for square boxes.

    Works on rank-5 tensors of shape ``(batch_size, S, S, B, 4)`` whose last
    axis is ``['prob', 'x', 'y', 'side']``.
    """

    def __init__(self, lambda_coord, lambda_noobj, transformer, *args, **kwargs):
        """Configure the loss.

        Args:
            lambda_coord: Weight of the squared coordinate/side error.
            lambda_noobj: Weight of the confidence error in empty box slots.
            transformer: Object providing ``iou(y_true, y_pred)``; it is
                expected to return one IoU per box slot with shape
                ``(batch_size, S, S, B, 1)``.
            *args: Forwarded to ``keras.losses.Loss``.
            **kwargs: Forwarded to ``keras.losses.Loss``.
        """
        self.lambda_coord = lambda_coord
        self.lambda_noobj = lambda_noobj
        self.transformer = transformer

        super(SquaredYoloLoss, self).__init__(*args, **kwargs)

    def call(self, y_true, y_pred):
        """Return the scalar loss for one batch.

        Args:
            y_true: Ground-truth tensor, shape ``(batch_size, S, S, B, 4)``.
            y_pred: Predicted tensor with the same shape.

        Returns:
            Scalar tensor: weighted coordinate error, plus a penalty for
            predicted sides smaller than the true side, plus the confidence
            errors for occupied and empty box slots.
        """
        # TODO: it is still necessary to find which predicted object is paired
        # with which true object...

        # The box fields are on the LAST axis, hence the ``...`` slices;
        # ``[:, :, :, a:b]`` would slice the B axis of the rank-5 tensors.

        # 1 where y_true holds a box in that slot, 0 otherwise.
        obj_exists = y_true[..., 0:1]
        obj_noexists = 1 - obj_exists

        iou = self.transformer.iou(y_true, y_pred)

        coord_error = tf.reduce_sum(
            obj_exists * tf.pow(y_pred[..., 1:4] - y_true[..., 1:4], 2)
        )
        # Extra penalty whenever the predicted side is smaller than the truth.
        undersize_penalty = tf.reduce_sum(
            tf.nn.relu(y_true[..., 3:4] - y_pred[..., 3:4])
        )
        sum_exists_coord = self.lambda_coord * coord_error + undersize_penalty

        # Where a box exists, the predicted confidence should match the IoU.
        sum_predict = tf.reduce_sum(obj_exists * tf.pow(y_pred[..., 0:1] - iou, 2))

        # Where no box exists, the predicted confidence should be 0.
        sum_noexists = self.lambda_noobj * tf.reduce_sum(
            obj_noexists * tf.pow(y_pred[..., 0:1], 2)
        )

        return sum_exists_coord + sum_predict + sum_noexists

## Metric Function

In [None]:
from tensorflow.keras.metrics import Metric

class YOLOSquaredMetric(Metric):
    """Metric that accumulates the number of ground-truth boxes seen.

    NOTE(review): the ``yoloRecall`` and ``finds`` weights are created but
    never updated; the metric currently reports only the object count.
    Computing a recall from the IoU is still to be done.
    """

    def __init__(self, transformer, name='YOLO Recall Squared Metric', **kwargs):
        """Create the state variables.

        Args:
            transformer: Object kept on the instance for later use.
            name: Name of the metric.
            **kwargs: Forwarded to ``keras.metrics.Metric``.
        """
        super(YOLOSquaredMetric, self).__init__(name=name, **kwargs)
        self.recall = self.add_weight(name='yoloRecall', initializer='zeros')
        self.objects = self.add_weight(name='objects', initializer='zeros')
        self.finds = self.add_weight(name='finds', initializer='zeros')
        self.transformer = transformer

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add the number of true boxes in this batch to the running total.

        Args:
            y_true: Tensor of shape ``(batch_size, S, S, B, 4)`` whose last
                axis starts with the 'prob' flag.
            y_pred: Predictions; not used yet.
            sample_weight: Ignored.
        """
        # The 'prob' flag is index 0 of the LAST axis, hence the ``...`` slice.
        obj_exists = tf.cast(y_true[..., 0:1], self.objects.dtype)

        # Update the weight variable in place; rebinding ``self.objects`` to a
        # tensor would discard the state variable created in ``__init__``.
        self.objects.assign_add(tf.reduce_sum(obj_exists))

    def result(self):
        """Return the accumulated number of ground-truth boxes."""
        return self.objects

# Training Specifications

## Constants - Hyperparameters

In [None]:
# Number of grid cells per image axis (the grid is S x S).
S = 7

# Maximum number of bounding boxes per grid cell.
B = 2

# Number of samples per training batch.
batch_size = 40

# Image dimensions; img_shape is ordered (height, width, channels).
img_height, img_width = 720, 1280
img_shape = (img_height, img_width, 3)

## Preprocessing data to train 

In [None]:
def load_train_df():
    """Load the training table, keep annotated frames and shuffle the rows.

    Returns:
        A DataFrame built from ``train.csv`` that contains only rows whose
        'annotations' value is not ``'[]'``, with an extra 'image_path'
        column, in random row order.
    """
    # Load train.csv:
    complete_train_df = pd.read_csv('/kaggle/input/tensorflow-great-barrier-reef/train.csv')

    # Only use images where we have starfish. The explicit copy makes the
    # filtered frame independent, so adding a column below does not trigger
    # pandas' SettingWithCopyWarning.
    has_annotations = complete_train_df['annotations'] != '[]'
    complete_train_df = complete_train_df[has_annotations].copy()

    # Create column image_path (the original author noted it is expensive in
    # memory). Iterating the two columns directly avoids building a Series
    # per row as a row-wise ``apply`` does.
    complete_train_df['image_path'] = [
        image_path(video_id, video_frame)
        for video_id, video_frame in zip(
            complete_train_df['video_id'], complete_train_df['video_frame']
        )
    ]

    # Shuffle dataframe
    return complete_train_df.sample(frac=1)


train_df = load_train_df()

## Model Definition

In [None]:
# Label encoder configured with the grid and image hyperparameters.
transformer = YOLOSquaredTransformer(
    S=S,
    B=B,
    img_width=img_width,
    img_height=img_height,
)

In [None]:
# Batch generator feeding the training loop from the training data frame.
batch_sequence = YoloSquaredSequence(
    train_df=train_df,
    batch_size=batch_size,
    transformer=transformer,
)

In [None]:
# Convolutional feature extractor followed by two dense layers; the final
# sigmoid output is reshaped to (S, S, B, 4), i.e. four values per box slot.
model = keras.models.Sequential(layers=[
    keras.layers.Conv2D(
        filters=128,
        kernel_size=9,
        strides=(2, 4),
        activation="relu",
        padding="same",
        input_shape=img_shape,
    ),
    keras.layers.MaxPooling2D(pool_size=2),
    keras.layers.Conv2D(filters=128, kernel_size=3, activation="relu", padding="same"),
    keras.layers.MaxPooling2D(pool_size=2),
    keras.layers.Conv2D(filters=128, kernel_size=1, activation="relu", padding="same"),
    keras.layers.MaxPooling2D(pool_size=2),
    keras.layers.Conv2D(filters=128, kernel_size=3, activation="relu", padding="same"),
    keras.layers.MaxPooling2D(pool_size=2),
    keras.layers.Conv2D(filters=256, kernel_size=1, activation="relu", padding="same"),
    keras.layers.Conv2D(filters=256, kernel_size=3, activation="relu", padding="same"),
    keras.layers.MaxPooling2D(pool_size=2),
    keras.layers.Flatten(),
    keras.layers.Dense(units=S*S*B*4, activation="relu"),
    keras.layers.Dropout(rate=0.5),
    keras.layers.Dense(units=S*S*B*4, activation="sigmoid"),
    keras.layers.Reshape(target_shape=(S, S, B, 4)),
])

In [None]:
# Print an overview of the model's layers.
model.summary()

In [None]:
# Loss with coordinate weight 5 and no-object weight 0.5.
loss_function = SquaredYoloLoss(
    lambda_coord=5,
    lambda_noobj=0.5,
    transformer=transformer,
)

In [None]:
# Custom metric instance passed to model.compile.
yolo_recall = YOLOSquaredMetric(transformer=transformer)

In [None]:
# Attach the custom loss, the RMSprop optimizer and the tracked metrics.
model.compile(
    loss=loss_function,
    optimizer="rmsprop",
    metrics=["accuracy", yolo_recall],
)

In [None]:
# Train for 30 epochs. `batch_size` is intentionally not passed to fit():
# `batch_sequence` is a keras Sequence that already yields complete batches,
# and Keras requires the argument to be left unset for such inputs.
model.fit(x=batch_sequence,
    epochs=30)

In [None]:
# Save the trained model under the path "fastLazyYOLO".
model.save("fastLazyYOLO")