The purpose of this notebook is to test a homemade YOLO-like model.

In [None]:
import numpy as np # linear algebra
import random
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import ast
import os
from tqdm import tqdm
tqdm.pandas()
import seaborn as sns
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import keras.backend as K
from sklearn.model_selection import train_test_split

# 1. Exploration

In [None]:
# Load the training annotations CSV and preview its first 20 rows.
trainpath = '../input/tensorflow-great-barrier-reef/train.csv'
trainDF=pd.read_csv(trainpath)
trainDF.head(20)

In [None]:
# Print the column summary of the training frame.
trainDF.info()

In [None]:
# Turn each 'annotations' string into the Python literal it spells out,
# then record how many entries every row holds.
trainDF['annotations'] = trainDF['annotations'].progress_apply(ast.literal_eval)
trainDF['nb_bbox'] = trainDF['annotations'].apply(len)

In [None]:
def get_bboxes(list_dict):
    """Convert a list of annotation dicts into a list of box tuples.

    Args:
        list_dict: iterable of mappings, each providing the keys
            'x', 'y', 'width' and 'height'.

    Returns:
        A list of (x, y, width, height) tuples, in input order.

    Raises:
        KeyError: if an entry lacks one of the four keys.
    """
    # The loop variable is named `box` so the builtin `dict` is not shadowed.
    return [
        (box['x'], box['y'], box['width'], box['height'])
        for box in list_dict
    ]

# Store the (x, y, width, height) tuples of every row in a 'bboxes' column.
trainDF['bboxes']=trainDF['annotations'].progress_apply(get_bboxes)

In [None]:
# Build each frame's image path from its video_id and video_frame columns.
trainDF['path']='../input/tensorflow-great-barrier-reef/train_images/video_'+trainDF['video_id'].map(str)+'/'+trainDF['video_frame'].map(str)+'.jpg'

In [None]:
# Preview the first 50 rows, including the derived columns.
trainDF.head(50)

In [None]:
# Histogram of the number of boxes per frame.
sns.histplot(trainDF,x='nb_bbox')

In [None]:
def plotbbox(video,imageid):
    """Show one training frame with its annotated boxes outlined in red.

    Args:
        video: video id; used to build the image path and to select the
            matching row of `trainDF`.
        imageid: frame number inside that video.

    Raises:
        IndexError: if `trainDF` has no row for (video, imageid).

    Side effects:
        Opens a new 20x20 matplotlib figure and prints the image size.
    """
    path = '../input/tensorflow-great-barrier-reef/train_images/video_'+str(video)+'/'+str(imageid)+'.jpg'
    matching = trainDF[(trainDF['video_id']==video)&(trainDF['video_frame']==imageid)]
    bboxes = matching.iloc[0]['bboxes']
    # The context manager closes the file handle once the pixels have been
    # handed to matplotlib, instead of leaving it open.
    with Image.open(path) as img:
        plt.figure(figsize=(20,20))
        plt.imshow(img)
        size = img.size
    ax = plt.gca()
    for bbox in bboxes:
        rect = Rectangle((bbox[0],bbox[1]),bbox[2],bbox[3],linewidth=5,edgecolor='r',facecolor='none')
        ax.add_patch(rect)
    print(size)
    
# Example: frame 49 of video 0.
plotbbox(0,49)

In [None]:
def coco2yolo(bbox):
    """Turn a corner-anchored box into a centre-anchored one.

    Args:
        bbox: indexable (x, y, w, h), where (x, y) is the box corner.

    Returns:
        Tuple (xm, ym, w, h) with (xm, ym) = (x + w/2, y + h/2).
    """
    left, top, width, height = bbox[0], bbox[1], bbox[2], bbox[3]
    return (left + width/2, top + height/2, width, height)

def yolo2coco(bbox):
    """Turn a centre-anchored box into a corner-anchored one.

    Args:
        bbox: indexable (xm, ym, w, h), where (xm, ym) is the box centre.

    Returns:
        Tuple (x, y, w, h) with (x, y) = (xm - w/2, ym - h/2).
    """
    centre_x, centre_y, width, height = bbox[0], bbox[1], bbox[2], bbox[3]
    return (centre_x - width/2, centre_y - height/2, width, height)

# 2. Create dataset

In [None]:
# Frame size in pixels as (width, height).
# label_encoder, label_decoder and label_submitter all use IMG_SIZE[0] for the
# x axis and IMG_SIZE[1] for the y axis. Box x coordinates are horizontal, so
# the width has to come first; with (720, 1280) the x grid only spanned 720 px
# of the 1280 px wide frames.
IMG_SIZE = (1280,720)
# Model input shape as (height, width, channels).
IMG_SHAPE = (720,1280,3)
# Grid resolution along the first (x) and second (y) label axes.
NB_I = 7
NB_J = 7
NB_CELLS = 7
NB_CLASS = 0 # ==> not used 
# Boxes predicted per grid cell; each box takes 5 values (confidence, x, y, w, h).
NB_BOX = 1
TENSOR_DEPTH = NB_BOX*5+NB_CLASS
BATCH_SIZE = 32

In [None]:
# Reload the CSV from scratch (dropping the exploration columns) and rebuild
# the image path of every frame.
trainDF=pd.read_csv(trainpath)
trainDF['path']='../input/tensorflow-great-barrier-reef/train_images/video_'+trainDF['video_id'].map(str)+'/'+trainDF['video_frame'].map(str)+'.jpg'

In [None]:
# Parse the annotation strings, count the entries per frame, and keep only
# the frames that have at least one entry.
trainDF['listbbox'] = trainDF['annotations'].progress_apply(ast.literal_eval)
trainDF['nbboxes'] = trainDF['listbbox'].progress_apply(len)
trainDF = trainDF[trainDF['nbboxes'] > 0]
trainDF.describe()

In [None]:
# Inputs are image paths; targets are the raw annotation strings.
X,y = trainDF['path'],trainDF['annotations']
# Random 80/20 split at frame level.
# NOTE(review): no random_state is passed, so the split differs between runs;
# frames of one video can also land on both sides of the split.
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)

In [None]:
# X_train = trainDF[trainDF['video_id'].isin([0,1])]['path']
# y_train = trainDF[trainDF['video_id'].isin([0,1])]['annotations']

# X_test = trainDF[trainDF['video_id']==2]['path']
# y_test = trainDF[trainDF['video_id']==2]['annotations']

In [None]:
# Peek at the raw annotation strings of the training split.
y_train.head(10)

In [None]:
# Keep a reference to the raw test annotations before y_test is re-encoded.
y_test_init = y_test

In [None]:
def label_encoder(bboxes):
    """Encode corner-anchored boxes into an NB_I x NB_J x TENSOR_DEPTH grid.

    Each box is assigned to the cell that contains its centre; the first
    index runs along x (extent IMG_SIZE[0]) and the second along y (extent
    IMG_SIZE[1]). A cell stores [1, x, y, w, h], where x/y are the centre
    offsets in cell units and w/h are the square roots of the box size
    divided by IMG_SIZE[0] / IMG_SIZE[1]. Only the first box that reaches a
    cell is kept.

    Args:
        bboxes: iterable of (x, y, w, h) boxes in pixels.

    Returns:
        numpy array of shape (NB_I, NB_J, TENSOR_DEPTH).
    """
    cell_w = IMG_SIZE[0]/NB_I
    cell_h = IMG_SIZE[1]/NB_J
    grid = np.zeros([NB_I,NB_J,TENSOR_DEPTH])
    for raw_box in bboxes:
        centred = coco2yolo(raw_box)
        # Cell indices, clamped to the last cell of each axis.
        ix = min(NB_I-1,int(centred[0]/cell_w))
        iy = min(NB_J-1,int(centred[1]/cell_h))
        encoded = [
            centred[0]/cell_w-ix,
            centred[1]/cell_h-iy,
            np.sqrt(centred[2]/IMG_SIZE[0]),
            np.sqrt(centred[3]/IMG_SIZE[1]),
        ]
        if grid[ix,iy,0]!=0:
            # Cell already taken by an earlier box.
            continue
        grid[ix,iy,0]=1
        grid[ix,iy,1:5]=encoded
    return grid

In [None]:
def prepare_y(annotations):
    """Encode one raw annotation string as a label grid.

    Args:
        annotations: string holding a Python literal list of box dicts.

    Returns:
        The array produced by `label_encoder` for those boxes.
    """
    parsed = ast.literal_eval(annotations)
    return label_encoder(get_bboxes(parsed))

In [None]:
# Replace every annotation string with its encoded label grid.
y_train = y_train.progress_apply(prepare_y)
y_test = y_test.progress_apply(prepare_y)

In [None]:
# Stack the per-frame grids into one array per split and report the shapes.
y_train = np.stack(y_train.to_numpy())
y_test = np.stack(y_test.to_numpy())
print('y_train.shape: ',y_train.shape)
print('y_test.shape: ',y_test.shape)

In [None]:
@tf.function
def image_loader(path):
    """Read the file at `path` and decode it as a 3-channel JPEG tensor."""
    raw_bytes = tf.io.read_file(path)
    return tf.image.decode_jpeg(raw_bytes, channels=3)

@tf.function
def data_flipper(img,label_matrix):
    """Mirror an image left-right and mirror its label grid to match.

    Args:
        img: image tensor accepted by `tf.image.flip_left_right`.
        label_matrix: label grid whose last axis is [c, x, y, w, h].

    Returns:
        (flipped image, flipped label grid).
    """
    flipped_img = tf.image.flip_left_right(img)
    conf = label_matrix[...,0]
    x_off = label_matrix[...,1]
    y_off = label_matrix[...,2]
    width = label_matrix[...,3]
    height = label_matrix[...,4]
    # Mirror the x offset; multiplying by the confidence channel keeps the
    # x value of empty cells at 0.
    mirrored_x = (1-x_off)*conf
    channels = tf.stack([conf,mirrored_x,y_off,width,height],axis=-1)
    # Reverse the order of the cells along the first grid axis.
    flipped_labels = tf.reverse(channels,[0])
    return flipped_img, flipped_labels
    
@tf.function
def prepare_data(path,label_matrix):
    """Load one image and flip it (with its labels) with probability 0.5.

    Args:
        path: path of the JPEG file to load.
        label_matrix: label grid belonging to that image.

    Returns:
        (image, label grid), either both flipped or both unchanged.
    """
    img = image_loader(path)
    # The coin toss has to be a TensorFlow op: a Python-level
    # random.random() inside a tf.function runs only once, while the
    # function is traced, so every element would share the same decision.
    do_flip = tf.random.uniform([]) < 0.5
    img, label_matrix = tf.cond(
        do_flip,
        lambda: data_flipper(img, label_matrix),
        lambda: (img, label_matrix),
    )
    return img,label_matrix

In [None]:
# Input pipelines: (path, label grid) pairs -> prepare_data -> batches.
# Only the training pipeline is shuffled; both go through prepare_data, so the
# validation samples pass through the same random-flip step.
train_ds = tf.data.Dataset.from_tensor_slices((X_train,y_train)).map(prepare_data).shuffle(100).batch(BATCH_SIZE).prefetch(2) #Test whether this works with caching
validation_ds = tf.data.Dataset.from_tensor_slices((X_test,y_test)).map(prepare_data).batch(BATCH_SIZE).prefetch(2)

In [None]:
def label_decoder(label_matrix, threshold = 0.5):
    """Decode a label grid back into corner-anchored pixel boxes.

    Args:
        label_matrix: grid indexed as [i, j, channel] with channels
            [c, x, y, w, h] as written by `label_encoder`.
        threshold: a cell is decoded only if its confidence is strictly
            greater than this value.

    Returns:
        List of (x, y, w, h) tuples, ordered by i then j.
    """
    cell_w = IMG_SIZE[0]/NB_I
    cell_h = IMG_SIZE[1]/NB_J
    decoded = []

    for i in range(NB_I):
        for j in range(NB_J):
            if not (label_matrix[i,j,0]>threshold):
                continue
            # Cell offset -> absolute centre position.
            centre_x = (i+label_matrix[i,j,1])*cell_w
            centre_y = (j+label_matrix[i,j,2])*cell_h
            # Sizes are stored as square roots of the normalised size.
            root_w = label_matrix[i,j,3]
            root_h = label_matrix[i,j,4]
            width = root_w*root_w*IMG_SIZE[0]
            height = root_h*root_h*IMG_SIZE[1]
            decoded.append(yolo2coco((centre_x,centre_y,width,height)))

    return decoded


def label_submitter(label_matrix, threshold = 0.5):
    """Decode a label grid into integer boxes clipped to the image.

    Args:
        label_matrix: grid indexed as [i, j, channel] with channels
            [c, x, y, w, h] as written by `label_encoder`.
        threshold: a cell is decoded only if its confidence is strictly
            greater than this value.

    Returns:
        List of (confidence, x, y, w, h) tuples, ordered by i then j; x, y,
        w and h are ints, with x and y >= 0 and w and h >= 0.
    """
    xsize = IMG_SIZE[0]/NB_I
    ysize = IMG_SIZE[1]/NB_J
    max_x = IMG_SIZE[0]-1
    max_y = IMG_SIZE[1]-1
    bboxes = []

    for i in range(NB_I):
        for j in range(NB_J):
            c = label_matrix[i,j,0]
            if not (c>threshold):
                continue
            xc = (i+label_matrix[i,j,1])*xsize
            yc = (j+label_matrix[i,j,2])*ysize
            # Sizes are stored as square roots of the normalised size.
            w = label_matrix[i,j,3]
            w = w*w*IMG_SIZE[0]
            h = label_matrix[i,j,4]
            h = h*h*IMG_SIZE[1]
            x0, y0, w, h = yolo2coco((xc,yc,w,h))
            # Clip on the left/top as well as on the right/bottom: a centre
            # close to the border would otherwise give a negative corner, and
            # a corner past the far edge a negative size.
            x0c = max(0,x0)
            y0c = max(0,y0)
            wc = max(0,min(max_x-x0c,w-(x0c-x0)))
            hc = max(0,min(max_y-y0c,h-(y0c-y0)))
            bboxes.append((c,int(x0c),int(y0c),int(wc),int(hc)))

    return bboxes

In [None]:
# Sanity check: decode the labels of one training sample and draw them on
# that sample's image.
img,lbl = next(iter(train_ds))
print(img.shape)
print(lbl.shape)
# print('X: ',img[17])
# print('------------------------------------------------------------------------------------')
# print('y: ',lbl[2])
# One index for both tensors: the boxes of sample 2 used to be drawn on the
# image of sample 31.
sample_idx = 2
pred_bboxes = label_decoder(lbl[sample_idx,:,:,:],threshold=0.1)
plt.figure(figsize=(20,20))
plt.imshow(img.numpy()[sample_idx,:,:,:].astype('uint8'))
ax = plt.gca()
for i,bbox in enumerate(pred_bboxes):
    rect = Rectangle((bbox[0],bbox[1]),bbox[2],bbox[3],linewidth=5,edgecolor='r',facecolor='none')
    ax.add_patch(rect)
    ax.text(bbox[0],bbox[1],i)
    print('bbox ',i,' at {:.2f} {:.2f}'.format(bbox[0].numpy(),bbox[1].numpy()))
# plt.imshow(img.numpy()[31,:,:,:].astype('uint8'))

In [None]:
# Round-trip check: encode one known box, then decode it for submission.
testbb=[(10,20,30,40)]
label_submitter(label_encoder(testbb))

In [None]:
# Decode the labels of sample 2 of the batch, using the default threshold.
bboxes = label_decoder(lbl[2,:,:,:].numpy())
bboxes

In [None]:
def ploter(img,lbl):
    """Print and draw the boxes decoded from `lbl` on top of `img`.

    Args:
        img: image object exposing `.numpy()`.
        lbl: label grid accepted by `label_decoder`.
    """
    decoded = label_decoder(lbl)
    print(decoded)
    plt.figure(figsize=(20,20))
    plt.imshow(img.numpy().astype('uint8'))
    axis = plt.gca()
    for idx,box in enumerate(decoded):
        corner = (box[0],box[1])
        axis.add_patch(Rectangle(corner,box[2],box[3],linewidth=5,edgecolor='r',facecolor='none'))
        # Label every rectangle with its position in the decoded list.
        axis.text(box[0],box[1],idx)

In [None]:
#ploter(image_loader(X_train[21]),y_train[21])

In [None]:
#a,b=data_flipper(image_loader(X_train[21]),y_train[21])
#ploter(a,b.numpy())

In [None]:
#b

# 3. Create a Yolo model
The model is based on YOLOv1.  

The label encoder and decoder are done.  
The model is done.  
The loss function is done.  
To do: dataset preparation.  

In [None]:
# 're-use' loads a saved model further down; any other value trains from scratch.
strategy = 'no re-use'

In [None]:
def get_model(input_shape):
    """Build the homemade convolutional detector and print its summary.

    The network rescales its input with 1/127.5 and offset -1, applies six
    Conv2D -> MaxPool2D -> BatchNormalization stages (the last four followed
    by a small Dropout), then global average pooling, a 1024-unit dense
    layer and a sigmoid output reshaped to (NB_I, NB_J, TENSOR_DEPTH).

    Args:
        input_shape: shape passed to `keras.Input`.

    Returns:
        The uncompiled `keras.Model` named "yolo_JB".
    """
    # (filters, dropout rate) per convolutional stage; None means no dropout.
    conv_stages = (
        (32, None),
        (32, None),
        (64, 0.01),
        (128, 0.02),
        (256, 0.03),
        (512, 0.04),
    )

    inputs = keras.Input(input_shape)
    x = layers.Rescaling(scale = 1/127.5, offset=-1)(inputs)

    for n_filters, drop_rate in conv_stages:
        x = layers.Conv2D(filters=n_filters, kernel_size=3, activation="relu")(x)
        x = layers.MaxPool2D(pool_size=2)(x)
        x = layers.BatchNormalization()(x)
        if drop_rate is not None:
            x = layers.Dropout(drop_rate)(x)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(units=1024, activation="relu")(x)
    x = layers.Dropout(0.2)(x)

    # One sigmoid output per grid value, reshaped to the label layout.
    x = layers.Dense(units=NB_I*NB_J*TENSOR_DEPTH, activation="sigmoid")(x)
    outputs = layers.Reshape((NB_I,NB_J,TENSOR_DEPTH))(x)

    model = keras.Model(inputs, outputs, name="yolo_JB")
    model.summary()

    return model

In [None]:
def get_model_transfer_learning(input_shape):
    """Build a detector on top of a frozen ImageNet Xception backbone.

    Args:
        input_shape: shape passed to both the backbone and `keras.Input`.

    Returns:
        The uncompiled `keras.Model` named "yolo_JB"; its summary is printed.
    """
    backbone = keras.applications.Xception(
        weights='imagenet',
        input_shape = input_shape,
        include_top = False)
    # The backbone's weights are not updated during training.
    backbone.trainable = False

    inputs = keras.Input(input_shape)
    scaled = layers.Rescaling(scale = 1/127.5, offset=-1)(inputs)
    # training=False: the backbone is always called in inference mode.
    features = backbone(scaled, training = False)

    pooled = layers.GlobalAveragePooling2D()(features)
    hidden = layers.Dense(units=1024, activation="relu")(pooled)
    hidden = layers.Dropout(0.2)(hidden)

    # One sigmoid output per grid value, reshaped to the label layout.
    flat_out = layers.Dense(units=NB_I*NB_J*TENSOR_DEPTH, activation="sigmoid")(hidden)
    outputs = layers.Reshape((NB_I,NB_J,TENSOR_DEPTH))(flat_out)

    model = keras.Model(inputs, outputs, name="yolo_JB")
    model.summary()

    return model

In [None]:
# Choose the architecture: the transfer-learning variant is active, the
# from-scratch network is kept as a commented-out alternative.
# yolo_JB = get_model(IMG_SHAPE)
yolo_JB = get_model_transfer_learning(IMG_SHAPE)

In [None]:
# Show the model's output shape.
yolo_JB.output_shape

In [None]:
def yoloLoss(y_true,y_pred):
    """Sum-of-squares loss with separate position and confidence terms.

    The first NB_BOX channels of `y_true` act as the object mask; the
    remaining channels hold the box coordinates.

    Args:
        y_true: target grid.
        y_pred: predicted grid, same layout as `y_true`.

    Returns:
        Scalar: 5 * position loss + object confidence loss
        + 0.5 * no-object confidence loss, summed over all elements.
    """
    # Weights of the position term and of the no-object confidence term.
    lbda_coord = 5
    lbda_noobj = 0.5

    # 1 where a cell contains an object, 0 elsewhere.
    obj_mask = y_true[...,:NB_BOX]

    coord_sq_err = K.square(y_true[...,NB_BOX:]-y_pred[...,NB_BOX:])
    conf_sq_err = K.square(y_true[...,:NB_BOX]-y_pred[...,:NB_BOX])

    position_term = K.sum(obj_mask*coord_sq_err)
    conf_obj_term = K.sum(obj_mask*conf_sq_err)
    conf_noobj_term = K.sum((1-obj_mask)*conf_sq_err)

    return lbda_coord * position_term + conf_obj_term + lbda_noobj * conf_noobj_term
    

In [None]:
def myMetrics(y_true,y_pred):
    """IoU-style overlap score, summed over all cells and averaged per sample.

    Channels 1-4 of both grids are read as (xc, yc, w, h) and turned into
    corner coordinates; the per-cell intersection-over-union is summed and
    divided by the number of samples in the batch.

    NOTE(review): label_encoder stores x/y as offsets in cell units and w/h
    as square roots of image-relative sizes, so the boxes compared here mix
    units; the value looks like a training proxy rather than a geometric
    IoU -- confirm that this is intended.

    Args:
        y_true: target grid of shape (batch, NB_I, NB_J, depth).
        y_pred: predicted grid with the same shape and dtype.

    Returns:
        Scalar tensor.
    """
    def _corners_and_area(grid):
        # Centre/size channels -> (x_min, x_max, y_min, y_max, area).
        xc = grid[:,:,:,1]
        yc = grid[:,:,:,2]
        w = grid[:,:,:,3]
        h = grid[:,:,:,4]
        return xc - w/2, xc + w/2, yc - h/2, yc + h/2, w * h

    xa_true, xb_true, ya_true, yb_true, A_true = _corners_and_area(y_true)
    xa_pred, xb_pred, ya_pred, yb_pred, A_pred = _corners_and_area(y_pred)

    # Intersection rectangle; negative extents mean no overlap.
    w_inter = K.maximum(K.minimum(xb_true,xb_pred) - K.maximum(xa_true,xa_pred),0.)
    h_inter = K.maximum(K.minimum(yb_true,yb_pred) - K.maximum(ya_true,ya_pred),0.)
    A_inter = w_inter * h_inter

    # divide_no_nan returns 0 where the union is 0 instead of NaN, which
    # would otherwise poison the whole sum.
    IoU = tf.math.divide_no_nan(A_inter, A_true + A_pred - A_inter)

    # Average over the samples actually present: the last batch of an epoch
    # can hold fewer than BATCH_SIZE samples.
    n_samples = tf.cast(tf.shape(y_true)[0], IoU.dtype)

    return K.sum(IoU)/n_samples
    
  

 
    
    

In [None]:
# Smoke test of the loss: compare the label batch of one training batch with
# the label batch of one validation batch.
img1,lbl1 = next(iter(train_ds))
img2,lbl2 = next(iter(validation_ds))
yoloLoss(lbl1,lbl2)

In [None]:
# Compile with the custom loss and metric and a default-configured Adam optimizer.
yolo_JB.compile(
    loss=yoloLoss,
    optimizer=tf.keras.optimizers.Adam(), metrics=myMetrics)

In [None]:
# Either load a previously saved model (without compiling it) or train for
# 40 epochs, plot the loss and metric curves, and save the result.
if strategy == 're-use':
    yolo_JB = keras.models.load_model('../input/starfishes-2/my_h5_model.h5', compile=False)
else:
    history = yolo_JB.fit(train_ds, validation_data=validation_ds, epochs=40)
    history_df = pd.DataFrame(history.history)
    # NOTE(review): inside a block this expression is not displayed.
    history_df.head(2)
    history_df.loc[:, ['loss', 'val_loss']].plot(title="yoloLoss")
    history_df.loc[:, ['myMetrics', 'val_myMetrics']].plot(title="IoU aggregation")
    yolo_JB.save("my_h5_model.h5")
    

In [None]:
def compare_results(imagepath, label):
    """Draw annotated boxes (red) and predicted boxes (green) on one image.

    Args:
        imagepath: path of the JPEG file to load.
        label: raw annotation string for that image.
    """
    image = image_loader(imagepath)
    frame = image.numpy().astype('uint8')
    batch = tf.expand_dims(image, axis=0)
    true_boxes = get_bboxes(ast.literal_eval(label))
    prediction = yolo_JB.predict(batch)
    pred_bboxes = label_decoder(prediction[0,:,:,:],threshold=0.1)

    plt.figure(figsize=(20,20))
    plt.imshow(frame)
    ax = plt.gca()
    # Annotated boxes first, predictions on top.
    for boxes, colour in ((true_boxes, 'r'), (pred_bboxes, 'g')):
        for bbox in boxes:
            ax.add_patch(Rectangle((bbox[0],bbox[1]),bbox[2],bbox[3],linewidth=5,edgecolor=colour,facecolor='none'))

# Example on the row whose index label is 49.
# NOTE(review): [49] is a label lookup and trainDF was filtered earlier, so
# this raises KeyError if that row was dropped -- confirm it is still present.
compare_results(trainDF.path[49],trainDF.annotations[49])

# 4. Submission

In [None]:
# Create the competition environment used below to iterate over the test
# frames and register predictions.
import greatbarrierreef 
env=greatbarrierreef.make_env()

In [None]:
# def compare_results(imagepath, label):
#     image = image_loader(imagepath)
#     img = image.numpy().astype('uint8')
#     image = tf.expand_dims(image, axis=0)
#     annotations = ast.literal_eval(label)
#     bboxes = get_bboxes(annotations)
#     prediction = yolo_JB.predict(image)
#     pred_bboxes = label_decoder(prediction[0,:,:,:],threshold=0.1)
#     plt.figure(figsize=(20,20))
#     plt.imshow(img)
#     ax = plt.gca()
#     for bbox in bboxes:
#         rect = Rectangle((bbox[0],bbox[1]),bbox[2],bbox[3],linewidth=5,edgecolor='r',facecolor='none')
#         ax.add_patch(rect)
#     for bbox in pred_bboxes:
#         rect = Rectangle((bbox[0],bbox[1]),bbox[2],bbox[3],linewidth=5,edgecolor='g',facecolor='none')
#         ax.add_patch(rect)

# fig, axes = plt.subplots(5,5,figsize=(15,15))
#     axes = axes.flatten()
#     for ax,idx in zip(axes,idxlist):
#         ax.imshow(X[:,:,idx])

In [None]:
iter_test = env.iter_test()    # an iterator which loops over the test set and sample submission

# Only as many frames as there are axes are drawn. Every frame is still
# predicted and registered; indexing `axes` without the guard would raise
# IndexError as soon as the iterator yields more frames than axes.
fig, axes = plt.subplots(3,1,figsize=(45,45))
axes = axes.flatten()

for i, (pixel_array, sample_prediction_df) in enumerate(iter_test):
    print(pixel_array.shape)
    pixel_array = np.expand_dims(pixel_array, axis=0)
    prediction = yolo_JB.predict(pixel_array)
    #print(prediction)
    bboxes = label_submitter(prediction[0,:,:,:], threshold=0.1)
    ax = axes[i] if i < len(axes) else None
    if ax is not None:
        ax.imshow(pixel_array[0,:,:,:])
    predictions = []
    for bbox in bboxes:
        # One "confidence x y width height" group per box.
        predictions.append('{:.2f} {} {} {} {}'.format(bbox[0], bbox[1], bbox[2], bbox[3], bbox[4]))
        if ax is not None:
            rect = Rectangle((bbox[1],bbox[2]),bbox[3],bbox[4],linewidth=5,edgecolor='r',facecolor='none')
            ax.add_patch(rect)
            ax.text(bbox[1],bbox[2],'{:.2f}'.format(bbox[0]))
    prediction_str = ' '.join(predictions)
    sample_prediction_df['annotations'] = prediction_str
    env.predict(sample_prediction_df)   # register your predictions
    print('Prediction:', prediction_str)

In [None]:
# Scratch check of the submission format string.
'{:.2f} {} {} {} {}'.format(0.356, int(1.57), 2, 12, 0.1)