# RetinaNet for Global Wheat🌱 Head Detection

> ### This Notebook is for Training of **Keras RetinaNet** for wheat head detection 🔍

<hr>

### For EDA and Image Data analysis📊 visit:
### https://www.kaggle.com/akhileshdkapse/global-wheat-detection-comprehensive-eda

### Table of contents
* Data Loading
* Mismatched bounding-box correction
* Image Data Visualization
* Data Pre-Processing
* Model Training
* Trained model Analysis/Visualization of test image data
* Submission


## Data Loading

In [None]:
import numpy as np 
import pandas as pd 
import os
from glob import glob
import cv2
import ast
from tqdm.notebook import tqdm

import matplotlib.pyplot as plt
import seaborn as sns

import time

import random
sns.set_style('whitegrid')

In [None]:
# Read the training annotations and report how many rows and distinct
# images they contain.
df = pd.read_csv('../input/global-wheat-detection/train.csv')
print(
    'Totall Traning data: {} with {} unique images'.format(
        len(df), len(df['image_id'].unique())
    )
)
df.head()

In [None]:
def add_path(label):
    """Return the path of the training JPEG that belongs to ``label``."""
    train_dir = '../input/global-wheat-detection/train'
    file_name = label + '.jpg'
    return os.path.join(train_dir, file_name)

# Attach the on-disk file path of every annotated image.
df['image_name'] = df['image_id'].map(add_path)
df.head()

In [None]:
df.info()
#bbox -- String

In [None]:
# The bbox column is read as text; turn each entry into a Python literal.
df['bbox'] = df['bbox'].apply(ast.literal_eval)

In [None]:
# Each bbox is used as [x, y, width, height]; derive corner coordinates.
df['x_min'] = df.bbox.apply(lambda x: x[0])
df['y_min'] = df.bbox.apply(lambda x: x[1])
df['x_max'] = df.bbox.apply(lambda x: x[0] + x[2])
df['y_max'] = df.bbox.apply(lambda x: x[1] + x[3])
# Box area as a percentage of a 1024x1024 frame
# (assumes every image is 1024x1024 -- TODO confirm).
df['prct_area_cov'] = df.bbox.apply(lambda x: ((x[2] * x[3]) / (1024.0 * 1024.0)) * 100)
# Name the axis explicitly: the positional form `drop('source', 1)` is
# rejected by pandas 2.x.
df = df.drop(columns='source')
df.head()

## Mismatched bounding-box correction
### In our EDA notebook we encountered mismatched, overly large bounding boxes in certain images...



![](https://www.kaggleusercontent.com/kf/41590794/eyJhbGciOiJkaXIiLCJlbmMiOiJBMTI4Q0JDLUhTMjU2In0..wNU8w8SLUJMdER2nLmV0uA.bE7UWnqdhT5glaFaCMq4ReTOPK-XOCw2dZ_GRP_qdx-xLst2Df5bzmX3dvAiscihs5m0Dv8N5uZwVn_L8GPwXNfCliZgPsnBBxoJU2Ubxz0j7hdI1_H4BxpWM1aFKohGmuL7vh1kgunH3UDua8i_VHxmamOT3RE3ucyNAkH567mMcNIn3P-Nil4_5QuUtbZXzLh-ovee_nvMd1rPU_KDJqap8d5-gkixkxUMlX4oyb2j1qP3-9Dx5hM6RaaHo15eLUPevPZcNxdgNTj_xJbJ2fkxQzKfe8qaYAPoDwMh16qQUzKyq2eGf9o2MSWoF97F-pjOjHf_R8ONGrdnzD0vAVkCanJjOgUJ_uU8kfV83koFj63IpNld9SwO_OwBLQjANvdfA2ifq3Ek78UGfxPxh_cX6C0hYbZmB0GTpIYkhZjEBYQ3cq6M6GFoRzP6VcwU86ZnOa23KcFSGUnlV4DQxvfBaPf4e2dqEdyy80xLShyyP0PW7VgHXRVcIGKLiceLMMiW68pR4WaAEGFfDUHUF4tzVhD5-UU8cXKhCl5z--2q3bV2Hay5sZodpU61CS0UgdLkBqQjqcyH3F6HS_Iq2Xbso-IMHAdXtsk03yXNrN1ekn32tkulIGLA_PSd6f3S6zbgdIP1Hz7c8ZBSnSlXLyCWM-wOponEhFc7r1e_9rU.OQH__iA9D8ymBfeK7ornKg/__results___files/__results___38_2.png)


> ### Let's fix it 

In [None]:
# Density of per-box area coverage before outlier removal.
plt.figure(figsize=(20, 6))
# `fill=True` replaces the deprecated `shade=True` keyword of kdeplot.
sns.kdeplot(df.prct_area_cov.values, fill=True)
plt.xlabel('Percent Area covered by Bounding Box', size=15)
plt.ylabel('Probability Density', size=15)
plt.title('Area per BBox Distribution', size=20)

### Bounding-box area outliers:
* We can see that most of the **bounding boxes cover less than 3% of the total area** of the actual image.
* **Outliers** are those bounding boxes that **cover more than 5%** of the total image area, since a wheat head occupies only a small part of an image.

In [None]:
# Keep only boxes whose area coverage lies strictly between 0.3% and 6%.
keep_mask = (df.prct_area_cov < 6) & (df.prct_area_cov > 0.3)
# Report the rows that are actually dropped, computed from the same mask
# that is applied (previously this printed the kept rows for a different
# pair of thresholds).
print('Rows we lost:', int((~keep_mask).sum()))
df = df[keep_mask]
df = df.reset_index()

In [None]:
# Density of per-box area coverage after outlier removal.
plt.figure(figsize=(20, 6))
# `fill=True` replaces the deprecated `shade=True` keyword of kdeplot.
sns.kdeplot(df.prct_area_cov.values, fill=True)
plt.xlabel('Percent Area covered by Bounding Box', size=15)
plt.ylabel('Probability Density', size=15)
plt.title('Area per BBox Distribution', size=20)

## Image Data Visualization

In [None]:
def load(path, resize=False, gray=False):
    """Read an image from disk and convert it out of OpenCV's BGR order.

    Args:
        path: Location of the image file.
        resize: When true, scale the image to 500x500 pixels.
        gray: When true, convert to grayscale; otherwise convert to RGB.

    Returns:
        The converted image as returned by ``cv2.cvtColor``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read ``path``.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside resize/cvtColor.
        raise FileNotFoundError('Could not read image: {}'.format(path))
    if resize:
        img = cv2.resize(img, (500, 500))
    if gray:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

def draw_rec(img, boxes):
    """Draw every ``(x, y, w, h)`` box onto ``img`` and return the image."""
    outline = (16, 228, 214)
    for left, top, width, height in boxes:
        left, top = int(left), int(top)
        width, height = int(width), int(height)
        top_left = (left, top)
        bottom_right = (left + width, top + height)
        img = cv2.rectangle(img, top_left, bottom_right, color=outline, thickness=3)
    return img

In [None]:
def draw_rand():
    """Show a random training image beside a copy with its boxes drawn.

    Picks a random row of the module-level ``df``, loads the image that row
    belongs to, and plots the raw image and the annotated copy side by side.
    """
    # randrange excludes the upper bound; randint(0, n) could return n,
    # which is one past the last row.
    index = random.randrange(df.shape[0])
    # Positional lookup so the pick does not depend on the index labels.
    label = df.image_id.iloc[index]
    data = df[df.image_id == label]
    path = '../input/global-wheat-detection/train'
    path = os.path.join(path, (label + '.jpg'))
    img = load(path)
    # draw_rec draws in place, so annotate a copy and keep the original clean.
    img2 = img.copy()

    img2 = draw_rec(img2, data.bbox.values)
    f, ax = plt.subplots(1, 2, figsize=(25, 12))
    ax[0].imshow(img, aspect='auto'); ax[0].grid(False)
    ax[1].imshow(img2, aspect='auto'); ax[1].grid(False)

    plt.show()

In [None]:
draw_rand()

In [None]:
draw_rand()

## Data Pre-Processing

### Keras RetinaNet  https://github.com/fizyr/keras-retinanet

In [None]:
df['class_name']= 'wheat_head'
df.head()

In [None]:
print('Original dataframe shape',df.shape)
test=df.image_id.unique()[-10:]
test

In [None]:
# Hold out every annotation row that belongs to one of the image ids in
# `test`; all remaining rows form the training set.  One boolean mask
# replaces the per-id concat/filter loops (and avoids concatenating onto an
# empty DataFrame).
is_test_row = df.image_id.isin(test)

df_test = df[is_test_row].reset_index()
df_train = df[~is_test_row].reset_index()

In [None]:
df_test.shape, df_train.shape

In [None]:
# Keep only the columns written to the annotations file, in this order:
# path, x1, y1, x2, y2, class name.  Take an explicit copy so later
# assignments modify this frame and do not raise SettingWithCopyWarning.
df_train = df_train[['image_name', 'x_min', 'y_min', 'x_max', 'y_max', 'class_name']].copy()
print(df_train.shape)
df_train.head()

In [None]:
# Cast the coordinate columns (everything between the first and the last
# column) to int32.  `astype` on the frame actually changes the column
# dtypes; assigning through `iloc[...] = ...` writes into the existing
# columns and can leave them as floats, which would be written out as
# e.g. "834.0".
coord_cols = df_train.columns[1:-1]
df_train = df_train.astype({col: 'int32' for col in coord_cols})
# Prefix the image paths with '../' (the later cells change directory into
# keras-retinanet/ before writing and using the annotations).
df_train['image_name'] = df_train['image_name'].apply(lambda x: '../' + x)
df_train.head()

## Model Training

In [None]:
!git clone https://github.com/fizyr/keras-retinanet.git

In [None]:
%cd keras-retinanet/
!pip install .

In [None]:
!python setup.py build_ext --inplace

In [None]:
import tensorflow
from keras_retinanet import models
from keras_retinanet.utils.image import read_image_bgr ,preprocess_image, resize_image
from keras_retinanet.utils.visualization import draw_box, draw_caption
from keras_retinanet.utils.colors import label_color

import requests
import urllib

In [None]:
df_train.to_csv('annotations.csv', index=False, header=None)

In [None]:
with open("classes.csv","w") as file:
    file.write("wheat_head,0")

In [None]:
# `import urllib` alone does not guarantee that the `request` submodule is
# loaded, so import it explicitly before using it.
import urllib.request

PRETRAINED_MODEL = 'snapshots/_pretrained_model.h5'

URL_MODEL = 'https://github.com/fizyr/keras-retinanet/releases/download/0.5.1/resnet50_coco_best_v2.1.0.h5'
# Make sure the target directory exists before downloading into it.
os.makedirs(os.path.dirname(PRETRAINED_MODEL), exist_ok=True)
urllib.request.urlretrieve(URL_MODEL, PRETRAINED_MODEL)

print('Downloaded pretrained model to ' + PRETRAINED_MODEL)

In [None]:
!keras_retinanet/bin/train.py --freeze-backbone \
  --random-transform \
  --weights {PRETRAINED_MODEL} \
  --batch-size 8 \
  --steps 200 \
  --epochs 9 \
  csv annotations.csv classes.csv

## Trained model Analysis/Visualization of test image data

In [None]:
!ls snapshots

In [None]:
# Use the snapshot whose file name sorts last, load it as a resnet50
# RetinaNet and pass it through `models.convert_model`.
model_path = os.path.join('snapshots', sorted(os.listdir('snapshots'))[-1])

model = models.convert_model(
    models.load_model(model_path, backbone_name='resnet50')
)

In [None]:
%cd ../

In [None]:
def perd_from_model(path, th=0.5, box_only=False):
    """Run the module-level ``model`` on one image.

    Args:
        path: Image file, read with ``read_image_bgr``.
        th: Minimum score a detection needs in order to be drawn.  Not
            applied when ``box_only`` is true.
        box_only: When true, skip drawing and return the raw outputs.

    Returns:
        ``(scores, boxes)`` exactly as predicted (boxes divided by the
        resize scale) when ``box_only`` is true; otherwise an RGB copy of
        the image with the accepted boxes drawn on it.
    """
    # load image
    image = read_image_bgr(path)

    # copy to draw on
    draw = image.copy()
    draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)

    # preprocess image for network
    image = preprocess_image(image)
    image, scale = resize_image(image)
    print('scale', scale)

    # process image
    start = time.time()
    boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))
    print("processing time: ", time.time() - start)

    # correct for image scale
    boxes /= scale

    if box_only:
        return scores, boxes

    # visualize detections
    for box, score, label in zip(boxes[0], scores[0], labels[0]):
        # scores are sorted so we can break; honour the caller's threshold
        # instead of a hard-coded 0.5
        if score < th:
            break

        color = label_color(label)

        b = box.astype(int)
        draw_box(draw, b, color=color)
    return draw
        
    

In [None]:
def visu_test(df_test):
    """Plot one random held-out image: raw, ground truth, and predictions.

    Args:
        df_test: Annotation rows of the held-out images; must provide the
            ``image_id`` and ``bbox`` columns.
    """
    # Choose among all image ids present in df_test.  The previous
    # `test[np.random.randint(0, 9)]` could never select the last of the ten
    # held-out images and read the ids from a global instead of the argument.
    image_ids = df_test.image_id.unique()
    label = image_ids[np.random.randint(len(image_ids))]
    data = df_test[df_test.image_id == label]
    path = '../input/global-wheat-detection/train'
    path = os.path.join(path, (label + '.jpg'))
    img = load(path)
    # draw_rec draws in place, so annotate a copy.
    img2 = img.copy()

    img2 = draw_rec(img2, data.bbox.values)
    perd = perd_from_model(path, 0.3)

    f, ax = plt.subplots(1, 3, figsize=(35, 12))
    ax[0].imshow(img, aspect='auto'); ax[0].grid(False)
    ax[1].imshow(img2, aspect='auto'); ax[1].grid(False)
    ax[2].imshow(perd, aspect='auto'); ax[2].grid(False)

    ax[0].set_title('Original Image', size=24)
    ax[1].set_title('Original Image with B.boxes', size=24)
    ax[2].set_title('Predicted B.boxes with Image', size=24)
    plt.show()
    

In [None]:
visu_test(df_test)

In [None]:
visu_test(df_test)

In [None]:
visu_test(df_test)

In [None]:
visu_test(df_test)

## Submission

In [None]:
sub=pd.read_csv('../input/global-wheat-detection/sample_submission.csv')
sub

In [None]:
sub.PredictionString[0]

In [None]:
def perdict(label):
    """Build the submission prediction string for one test image.

    Args:
        label: Image id; the file ``<label>.jpg`` is read from the test
            directory.

    Returns:
        Concatenated ``"score x y w h "`` entries for every detection
        scoring at least 0.3.  Each entry, including the last, ends with a
        space; the result is empty when nothing passes the threshold.
    """
    th = 0.3  # single source for the score cut-off used below
    path = '../input/global-wheat-detection/test'
    path = os.path.join(path, (label + '.jpg'))
    score, boxes = perd_from_model(path, box_only=True, th=th)

    entries = []
    for s, b in zip(score[0], boxes[0]):
        # stop at the first detection below the threshold
        if s < th:
            break
        # boxes are (x1, y1, x2, y2); the output wants x, y, width, height
        entries.append(
            '{} {} {} {} {} '.format(
                s, int(b[0]), int(b[1]), int(b[2] - b[0]), int(b[3] - b[1])
            )
        )
    return ''.join(entries)
    

In [None]:
sub['PredictionString']= sub.image_id.apply(perdict)

In [None]:
sub

In [None]:
sub.PredictionString= sub.PredictionString.apply(lambda x: x[:-1])

In [None]:
sub.to_csv('/kaggle/working/submission.csv',index=False)