# Install YOLOv5

##### YOLOv5 has a successful track record in real-time object detection.

In [None]:
# Download YOLOv5
!git clone https://github.com/ultralytics/yolov5  # clone
%cd yolov5
!pip install -r requirements.txt  # install

In [None]:
import torch
# NOTE(review): `from yolov5 import utils` resolves only when the directory that
# CONTAINS the cloned `yolov5` folder is importable; the install cell has already
# changed the working directory into the clone — confirm on a fresh kernel.
from yolov5 import utils
# Keep the value returned by notebook_init() under the name `display`
display = utils.notebook_init()  # checks

In [None]:
#%cd yolov5
#!python train.py --img 1280 --batch 16 --epochs 100 --data coco128.yaml --weights yolov5s.pt --cache

In [None]:
# YOLOv5 dataset definition.
# Bare YAML inside a code cell is parsed by the kernel as Python variable
# annotations (it runs, but does nothing), and joining two quoted paths with
# `+` is not valid YAML either. The definition is therefore kept as text, with
# the two training directories given as a YAML list, and written to a file.
from pathlib import Path

DATASET_YAML = """\
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
train:
  - /kaggle/input/tensorflow-great-barrier-reef/train_images/video_0/
  - /kaggle/input/tensorflow-great-barrier-reef/train_images/video_1
val: /kaggle/input/tensorflow-great-barrier-reef/train_images/video_2

# number of classes
nc: 1

# class names
names: ['starfish']
"""

# Relative path: the file is created in the current working directory
DATASET_YAML_PATH = Path("reef.yaml")
DATASET_YAML_PATH.write_text(DATASET_YAML);

# Libraries

In [None]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
import pickle
import cv2
from glob import glob
from tqdm.notebook import tqdm
import yaml
import shutil
from shutil import copyfile
import sys
from PIL import Image,ImageDraw
from pathlib import Path
from math import sqrt
import ast
import seaborn as sns
sns.set()

# EDA 

In [None]:
# Competition tables, in order: training annotations, test frames, sample submission
train, test, sub = map(pd.read_csv, (
    '/kaggle/input/tensorflow-great-barrier-reef/train.csv',
    '/kaggle/input/tensorflow-great-barrier-reef/test.csv',
    '/kaggle/input/tensorflow-great-barrier-reef/example_sample_submission.csv',
))

# All .jpg files below train_images, at any depth
path = Path('/kaggle/input/tensorflow-great-barrier-reef/train_images')
filepaths = [jpg_file for jpg_file in path.glob(r'**/*.jpg')]

In [None]:
# checking the train test lengths
print("Number of training samples: ", len(train))
print("Number of testing samples: ", len(test))

In [None]:
train.head(200)

In [None]:
# How many frames with no starfishes
train_clean = train.loc[train["annotations"] != "[]"]
print(f"No starfishes in {len(train)-len(train_clean)} samples.")
print(f"The clean train set has {len(train_clean)} images for us to work with.")

In [None]:
train_clean.head()

# Distribution of Sequences

In [None]:
len(train_clean.sequence.value_counts())

In [None]:
print('Sequence Samples')
print(train_clean.sequence.value_counts())

In [None]:
# Number of annotated frames in each sequence.
seq_df = train_clean.sequence.value_counts().to_frame()
# The single column of seq_df is called "sequence" on pandas < 2.0 but "count"
# on pandas >= 2.0, so it is selected by position instead of by name.
seq_counts = seq_df.iloc[:, 0]

plt.figure(figsize=(16, 9))
sns.barplot(x=seq_counts.index, y=seq_counts.to_list(), palette="Greens_d")
plt.title("Distribution of Sequences")
plt.xlabel("Sequence Id")
plt.ylabel("Frequency")
plt.show()

# Number Of Boxes

In [None]:
# Turn every annotation string into the Python object it spells out,
# then record how many entries (boxes) each one holds.
annotations_clean = [ast.literal_eval(raw) for raw in train_clean.annotations]
num_boxes = [len(parsed) for parsed in annotations_clean]

In [None]:
train_clean["num_boxes"] = num_boxes
train_clean["annotations"] = annotations_clean

In [None]:
train_clean.head()

In [None]:
print("#box Frequency")
print(train_clean.num_boxes.value_counts())

In [None]:
print(f"Number of Bounding Boxes in the dataset: {train_clean.num_boxes.sum()}")

# Distribution of number of bounding boxes

In [None]:
box_count = train_clean.num_boxes.value_counts().to_frame()

In [None]:
plt.figure(figsize=(16, 9))
sns.barplot(x=box_count.index, y=list(box_count.num_boxes), palette="Greens_d")
plt.title("Distribution of Num_boxes")
plt.xlabel("# of Boxes")
plt.ylabel("Frequency")
plt.show()

## Looking at the boxes

In [None]:
# Annotations of the first row (by position, whatever its index label is)
train_clean["annotations"].iloc[0]

In [None]:
src = 'kaggle/input/tensorflow-great-barrier-reef/train_images'
paths = []
for row in train_clean.image_id:
    vid_num = row.split('-')[0]
    img_num = row.split('-')[1]
    paths.append(os.path.join(src,f'video_{vid_num}',img_num+'.jpg'))

In [None]:
train_clean['paths'] = paths

In [None]:
def vis_boxes(img_path, bboxes):
    """Return a copy of the image at ``img_path`` with ``bboxes`` outlined in red.

    Args:
        img_path: Location of the image file; handed to ``Image.open``.
        bboxes: Iterable of mappings with the keys ``'x'``, ``'y'``,
            ``'width'`` and ``'height'``. ``(x, y)`` is one corner of the box
            and ``(x + width, y + height)`` the opposite one.

    Returns:
        A new image with one red rectangle (line width 7) per box. With an
        empty ``bboxes`` the result is an unmodified copy of the image.
    """
    # Corner coordinates [x1, y1, x2, y2] for every box
    coords = [
        [box['x'], box['y'], box['x'] + box['width'], box['y'] + box['height']]
        for box in bboxes
    ]

    # The context manager closes the underlying file as soon as the pixels have
    # been copied, instead of leaving the handle open.
    with Image.open(img_path) as img:
        img1 = img.copy()

    draw = ImageDraw.Draw(img1)
    for corners in coords:
        draw.rectangle(corners, outline='red', width=7)

    return img1

# Sequences with max bounding boxes

In [None]:
train_clean.head()

In [None]:
train_clean.groupby('sequence').num_boxes.sum().to_frame()

# Checking Samples

In [None]:
# Show randomly sampled annotated frames with their boxes drawn on top.
n_images = 9
r, c = int(sqrt(n_images)), int(sqrt(n_images))  # square grid: 3 x 3 for 9 images
train_plot = train_clean.sample(n=n_images)

# squeeze=False keeps `axes` two-dimensional even for a 1 x 1 grid
fig, axes = plt.subplots(r, c, figsize=(16, 9), squeeze=False)
for ax, (_, row) in zip(axes.flat, train_plot.iterrows()):
    ax.imshow(vis_boxes(row['paths'], row['annotations']))

# The layout must be adjusted BEFORE show(): once the figure has been
# displayed, a later tight_layout() no longer changes the rendered output.
fig.tight_layout()
plt.show()

# Loading Data

In [None]:
# Empty placeholder; none of the cells visible in this notebook append to it.
img_list = []


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
%cd yolov5
!python detect.py --weights yolov5n6.pt --img 1280  

In [None]:
# Run configuration (not referenced by the cells visible in this notebook)
BATCH_SIZE, EPOCHS = 16, 30
IMG_SIZE = 1280
Selected_Fold = 4  # fold index, valid values 0..4

In [None]:
# Read data
# Absolute path, like every other data path in this notebook: the working
# directory was changed to the yolov5 clone, so the former '../input/...'
# would have been resolved relative to the clone's parent directory.
TRAIN_PATH = '/kaggle/input/tensorflow-great-barrier-reef/'
CKPT_PATH = '/kaggle/input/reef-baseline-fold12/l6_3600_uflip_vm5_f12_up/f1/best.pt' # by @steamedsheep
# Alternative checkpoint, currently disabled:
#CKPT_PATH_2 = '/kaggle/input/greatbarrierreef-yolov5-train-ds/yolov5/runs/train/exp/weights/best.pt'
IMG_SIZE  = 1280   # same value as in the configuration cell above
# NOTE(review): presumably the detection confidence threshold, the IoU
# threshold and the test-time-augmentation switch — none of them is used in
# the cells visible here, confirm against the inference code.
CONF      = 0.15
IOU       = 0.50
AUGMENT   = False

In [None]:
df = pd.read_csv('/kaggle/input/tensorflow-great-barrier-reef/train.csv')
df.head(5)

In [None]:
df["NumBBox"]=df['annotations'].apply(lambda x: str.count(x, 'x'))
df.head(5)

In [None]:
print(df["NumBBox"].unique())

In [None]:
df_train=df[df["NumBBox"]>0]
df_train.sample(2)

In [None]:
print(df_train['NumBBox'].sum())

In [None]:
# progress_apply exists on pandas objects only after tqdm has been registered.
tqdm.pandas()


def get_bbox(annots):
    """Convert one frame's annotations to a list of ``[x, y, width, height]`` lists.

    Args:
        annots: Iterable of mappings with the keys ``'x'``, ``'y'``,
            ``'width'`` and ``'height'``.

    Returns:
        One ``[x, y, width, height]`` list per annotation; ``[]`` for no annotations.
    """
    return [[box['x'], box['y'], box['width'], box['height']] for box in annots]


# Parse the annotation strings. Values that are no longer strings (the cell
# was run before) are passed through, so re-running the cell does not fail.
df_train['annotations'] = df_train['annotations'].progress_apply(
    lambda x: ast.literal_eval(x) if isinstance(x, str) else x
)
df_train['bboxes'] = df_train.annotations.progress_apply(get_bbox)
df_train.sample(2)

# Training Data

In [None]:
# Tag every row with the same fixed frame size
df_train = df_train.assign(Width=1280, Height=720)
df_train.sample(2)

In [None]:
# Visual check: draw the boxes of two randomly sampled rows that have exactly 13 boxes.
# NOTE(review): load_image, coco2yolo, draw_bboxes and colors are not defined in
# any cell visible above, and no visible cell creates an `image_path` column —
# confirm where they come from, otherwise this cell fails on a fresh kernel.
df_v = df_train[(df_train.NumBBox==13)].sample(2) 
fig,ax = plt.subplots(1,2,figsize=(30,20))
i=0;
for index, row in df_v.iterrows():
    img           = load_image(row.image_path)
    image_height  = row.Height
    image_width   = row.Width
    bboxes_coco   = np.array(row.bboxes)
    # presumably converts the boxes to YOLO format — helper not visible here, verify
    bboxes_yolo   = coco2yolo(image_height, image_width, bboxes_coco)
    # one 'COTS' name and one class id 0 per box
    names         = ['COTS']*len(bboxes_coco)
    labels        = [0]*len(bboxes_coco)
    im=draw_bboxes(img = img,
                           bboxes = bboxes_yolo, 
                           classes = names,
                           class_ids = labels,
                           class_name = True, 
                           colors = colors, 
                           bbox_format = 'yolo',
                           line_thickness = 2)
    # i selects the subplot: first sampled row on the left, second on the right
    ax[i].imshow(im)
    ax[i].axis('OFF')
    i=i+1

# Pytorch Hub

In [None]:
# PyTorch Hub
#import torch

# Model
#model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

# Images
#dir = 'https://ultralytics.com/images/'
#imgs = [dir + f for f in ('zidane.jpg', 'bus.jpg')]  # batch of images

# Inference
#results = model(imgs)
#results.print()  # or .show(), .save()