# Cloning and installing YOLOv5

In [1]:
# Fetch the YOLOv5 sources into ./yolov5; later cells use its requirements.txt,
# data/ folder and train.py.
!git clone https://github.com/ultralytics/yolov5

Cloning into 'yolov5'...
remote: Enumerating objects: 10876, done.
remote: Total 10876 (delta 0), reused 0 (delta 0), pack-reused 10876
Receiving objects: 100% (10876/10876), 11.02 MiB | 18.47 MiB/s, done.
Resolving deltas: 100% (7506/7506), done.


In [2]:
!pip install -r yolov5/requirements.txt

Collecting thop
  Downloading thop-0.0.31.post2005241907-py3-none-any.whl (8.7 kB)
Installing collected packages: thop
Successfully installed thop-0.0.31.post2005241907


# Copying font to output folder

In [3]:
# Place Arial.ttf in the working directory.
# NOTE(review): presumably so YOLOv5 finds the font locally instead of trying
# to download it — confirm.
!cp ../input/yolov5-font/Arial.ttf ./Arial.ttf

# Write YAML file with directories

In [4]:
%%writefile ./yolov5/data/GBR.yaml

# Dataset description handed to train.py through `--data GBR.yaml`.
# `path` is the dataset root; `train` and `val` name the folders that the
# notebook fills with images and label files (presumably resolved relative to
# `path` — confirm against the YOLOv5 docs). No test split is defined.
path: /kaggle/working/datasets
train: train
val: validation
test:

# One object class.
nc: 1
names: [ 'starfish' ]

Writing ./yolov5/data/GBR.yaml


# Copy images to working directory

In [5]:
import shutil
import os

# Frames are stored in one folder per video. video_0 and video_2 go to the
# training split, video_1 is held out for validation.
src_path = '../input/tensorflow-great-barrier-reef/train_images'
src_list = ['video_0', 'video_1', 'video_2']
dest_list = ['./datasets/train', './datasets/validation', './datasets/train']

# makedirs(exist_ok=True) also creates the parent ./datasets folder and, unlike
# os.mkdir, does not raise when the cell is executed a second time.
for dest_dir in dest_list:
    os.makedirs(dest_dir, exist_ok=True)

for video_dir, dest_dir in zip(src_list, dest_list):
    # The last character of the folder name (the video number) is prepended to
    # each file name so frames from different videos cannot overwrite each other.
    prefix = video_dir[-1:]

    for filename in os.listdir(os.path.join(src_path, video_dir)):
        shutil.copy2(os.path.join(src_path, video_dir, filename),
                     os.path.join(dest_dir, prefix + '-' + filename))

# Prepare labels

In [6]:
import pandas as pd
import ast

# Frame size in pixels that the pixel-space boxes are normalised by.
IMG_WIDTH = 1280
IMG_HEIGHT = 720


def to_yolo_rows(annotations, img_width=IMG_WIDTH, img_height=IMG_HEIGHT):
    """Convert pixel-space boxes into YOLO label rows.

    Args:
        annotations: iterable of dicts with 'x', 'y', 'width' and 'height'
            keys (top-left corner and size of a box, in pixels).
        img_width: frame width used for normalisation.
        img_height: frame height used for normalisation.

    Returns:
        One ``[0, x_center, y_center, width, height]`` list per box, where 0 is
        the class index and the four geometry values are fractions of the
        frame size.
    """
    rows = []
    for box in annotations:
        # Clamp both corners to the frame so that a box running past an edge
        # still yields normalised values inside [0, 1]. Boxes that already lie
        # inside the frame are unaffected.
        left = min(max(box['x'], 0), img_width)
        top = min(max(box['y'], 0), img_height)
        right = min(max(box['x'] + box['width'], 0), img_width)
        bottom = min(max(box['y'] + box['height'], 0), img_height)

        rows.append([0,
                     (left + right) / 2 / img_width,
                     (top + bottom) / 2 / img_height,
                     (right - left) / img_width,
                     (bottom - top) / img_height])
    return rows


df = pd.read_csv('../input/tensorflow-great-barrier-reef/train.csv')
# Only annotated frames get a label file.
df = df[df['annotations'] != '[]']

# One space-separated .txt file per annotated frame, named after its image_id.
# Frames of video 1 are the validation split, everything else is training data.
for video_id, image_id, annotations in zip(df['video_id'], df['image_id'], df['annotations']):
    split_dir = './datasets/validation/' if video_id == 1 else './datasets/train/'

    # The annotations column holds the box list as a Python-literal string.
    label_df = pd.DataFrame(to_yolo_rows(ast.literal_eval(annotations)))
    label_df.to_csv(split_dir + str(image_id) + '.txt', header=False, index=False, sep=' ')

# Train model

In [7]:
# Train on the dataset described by GBR.yaml, starting from the yolov5s6.pt
# weights: image size 1280, batch size 16, one epoch.
!python ./yolov5/train.py --img 1280 --batch 16 --epochs 1 --data GBR.yaml --weights yolov5s6.pt

[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice: (30 second timeout) 
[34m[1mwandb[0m: W&B disabled due to login timeout.
[34m[1mtrain: [0mweights=yolov5s6.pt, cfg=, data=GBR.yaml, hyp=yolov5/data/hyps/hyp.scratch.yaml, epochs=1, batch_size=16, imgsz=1280, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=yolov5/runs/train, name=exp, exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v6.0-255-gca0a007 torch 1.9.1 CUDA:0 (Tesla P100-PCIE-16