# Train Only

[Prepare Data](https://www.kaggle.com/ihorin/great-barrier-reef-prepare-data)

# 🛠 Install Libraries

In [None]:
!pip install -qU wandb
!pip install -qU bbox-utility # check https://github.com/awsaf49/bbox for source code

# 📚 Import Libraries

In [None]:
import numpy as np
from tqdm.notebook import tqdm
tqdm.pandas()
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import glob

import shutil
import sys
sys.path.append('../input/tensorflow-great-barrier-reef')

from joblib import Parallel, delayed

from IPython.display import display, HTML

from matplotlib import animation, rc
rc('animation', html='jshtml')

# for DA
from torch.utils.data import DataLoader, Dataset
import torch.utils.data as Data
import ast #?
from fastprogress.fastprogress import master_bar, progress_bar #?

# 📌 Key-Points
* 提供されているpython時系列APIを使用して予測を送信する必要があります。これにより、このコンテストは以前のオブジェクト検出コンテストとは異なります。
* 各予測行には、画像のすべての境界ボックスを含める必要があります。提出フォーマットもCOCO形式のようです。つまり`[x_min, y_min, width, height]`です。
* コンペティションの評価指標F2は、ヒトデの見逃しをできるだけ減らすために、ある程度の誤検知（FP）を許容します。つまり、見逃し（FN）を減らすことは、誤検知（FP）を減らすことよりも重要です。
$$F2 = 5 \cdot \frac{precision \cdot recall}{4\cdot precision + recall}$$

# ⭐ WandB

In [None]:
import wandb

# Label of the Kaggle secret (Add-ons -> Secrets) that holds the W&B API key.
WANDB_SECRET_LABEL = "wandb_team_iforine"

# Log in with the stored API key when possible; otherwise fall back to an
# anonymous W&B session so the rest of the notebook can still run.
# `anonymous` is bound on both paths so later cells can rely on it.
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret(WANDB_SECRET_LABEL)
    wandb.login(key=api_key)
    anonymous = None
except Exception as err:
    # `Exception` (not a bare except) keeps KeyboardInterrupt / SystemExit
    # working while still covering a missing module, missing secret or bad key.
    anonymous = 'must'
    wandb.login(anonymous=anonymous)
    print(f'W&B key login failed ({type(err).__name__}); using anonymous mode.')
    print(f'To use your W&B account,\nGo to Add-ons -> Secrets and provide your W&B access token. Use the Label name as {WANDB_SECRET_LABEL}. \nGet your W&B access token from here: https://wandb.ai/authorize')

In [None]:
# ---- Experiment settings --------------------------------------------------
# Most of these are interpolated into the `train.py` command line further
# down; all of them except WORKER are encoded in the run name.
FOLD = 4  # which fold to train
DIM = 3000  # passed to train.py as --img
MODEL = 'yolov5s'  # weights are loaded from f'{MODEL}.pt'
BATCH = 4  # passed as --batch
EPOCHS = 8  # passed as --epochs
OPTIM = 'Adam'  # passed as --optimizer
AUG = '3_HFlip_HE_CHE_Gamma'  # free-form tag; only used in the run name

# ---- Weights & Biases / YOLOv5 run identification -------------------------
PROJECT = 'iforine/great-barrier-reef-public'  # w&b in yolov5
NAME = f'{MODEL}-dim{DIM}-fold{FOLD}-bat{BATCH}-opt{OPTIM}-aug{AUG}-epch{EPOCHS}'  # w&b for yolov5

# ---- Paths ------------------------------------------------------------------
ROOT_DIR = '/kaggle/input/tensorflow-great-barrier-reef/'
DATA_DIR = '/kaggle/input/great-barrier-reef-prepare-data'
IMAGE_DIR = '/kaggle/working/images'  # directory to save images
LABEL_DIR = '/kaggle/working/labels'  # directory to save labels

# Original author's note: purpose not well understood (number of threads?).
# NOTE(review): not referenced by any other cell visible in this notebook.
WORKER = 4

# Seed NumPy's global RNG.
np.random.seed(42)

# Create Directories

In [None]:
!mkdir -p {IMAGE_DIR}
!mkdir -p {LABEL_DIR}

# ✏️InputからWorkingへデータをコピー

* We need to copy the Images to Current Directory(`/kaggle/working`) as `/kaggle/input` doesn't have **write access** which is needed for **YOLOv5**.
* We can make this process faster using **Joblib** which uses **Parallel** computing.

* `/kaggle/input` にはYOLOv5に必要な書き込みアクセス権がないため、画像を現在のディレクトリ（`/kaggle/working`）にコピーする必要があります。
* この処理を高速化するには、**並列**計算を利用する**Joblib**を使用します。

`shutil.copyfile(src, dst, *, follow_symlinks=True)`

src という名前のファイルの内容 (メタデータを含まない) を、最も効率的な方法で dst という名前のファイルにコピーし、dst を返します。 src と dst は path-like object または文字列でパス名を指定します。

In [None]:
def make_copy(row):
    """Copy a single file from ``row.old_path`` to ``row.new_path``.

    Args:
        row: Any object exposing ``old_path`` (source file) and ``new_path``
            (destination file) attributes, e.g. one row of the copy table.

    Returns:
        None. The copy is done with ``shutil.copyfile``.
    """
    source, destination = row.old_path, row.new_path
    shutil.copyfile(source, destination)

並列処理

```
joblib.Parallel(<Parallelへの引数>)(
    joblib.delayed(<実行する関数>)(<関数への引数>) for 変数名 in イテラブル
)
```

iterrows()メソッドを使うと、1行ずつ、インデックス名（行名）とその行のデータ（pandas.Series型）のタプル(index, Series)を取得できる。

In [None]:
# Table of files to copy; make_copy reads `old_path` and `new_path` from each row.
paths = pd.read_csv(f'{DATA_DIR}/copy_path.csv')

# Run one make_copy task per row on a thread pool (n_jobs=-1: joblib picks the
# worker count). tqdm wraps the row iterator to show progress as tasks are
# dispatched. The list of results is discarded.
_ = Parallel(n_jobs=-1, backend='threading')(
    delayed(make_copy)(row)
    for _, row in tqdm(paths.iterrows(), total=len(paths))
)

In [None]:
# train.txt, val.txtのコピー
shutil.copyfile(f'{DATA_DIR}/train.txt', 
                '/kaggle/working/train.txt')
shutil.copyfile(f'{DATA_DIR}/valid.txt', 
                '/kaggle/working/val.txt')

# YAML Setting

In [None]:
import yaml

cwd = '/kaggle/working/'
yaml_path = os.path.join(cwd, 'gbr.yaml')

# Dataset description passed to YOLOv5 via `--data`: the train/val entries
# point at the list files copied into the working directory, and there is a
# single class named 'cots'.
data = dict(
    path='/kaggle/working',
    train=os.path.join(cwd, 'train.txt'),
    val=os.path.join(cwd, 'val.txt'),
    nc=1,
    names=['cots'],
)

with open(yaml_path, 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)

# Read the file back to show what was written. The `with` block closes the
# handle (previously it was opened and never closed).
with open(yaml_path, 'r') as f:
    print('\nyaml:')
    print(f.read())

In [None]:
%%writefile /kaggle/working/hyp.yaml
# Training hyperparameters for this run. The %%writefile magic above saves
# this cell to /kaggle/working/hyp.yaml, which is handed to train.py via --hyp.
# lr0 follows the Adam suggestion in its own comment (the notebook sets OPTIM = 'Adam').
lr0: 0.001  # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.1  # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937  # SGD momentum/Adam beta1
weight_decay: 0.0005  # optimizer weight decay 5e-4
warmup_epochs: 2.0  # warmup epochs (fractions ok)
warmup_momentum: 0.8  # warmup initial momentum
warmup_bias_lr: 0.1  # warmup initial bias lr
box: 0.05  # box loss gain
cls: 0.5  # cls loss gain
cls_pw: 1.0  # cls BCELoss positive_weight
obj: 1.0  # obj loss gain (scale with pixels)
obj_pw: 1.0  # obj BCELoss positive_weight
iou_t: 0.20  # IoU training threshold
anchor_t: 4.0  # anchor-multiple threshold
# anchors: 3  # anchors per output layer (0 to ignore)
fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4  # image HSV-Value augmentation (fraction)
degrees: 0.0  # image rotation (+/- deg)
translate: 0.10  # image translation (+/- fraction)
scale: 0.5  # image scale (+/- gain)
shear: 0.0  # image shear (+/- deg)
perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
flipud: 0.5  # image flip up-down (probability)
fliplr: 0.5  # image flip left-right (probability)
mosaic: 0.5  # image mosaic (probability)
mixup: 0.5 # image mixup (probability)
copy_paste: 0.0  # segment copy-paste (probability)

# 📦 [YOLOv5](https://github.com/ultralytics/yolov5/)

In [None]:
%cd /kaggle/working
!rm -r /kaggle/working/yolov5
# !git clone https://github.com/ultralytics/yolov5 # clone
!cp -r /kaggle/input/yolov5-lib-ds /kaggle/working/yolov5
%cd yolov5
%pip install -qr requirements.txt  # install

from yolov5 import utils
display = utils.notebook_init()  # check

# 🚅 Training

In [None]:
!python train.py --img {DIM}\
--batch {BATCH}\
--epochs {EPOCHS}\
--data /kaggle/working/gbr.yaml\
--hyp /kaggle/working/hyp.yaml\
--weights {MODEL}.pt\
--optimizer {OPTIM}\
--project {PROJECT} --name {NAME}\
--exist-ok

# ✂️ Remove Files

In [None]:
!rm -r {IMAGE_DIR}
!rm -r {LABEL_DIR}