In [None]:
import numpy as np 
import pandas as pd 
import os
from tqdm import tqdm
import shutil

from sklearn.model_selection import StratifiedKFold

# Data

In [None]:
# Load the competition's training annotations and preview the first rows
df = pd.read_csv(filepath_or_buffer='../input/global-wheat-detection/train.csv')
df.head(n=5)

In [None]:
# Convert the dataframe to YOLO format
def convert_to_yolo(df: pd.DataFrame) -> pd.DataFrame:
    """Expand the string ``bbox`` column into numeric box columns.

    Each ``bbox`` value is expected to be a bracketed, comma-separated string
    (first and last characters are stripped, the rest is parsed as numbers in
    the order x, y, w, h).

    Args:
        df: Frame containing a ``bbox`` column. It is left unmodified.

    Returns:
        A new frame without ``bbox`` and with these columns appended:
        ``x``, ``y``, ``w``, ``h`` (parsed values), ``x_center`` (``x + w/2``),
        ``y_center`` (``y + h/2``) and ``classes`` (constant 0).

    Raises:
        KeyError: If ``df`` has no ``bbox`` column.
    """
    # drop() returns a new frame, so the caller's frame keeps its 'bbox' column
    converted = df.drop(columns=['bbox'])

    if df.empty:
        # np.stack rejects an empty sequence; a (0, 4) array yields empty columns instead
        bboxs = np.empty((0, 4))
    else:
        bboxs = np.stack([np.fromstring(raw[1:-1], sep=',') for raw in df['bbox']])

    for i, column in enumerate(['x', 'y', 'w', 'h']):
        converted[column] = bboxs[:, i]

    converted['x_center'] = converted['x'] + converted['w'] / 2
    converted['y_center'] = converted['y'] + converted['h'] / 2
    # Single-class dataset: every box gets class id 0
    converted['classes'] = 0
    return converted

In [None]:
# Replace the raw annotations with their YOLO-style columns.
# Note: the converted frame no longer has 'bbox', so this cell cannot be
# re-run without reloading train.csv first.
df = df.pipe(convert_to_yolo)
df.head(n=5)

In [None]:
# Split the data into folds
def stratified_kfold_split_df(df: pd.DataFrame) -> pd.DataFrame:
    """Assign each image to one of 5 folds, stratified by ``source``.

    Folds are drawn per unique ``image_id`` rather than per row, so every
    bounding box of an image carries the same fold and an image can never be
    part of both the training and the validation side of a fold.

    Args:
        df: Frame with one row per bounding box and at least the columns
            ``image_id``, ``source``, ``x``, ``y``, ``w``, ``h``,
            ``x_center``, ``y_center`` and ``classes``. It is left unmodified.

    Returns:
        A new frame restricted to ``image_id``, the box columns, ``classes``
        and a float ``fold`` column holding values 0.0 to 4.0.
    """
    # One row per image; assumes every image has a single source (first occurrence wins)
    images = (
        df[['image_id', 'source']]
        .drop_duplicates(subset='image_id')
        .reset_index(drop=True)
    )
    fold_id = np.zeros(len(images))
    skf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

    # Stratify by source
    for ff, (_, test_index) in enumerate(skf.split(images, images['source'])):
        fold_id[test_index] = ff

    # Broadcast the per-image fold back onto every bounding-box row
    fold_by_image = pd.Series(fold_id, index=images['image_id'].to_numpy())
    result = df[['image_id', 'x', 'y', 'w', 'h', 'x_center', 'y_center', 'classes']].copy()
    result['fold'] = result['image_id'].map(fold_by_image)
    return result

In [None]:
# Attach the fold assignment.
# Note: the returned frame no longer has 'source', so this cell cannot be
# re-run on its own output.
df = df.pipe(stratified_kfold_split_df)
df.head(n=5)

In [None]:
def create_labels_and_copy_images(df: pd.DataFrame, fold: int, source: str = "train", img_size: int = 1024):
    """Write YOLO label files and copy images into ``convertor/fold{fold}/``.

    Images owning at least one row with ``df['fold'] == fold`` go to the
    ``valid`` split, all others to ``train``. For each image a label file
    ``labels/<split>/<image_id>.txt`` is written with one line per box:
    ``<class> <x_center> <y_center> <w> <h>``, where the four box values are
    divided by ``img_size`` and the class id is written as an integer.
    The image itself is copied to ``images/<split>/<image_id>.jpg``.

    Args:
        df: Frame with the columns ``image_id``, ``fold``, ``classes``,
            ``x_center``, ``y_center``, ``w`` and ``h``.
        fold: Fold whose images form the validation split.
        source: Sub-directory of ``../input/global-wheat-detection/`` that
            holds the ``.jpg`` files.
        img_size: Pixel size used to normalise the box values
            (assumes square images of this size; confirm for other datasets).
    """
    val_index = set(df.loc[df['fold'] == fold, 'image_id'])
    fold_dir = f'convertor/fold{fold}'

    # Create all output directories once instead of checking for every image
    for split in ('train', 'valid'):
        os.makedirs(f'{fold_dir}/labels/{split}', exist_ok=True)
        os.makedirs(f'{fold_dir}/images/{split}', exist_ok=True)

    # Loop over the bounding boxes image by image
    for name, mini in tqdm(df.groupby('image_id')):
        # Where this image and its labels are stored
        split = 'valid' if name in val_index else 'train'

        # Normalise only the coordinates as the YOLO format requires;
        # the class id is an index and must not be scaled
        class_ids = mini['classes'].astype(int).astype(str).values
        coords = mini[['x_center', 'y_center', 'w', 'h']].astype(float).values / img_size
        coords = coords.astype(str)

        with open(f'{fold_dir}/labels/{split}/{name}.txt', 'w') as f:
            f.writelines(
                ' '.join([class_id, *box]) + '\n'
                for class_id, box in zip(class_ids, coords)
            )

        # The images need no preprocessing, so they are copied as-is
        shutil.copy(
            f"../input/global-wheat-detection/{source}/{name}.jpg",
            f'{fold_dir}/images/{split}/{name}.jpg',
        )

In [None]:
# Build the dataset for fold 2; the same fold number is hard-coded in the
# train/val paths written to wheat.yaml, so change both together.
create_labels_and_copy_images(df=df, fold=2, source="train")

In [None]:
!git clone https://github.com/ultralytics/yolov5  && cd yolov5 && pip install -r requirements.txt &> /dev/null

In [None]:
# 割り当てられたGPUを確認
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
    print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
    print('and then re-execute this cell.')
    
else:
    print(gpu_info)

# YAML configuration file

In [None]:
# Dataset description for YOLOv5: image directories, number of classes, class names
yaml_text = "\n".join([
    "train: /kaggle/working/convertor/fold2/images/train/",
    "val: /kaggle/working/convertor/fold2/images/valid/",
    "",
    "nc: 1",
    "names: ['wheat']",
])

In [None]:
with open("wheat.yaml", 'w') as f:
    f.write(yaml_text)
%cat wheat.yaml

# Training

In [None]:
# Train YOLOv5 on the dataset described by wheat.yaml (512 px input, batch size 2, 50 epochs).
# NOTE(review): --cfg selects the yolov5s model definition while --name says "yolov5x";
# confirm which model is intended before relying on the run name.
!python ./yolov5/train.py --img 512 --batch 2 --epochs 50 --workers 2 --data wheat.yaml --cfg "./yolov5/models/yolov5s.yaml" --name yolov5x_fold2 --cache