In [1]:
import numpy as np 
import pandas as pd 
import os
from tqdm import tqdm
import shutil

from sklearn.model_selection import StratifiedKFold



# Data

In [2]:
df = pd.read_csv('../input/global-wheat-detection/train.csv')
df.head()

Unnamed: 0,image_id,width,height,bbox,source
0,b6ab77fd7,1024,1024,"[834.0, 222.0, 56.0, 36.0]",usask_1
1,b6ab77fd7,1024,1024,"[226.0, 548.0, 130.0, 58.0]",usask_1
2,b6ab77fd7,1024,1024,"[377.0, 504.0, 74.0, 160.0]",usask_1
3,b6ab77fd7,1024,1024,"[834.0, 95.0, 109.0, 107.0]",usask_1
4,b6ab77fd7,1024,1024,"[26.0, 144.0, 124.0, 117.0]",usask_1


In [3]:
# データフレームをyolo形式に変換
def convert_to_yolo(df: pd.DataFrame) -> pd.DataFrame:
    bboxs = np.stack(df['bbox'].apply(lambda x: np.fromstring(x[1:-1], sep=',')))
    for i, column in enumerate(['x', 'y', 'w', 'h']):
        df[column] = bboxs[:,i]
    df.drop(columns=['bbox'], inplace=True)
    df['x_center'] = df['x'] + df['w']/2
    df['y_center'] = df['y'] + df['h']/2
    df['classes'] = 0
    return df

In [4]:
df = convert_to_yolo(df)
df.head()

Unnamed: 0,image_id,width,height,source,x,y,w,h,x_center,y_center,classes
0,b6ab77fd7,1024,1024,usask_1,834.0,222.0,56.0,36.0,862.0,240.0,0
1,b6ab77fd7,1024,1024,usask_1,226.0,548.0,130.0,58.0,291.0,577.0,0
2,b6ab77fd7,1024,1024,usask_1,377.0,504.0,74.0,160.0,414.0,584.0,0
3,b6ab77fd7,1024,1024,usask_1,834.0,95.0,109.0,107.0,888.5,148.5,0
4,b6ab77fd7,1024,1024,usask_1,26.0,144.0,124.0,117.0,88.0,202.5,0


In [5]:
# データを分割 
def stratified_kfold_split_df(df: pd.DataFrame) -> pd.DataFrame:
    fold_id = np.zeros((df.shape[0],1))
    skf = StratifiedKFold(n_splits = 5, random_state = 42, shuffle = True)
    
    # sourceでの層化
    for (ff, (train_index, test_index)) in enumerate(skf.split(df, df['source'])):
        fold_id[test_index]= int(ff)

    df['fold'] = fold_id.copy()
    df = df[['image_id','x', 'y', 'w', 'h','x_center','y_center','classes', 'fold']]
    return df

In [6]:
df = stratified_kfold_split_df(df)
df.head()

Unnamed: 0,image_id,x,y,w,h,x_center,y_center,classes,fold
0,b6ab77fd7,834.0,222.0,56.0,36.0,862.0,240.0,0,0.0
1,b6ab77fd7,226.0,548.0,130.0,58.0,291.0,577.0,0,2.0
2,b6ab77fd7,377.0,504.0,74.0,160.0,414.0,584.0,0,3.0
3,b6ab77fd7,834.0,95.0,109.0,107.0,888.5,148.5,0,3.0
4,b6ab77fd7,26.0,144.0,124.0,117.0,88.0,202.5,0,4.0


In [7]:
def create_labels_and_copy_images(df: pd.DataFrame, fold: int, source: str = "train"):
    val_index = set(df[df['fold'] == fold]['image_id'])

    # 画像ごとにバウンディングボックスをループ処理
    for name,mini in tqdm(df.groupby('image_id')):    
        # ファイルの保存場所
        if name in val_index:
            path2save = 'valid/'
        else:
            path2save = 'train/'   
        # ラベルのストレージパス
        if not os.path.exists(f'convertor/fold{fold}/labels/' + path2save):
            os.makedirs(f'convertor/fold{fold}/labels/' + path2save)
        with open(f'convertor/fold{fold}/labels/' + path2save + name + ".txt", 'w+') as f:
            # Yolo形式の要件に従って座標を正規化
            row = mini[['classes','x_center','y_center','w','h']].astype(float).values
            row = row / 1024
            row = row.astype(str)
            for j in range(len(row)):
                text = ' '.join(row[j])
                f.write(text)
                f.write("\n")
        if not os.path.exists(f'convertor/fold{fold}/images/{path2save}'):
            os.makedirs(f'convertor/fold{fold}/images/{path2save}')
        # 画像の前処理は不要 => それらをバッチとしてコピー
        shutil.copy(f"../input/global-wheat-detection/{source}/{name}.jpg", f'convertor/fold{fold}/images/{path2save}/{name}.jpg')

In [8]:
create_labels_and_copy_images(df, fold=2, source="train")

100%|██████████| 3373/3373 [00:29<00:00, 113.43it/s]


In [9]:
!git clone https://github.com/ultralytics/yolov5  && cd yolov5 && pip install -r requirements.txt &> /dev/null

Cloning into 'yolov5'...
remote: Enumerating objects: 15943, done.[K
remote: Counting objects: 100% (63/63), done.[K
remote: Compressing objects: 100% (47/47), done.[K
remote: Total 15943 (delta 28), reused 39 (delta 16), pack-reused 15880[K
Receiving objects: 100% (15943/15943), 14.61 MiB | 21.10 MiB/s, done.
Resolving deltas: 100% (10927/10927), done.


In [10]:
# 割り当てられたGPUを確認
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
    print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
    print('and then re-execute this cell.')
    
else:
    print(gpu_info)

Sat Sep  9 07:27:08 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.161.03   Driver Version: 470.161.03   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla T4            Off  | 00000000:00:05.0 Off |                    0 |
| N/A   39C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|       

# YAML設定ファイル

In [11]:
yaml_text = """train: /kaggle/working/convertor/fold2/images/train/
val: /kaggle/working/convertor/fold2/images/valid/

nc: 1
names: ['wheat']"""

In [12]:
with open("wheat.yaml", 'w') as f:
    f.write(yaml_text)
%cat wheat.yaml

train: /kaggle/working/convertor/fold2/images/train/
val: /kaggle/working/convertor/fold2/images/valid/

nc: 1
names: ['wheat']

# Training

In [None]:
!python ./yolov5/train.py --img 512 --batch 2 --epochs 3 --workers 2 --data wheat.yaml --cfg "./yolov5/models/yolov5s.yaml" --name yolov5x_fold2 --cache

[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice: (30 second timeout) 
[34m[1mwandb[0m: W&B disabled due to login timeout.
[34m[1mtrain: [0mweights=yolov5/yolov5s.pt, cfg=./yolov5/models/yolov5s.yaml, data=wheat.yaml, hyp=yolov5/data/hyps/hyp.scratch-low.yaml, epochs=3, batch_size=2, imgsz=512, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=2, project=yolov5/runs/train, name=yolov5x_fold2, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv

In [None]:
!ls ./yolov5/runs/train/yolov5x_fold0/weights/ -lh

# Prediction

In [None]:
!ls /kaggle/input/global-wheat-detection/test

In [None]:
!python ./yolov5/detect.py --weights ./yolov5/runs/train/yolov5x_fold0/weights/best.pt --img 512 --conf 0.1 --source /kaggle/input/global-wheat-detection/test --save-txt --save-conf --exist-ok

In [None]:
!ls ./yolov5/runs/detect/exp/labels/

In [None]:
def convert_yolo_to_coco(s):
    x = int(1024 * (s[1] - s[3]/2))
    y = int(1024 * (s[2] - s[4]/2))
    w = int(1024 * s[3])
    h = int(1024 * s[4])
    
    return(str(s[5]) + ' ' + str(x) + ' ' + str(y) + ' ' + str(w) + ' ' + str(h))

In [None]:
with open('submission.csv', 'w') as myfile:

    # prepare submission
    wfolder = './yolov5/runs/detect/exp/labels/'
    for f in os.listdir(wfolder):
        fname = wfolder + f
        xdat = pd.read_csv(fname, sep = ' ', header = None)
        outline = f[:-4] + ' ' + ' '.join(list(xdat.apply(lambda s: convert_yolo_to_coco(s), axis = 1)))
        myfile.write(outline + '\n')
        
myfile.close()   

In [None]:
!cat submission.csv