In [3]:
from wandb_import import *

In [4]:
import random
import torch


def set_seed(x=1234):
    """Seed the RNGs used in this notebook and return a NumPy generator.

    Seeds Python's ``random``, NumPy's global state and PyTorch with ``x``
    (plus all CUDA devices when CUDA is available) and sets the cuDNN
    ``deterministic`` / ``benchmark`` flags.

    Args:
        x: Seed value handed to every generator.

    Returns:
        The generator produced by ``np.random.default_rng(x)``.
    """
    # Built first so an invalid seed fails before any global state is touched.
    generator = np.random.default_rng(x)

    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(x)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(x)

    return generator


# Notebook-wide generator, seeded with the default value.
RNG = set_seed()

In [5]:
from pathlib import Path
import sys

# Root of the local YOLOv5 checkout; the project `utils` package imported
# below is expected to be importable from here.
yolo_dir = Path.home()/'new_yolov5/yolov5'

# Best-effort path setup: only extend sys.path when the checkout exists, and
# skip the append when the entry is already present so re-running this cell
# does not pile up duplicate entries.
if yolo_dir.is_dir() and str(yolo_dir) not in sys.path:
    sys.path.append(str(yolo_dir))

from utils.tools.general import *
# `fu` is used by the file-list cells further down, which run before the cell
# that imports it. Importing it here keeps a top-to-bottom run working even if
# the star import above does not happen to export `fu`.
import utils.tools.file as fu

In [6]:
'''set yaml and paths'''
# Alternative output directories used previously; uncomment one to switch.
# yaml_dir = yolo_dir/'data'/'test_merge_training'
# yaml_dir = yolo_dir/'data'/'fake_coco128'
# yaml_dir = yolo_dir/'data'/'sampled_coco2017'

# Root under which the source datasets live (absolute, machine-specific path).
dpath = Path('/mnt/disks/datasets')
# Directory that receives every file generated by this notebook
# (image lists, data.yaml, hyp_custom.yaml).
yaml_dir = yolo_dir/'data'/'test_sampled_coco2017' # '''jump'''
yaml_dir.mkdir(parents=True, exist_ok=True)
# Text files that will hold the sampled image paths.
train_txt = yaml_dir/'train_images.txt'
val_txt = yaml_dir/'val_images.txt'


# Resolved to absolute paths; note that train_txt / val_txt above are not.
data_yaml = (yaml_dir/'data.yaml').resolve()
hyp_custom_yaml = (yaml_dir/'hyp_custom.yaml').resolve()
cfg_yaml = (yolo_dir/'models/yolov5m.yaml').resolve()

# Displayed as the cell output; cfg_yaml is shown as a Path, the rest as str.
str(train_txt), str(val_txt), str(data_yaml), str(hyp_custom_yaml), cfg_yaml

('/home/cheeyung/new_yolov5/yolov5/data/test_sampled_coco2017/train_images.txt',
 '/home/cheeyung/new_yolov5/yolov5/data/test_sampled_coco2017/val_images.txt',
 '/home/cheeyung/new_yolov5/yolov5/data/test_sampled_coco2017/data.yaml',
 '/home/cheeyung/new_yolov5/yolov5/data/test_sampled_coco2017/hyp_custom.yaml',
 PosixPath('/home/cheeyung/new_yolov5/yolov5/models/yolov5m.yaml'))

# create filelist txt for train and val

## train

In [7]:
# Alternative training sources used previously; uncomment one to switch.
# train_dpath = (yolo_dir/'datasets'/'coco128_no_label')
# train_dpath = yolo_dir/'utils/paster/examples/fake_coco_128'
# Training split directory; the next cell reads its `images` and `labels`
# sub-directories.
train_dpath = dpath/'coco2017/train2017'
# `lls` is not a standard pathlib method -- presumably patched onto Path by one
# of the star imports (TODO confirm). Its result is shown as the cell output.
train_dpath.lls()

3

In [8]:


# Build the training image list: index the image and label folders, pair them
# up, sample a subset and write the sampled image paths to `train_txt`.
# NOTE: `fu` must already be in scope when this cell runs; the cell itself does
# not import it.

# Earlier directory layout, kept for reference.
# train_img_df = create_path_df((train_dpath/'images'/'train2017').ls())
# train_lbl_df = create_path_df((train_dpath/'labels'/'train2017').ls())
train_img_df = create_path_df((train_dpath/'images').ls())
train_lbl_df = create_path_df((train_dpath/'labels').ls())

# Keep only rows flagged as regular files.
train_img_df = train_img_df[train_img_df['is_file']]
train_lbl_df = train_lbl_df[train_lbl_df['is_file']]
# `columns_to_show` is not used afterwards in this notebook.
train_img_and_lbl_df, columns_to_show = join_img_and_lbl_df(train_img_df, train_lbl_df)

# train_img_and_lbl_df.head()
# Draw 120 rows using the notebook-wide RNG.
# NOTE(review): the shared bit generator is presumably advanced by this draw,
# so later samples depend on this cell having run first -- confirm.
sampled_train_df = train_img_and_lbl_df.sample(120, random_state=RNG.bit_generator)
train_img_paths = sampled_train_df['file_path-img'].values
fu.f_writelines(train_img_paths, train_txt)

  0%|          | 0/19222 [00:00<?, ?it/s]

  0%|          | 0/19222 [00:00<?, ?it/s]

## val

In [10]:
# Build the validation image list the same way as the training list: index
# images and labels, pair them, sample a subset and write it to `val_txt`.
# NOTE: `fu` must already be in scope when this cell runs; the cell itself does
# not import it.

# Alternative validation sources used previously; uncomment one to switch.
# val_dpath = Path('/mnt/disks/lightbox_china_merged-val/data')
# val_dpath = train_dpath
# val_dpath = Path('/mnt/disks/coco-2017-2/coco2017/test2017')
# The validation list is drawn from the `test2017` directory.
val_dpath = (dpath/'coco2017'/'test2017')

val_img_df = create_path_df((val_dpath/'images').ls())
val_lbl_df = create_path_df((val_dpath/'labels').ls())

# Keep only rows flagged as regular files.
val_img_df = val_img_df[val_img_df['is_file']]
val_lbl_df = val_lbl_df[val_lbl_df['is_file']]
# Rebinds `columns_to_show` from the training cell; it is not used afterwards.
val_img_and_lbl_df, columns_to_show = join_img_and_lbl_df(val_img_df, val_lbl_df)

# Draw 40 rows from the same notebook-wide RNG used for the training sample.
# NOTE(review): the result presumably depends on how many draws were made from
# RNG before this cell ran -- confirm if exact reproducibility matters.
sampled_val_df = val_img_and_lbl_df.sample(40, random_state=RNG.bit_generator)
val_img_paths = sampled_val_df['file_path-img'].values
fu.f_writelines(val_img_paths, val_txt)

  0%|          | 0/40670 [00:00<?, ?it/s]

  0%|          | 0/5298 [00:00<?, ?it/s]

# create yaml

In [11]:
'''remove labels cache'''

# Delete every `*.cache` entry found in yaml_dir, printing each path first so
# the cell output records what was removed.
# `ls` is not a standard pathlib method -- presumably a patched helper that
# accepts a glob pattern (TODO confirm).
for lbl_cache in yaml_dir.ls('*.cache'):
    print(lbl_cache)
    lbl_cache.unlink()

In [12]:
# Sanity check: show the output directory and whether both image-list files
# exist. Each list line prints a (path, exists) tuple.
print(f"yaml_dir:  {yaml_dir}")
print(f"train_txt:  {str(train_txt), train_txt.is_file()}")
print(f"val_txt:  {str(val_txt),  val_txt.is_file()}")

yaml_dir:  /home/cheeyung/new_yolov5/yolov5/data/test_sampled_coco2017
train_txt:  ('/home/cheeyung/new_yolov5/yolov5/data/test_sampled_coco2017/train_images.txt', True)
val_txt:  ('/home/cheeyung/new_yolov5/yolov5/data/test_sampled_coco2017/val_images.txt', True)


In [13]:
import utils.tools.file as fu
from types import SimpleNamespace

def create_data_yaml(dest_file, train_img_path, val_img_path, cls_names: List[str], exist_ok=False):
    """Write a dataset YAML with ``train``, ``val``, ``nc`` and ``names`` keys.

    Args:
        dest_file: Path of the YAML file to write; parent directories are
            created when missing.
        train_img_path: Existing path (file or directory) for the training set.
        val_img_path: Existing path (file or directory) for the validation set.
        cls_names: Class names; any iterable is accepted and written as a list.
        exist_ok: When False (default), refuse to overwrite an existing
            ``dest_file``.

    Raises:
        FileExistsError: ``dest_file`` already exists and ``exist_ok`` is False.
        FileNotFoundError: ``train_img_path`` or ``val_img_path`` does not exist.
    """
    import json  # local import so the function does not rely on another cell

    dest_file = Path(dest_file)

    if dest_file.is_file() and not exist_ok:
        raise FileExistsError(f"{dest_file} already exists (pass exist_ok=True to overwrite)")

    train_img_path = Path(train_img_path).resolve()
    val_img_path = Path(val_img_path).resolve()

    # Validate the inputs before touching the filesystem, so a bad argument
    # does not leave freshly created directories behind.
    for img_path in (train_img_path, val_img_path):
        if not img_path.exists():
            raise FileNotFoundError(img_path)

    # Materialise once: tuples, arrays and generators all end up as a list,
    # and len() below works for any iterable.
    names = list(cls_names)

    dest_file.parent.mkdir(parents=True, exist_ok=True)

    fu.f_writelines(
        [
            f"train: {train_img_path}",
            f"val: {val_img_path}",
            " ",
            f"nc: {len(names)}",
            # A JSON array is a valid YAML flow sequence, whereas a Python repr
            # is not in general (tuple parentheses, numpy scalar reprs,
            # backslash escapes inside single quotes).
            f"names: {json.dumps(names)}",
        ],
        dest_file,
    )

def load_data_yaml(yaml_path):
    """Load a YAML file and expose its top-level keys as attributes.

    Args:
        yaml_path: Location of the YAML file; converted to ``str`` before
            being handed to ``fu.load_yaml``.

    Returns:
        A ``SimpleNamespace`` built from the loaded mapping.
    """
    return SimpleNamespace(**fu.load_yaml(str(yaml_path)))

In [14]:
# Class names written to data.yaml (14 entries).
# NOTE(review): these product names are paired with a COCO-2017 sample in this
# notebook -- confirm the label files only use class ids 0-13.
cnames = [
    'wangzai_milk',
    'wangwang_coco_vanilla',
    'wangwang_coco_orange',
    'vitamilk',
    'lixing_yogurt_strawberry',
    'kangshifu_green_tea',
    'tongyi_milktea',
    'kangshifu_beefNoodle',
    'wanglaoji',
    'sprite_200',
    'pocky_coco',
    'lixing_yogurt_kiwi',
    'jiaduobao_550ml',
    'kangshifu_peach_drink',
]

# Overwrite data.yaml on every run so it always matches the lists above.
create_data_yaml(
    dest_file=data_yaml,
    train_img_path=train_txt,
    val_img_path=val_txt,
    cls_names=cnames,
    exist_ok=True,
)

# Read the file back and display it as the cell output.
load_data_yaml(data_yaml)

namespace(train='/home/cheeyung/new_yolov5/yolov5/data/test_sampled_coco2017/train_images.txt',
          val='/home/cheeyung/new_yolov5/yolov5/data/test_sampled_coco2017/val_images.txt',
          nc=14,
          names=['wangzai_milk',
                 'wangwang_coco_vanilla',
                 'wangwang_coco_orange',
                 'vitamilk',
                 'lixing_yogurt_strawberry',
                 'kangshifu_green_tea',
                 'tongyi_milktea',
                 'kangshifu_beefNoodle',
                 'wanglaoji',
                 'sprite_200',
                 'pocky_coco',
                 'lixing_yogurt_kiwi',
                 'jiaduobao_550ml',
                 'kangshifu_peach_drink'])

In [15]:
# %load {data_yaml}

# hyp yaml

In [16]:
%%writefile {hyp_custom_yaml}
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Hyperparameters for COCO training from scratch (written by this notebook as hyp_custom.yaml)
# python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials

lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.1  # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937  # SGD momentum/Adam beta1
weight_decay: 0.0005  # optimizer weight decay 5e-4
warmup_epochs: 3.0  # warmup epochs (fractions ok)
warmup_momentum: 0.8  # warmup initial momentum
warmup_bias_lr: 0.1  # warmup initial bias lr
box: 0.05  # box loss gain
cls: 0.5  # cls loss gain
cls_pw: 1.0  # cls BCELoss positive_weight
obj: 1.0  # obj loss gain (scale with pixels)
obj_pw: 1.0  # obj BCELoss positive_weight
iou_t: 0.20  # IoU training threshold
anchor_t: 4.0  # anchor-multiple threshold
# anchors: 3  # anchors per output layer (0 to ignore); left commented out, so the key is not set here
fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
hsv_s: 0.05  # image HSV-Saturation augmentation (fraction)
hsv_v: 0.05  # image HSV-Value augmentation (fraction)
degrees: 0.0  # image rotation (+/- deg)
translate: 0.1  # image translation (+/- fraction)
scale: 0.5  # image scale (+/- gain)
shear: 0.0  # image shear (+/- deg)
perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
flipud: 0.0  # image flip up-down (probability)
fliplr: 0.3  # image flip left-right (probability)
mosaic: 0  # image mosaic (probability); 0 = never applied
mixup: 0.0  # image mixup (probability)
copy_paste: 0.0  # segment copy-paste (probability)


Overwriting /home/cheeyung/new_yolov5/yolov5/data/test_sampled_coco2017/hyp_custom.yaml
