In [None]:
# Download the COCO train2017 images and the LVIS v1 train annotations.
!wget http://images.cocodataset.org/zips/train2017.zip -q --show-progress
!wget https://s3-us-west-2.amazonaws.com/dl.fbaipublicfiles.com/LVIS/lvis_v1_train.json.zip -q --show-progress

# Unpack the images into ./train and the annotation JSON into ./labels.
!unzip -qq "./train2017.zip" -d "./train"
!unzip -qq "./lvis_v1_train.json.zip" -d "./labels"

# Install the LVIS API and the YOLOv5 requirements, then fetch the YOLOv5 sources.
# NOTE(review): none of these installs are version-pinned, so re-runs may pick up newer releases.
!pip install -q lvis
!pip install -qr https://raw.githubusercontent.com/ultralytics/yolov5/master/requirements.txt
!git clone https://github.com/ultralytics/yolov5


In [None]:
import os
import yaml

from shutil import copyfile

import numpy as np

from sklearn.model_selection import train_test_split

import torch

from lvis import LVIS

from tqdm import tqdm

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Bare last expression so the notebook displays the resolved torch.device.
torch.device(device)


device(type='cuda')

In [None]:
lvis_api = LVIS(
    "labels/lvis_v1_train.json"
)  # Colab crashes if you try to json.load this file directly


In [None]:
def get_helmet_dataset_dicts(lvis_api, category_ids=(556, 467), img_root="train/"):
    """Build one record per LVIS image that has at least one box of a wanted category.

    Args:
        lvis_api: Loaded LVIS API object. Only ``imgs``, ``load_imgs`` and
            ``img_ann_map`` are used.
        category_ids: Iterable of LVIS category ids to keep. The default
            ``(556, 467)`` is helmet and football_helmet according to the
            original author's note (no other category name contains "helmet").
        img_root: Directory under which the image split folders live. The
            split folder and file name are taken from the last two components
            of each image's ``coco_url``.

    Returns:
        A list of dicts with the keys ``file_name``, ``height``, ``width``,
        ``image_id`` and ``bbox``. ``bbox`` is the list of matching annotation
        ``bbox`` values, copied unchanged. Images without any matching
        annotation are omitted.
    """
    wanted_ids = frozenset(category_ids)

    # Sort the ids so the resulting list has a deterministic order.
    img_ids = sorted(lvis_api.imgs.keys())
    imgs = lvis_api.load_imgs(img_ids)

    dataset_dicts = []
    for img_id, img_dict in zip(img_ids, imgs):
        boxes = [
            anno["bbox"]
            for anno in lvis_api.img_ann_map[img_id]
            if anno["category_id"] in wanted_ids
        ]
        if not boxes:
            # No helmet in this image: it is not part of the dataset.
            continue

        # coco_url ends with ".../<split_folder>/<file_name>".
        split_folder, file_name = img_dict["coco_url"].split("/")[-2:]
        dataset_dicts.append(
            {
                "file_name": os.path.join(img_root, split_folder, file_name),
                "height": img_dict["height"],
                "width": img_dict["width"],
                "image_id": img_dict["id"],
                "bbox": boxes,
            }
        )
    return dataset_dicts


In [None]:
dataset_dicts = get_helmet_dataset_dicts(lvis_api)
# Fixed seed so the 90/10 train/validation split is the same after every
# Restart & Run All; without it the files copied to train/ and valid/ change
# from run to run.
train_dataset_dicts, val_dataset_dicts = train_test_split(
    dataset_dicts, test_size=0.1, random_state=42
)


In [None]:
# Create the ./lvis/{images,labels}/{train,valid} directory layout.
for kind in ("images", "labels"):
    for split in ("train", "valid"):
        os.makedirs(f"./lvis/{kind}/{split}", exist_ok=True)

# Copy every selected image into the folder of the split it belongs to.
for split, records in (("train", train_dataset_dicts), ("valid", val_dataset_dicts)):
    for img_data in tqdm(records):
        img_path = img_data["file_name"]
        img_name = img_path.rsplit("/", 1)[-1]
        copyfile(img_path, f"./lvis/images/{split}/{img_name}")


100%|██████████| 1718/1718 [00:06<00:00, 252.07it/s]
100%|██████████| 191/191 [00:00<00:00, 232.43it/s]


In [None]:
# Dataset description for YOLOv5: image folders for both splits and a single
# "helmet" class.
data_yaml = {
    "train": "../lvis/images/train",
    "val": "../lvis/images/valid",
    "nc": 1,
    "names": ["helmet"],
}

with open("./yolov5/data/data.yaml", "w") as yaml_file:
    yaml.dump(data_yaml, yaml_file, default_flow_style=True)


In [None]:
def create_labels(dataset_dicts, is_train=True):
    """Write one YOLO-format label file per image record.

    Each record's ``bbox`` entries are ``[x, y, w, h]`` (top-left corner plus
    size). Every box becomes one line ``0 x_center y_center width height``
    with all four values divided by the image width/height. The file is
    written to ``./lvis/labels/train`` or ``./lvis/labels/valid`` and is named
    after the image file with its extension replaced by ``.txt``.

    Args:
        dataset_dicts: Iterable of dicts with ``file_name`` and ``bbox`` keys.
            ``height`` and ``width`` are used when present; otherwise the
            image is opened to read its size.
        is_train: Write to the ``train`` folder when true, else to ``valid``.

    Raises:
        FileNotFoundError: If the image size has to be read from disk and the
            image cannot be loaded.
    """
    split = "train" if is_train else "valid"
    label_dir = f"./lvis/labels/{split}"
    os.makedirs(label_dir, exist_ok=True)

    for img_data in tqdm(dataset_dicts):
        img_path = img_data["file_name"]

        # The records already carry the image size, so decoding every image
        # just to read its shape is unnecessary.
        img_h = img_data.get("height")
        img_w = img_data.get("width")
        if not img_h or not img_w:
            # Fallback for records without a size. Imported here because the
            # notebook never imports cv2 at the top level.
            import cv2

            img = cv2.imread(img_path)
            if img is None:
                raise FileNotFoundError(f"Cannot read image: {img_path}")
            img_h, img_w = img.shape[:2]

        lines = []
        for x, y, w, h in img_data["bbox"]:
            # YOLO format: box centre and size, normalised by the image size.
            x_c, y_c = x + w / 2, y + h / 2
            values = [0, x_c / img_w, y_c / img_h, w / img_w, h / img_h]
            lines.append(" ".join(str(v) for v in values) + "\n")

        # splitext handles extensions of any length (.jpg, .jpeg, ...).
        img_name = os.path.splitext(os.path.basename(img_path))[0]
        with open(f"{label_dir}/{img_name}.txt", "w") as f:
            f.writelines(lines)


In [None]:
# Write the YOLO label files for the training split, then for the validation split.
create_labels(dataset_dicts=train_dataset_dicts, is_train=True)
create_labels(dataset_dicts=val_dataset_dicts, is_train=False)


100%|██████████| 1718/1718 [00:10<00:00, 162.88it/s]
100%|██████████| 191/191 [00:01<00:00, 164.63it/s]


In [None]:
# Fine-tune from the yolov5x.pt weights on the dataset described by data.yaml:
# batch size 24, 10 epochs, a checkpoint saved every epoch, runs stored under ./lvis.
# --freeze 12 presumably freezes the first 12 layers — confirm against yolov5/train.py.
# NOTE(review): the saved output below reports epochs=20, so it was produced by a
# different invocation than this command.
!python yolov5/train.py --batch 24 \
                        --epochs 10 \
                        --data data.yaml \
                        --weights yolov5x.pt \
                        --freeze 12 \
                        --save-period 1 \
                        --project lvis

[34m[1mtrain: [0mweights=yolov5x.pt, cfg=, data=data.yaml, hyp=yolov5/data/hyps/hyp.scratch-low.yaml, epochs=20, batch_size=24, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=lvis, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[12], save_period=1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-41-g10e93d2 Python-3.8.16 torch-1.13.0+cu116 CUDA:0 (Tesla T4, 15110MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0

In [19]:
# Pick one validation record at random; file_name is reused by the detect command.
idx = np.random.randint(len(val_dataset_dicts))
file_path = val_dataset_dicts[idx]["file_name"]
file_name = file_path.rsplit("/", 1)[-1]
print(file_name)


000000503707.jpg


In [25]:
# Run the trained detector on the sampled validation image ({file_name} is
# interpolated from the Python variable) and save the predictions as text
# labels with confidences under ./lvis.
# NOTE(review): the weights path hard-codes the run directory "exp5"; the run
# directory name appears to depend on how many earlier runs exist under ./lvis,
# so on a fresh kernel this path likely needs adjusting — confirm.
!python yolov5/detect.py --weights ./lvis/exp5/weights/last.pt \
                         --source ./lvis/images/valid/{file_name} \
                         --save-txt \
                         --save-conf \
                         --project lvis

[34m[1mdetect: [0mweights=['./lvis/exp5/weights/last.pt'], source=./lvis/images/valid/000000503707.jpg, data=yolov5/data/coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=True, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=lvis, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v7.0-41-g10e93d2 Python-3.8.16 torch-1.13.0+cu116 CUDA:0 (Tesla T4, 15110MiB)

Fusing layers... 
Model summary: 322 layers, 86173414 parameters, 0 gradients, 203.8 GFLOPs
image 1/1 /content/lvis/images/valid/000000503707.jpg: 448x640 3 helmets, 72.5ms
Speed: 0.7ms pre-process, 72.5ms inference, 1.7ms NMS per image at shape (1, 3, 640, 640)
Results saved to [1mlvis/exp10[0m
1 labels saved to lvis/exp10/labels
