# ***YOLO V5 Installation***

In [None]:
# Clone the YOLOv5 repository (stdout silenced) and pin it to commit cce7e78.
!git clone https://github.com/ultralytics/yolov5.git > /dev/null
!cd yolov5 && git reset --hard  cce7e78
# Install the requirements listed by the cloned repository (stdout silenced).
!cd yolov5 && pip install -r requirements.txt > /dev/null

Cloning into 'yolov5'...
remote: Enumerating objects: 9827, done.[K
remote: Counting objects: 100% (5/5), done.[K
remote: Compressing objects: 100% (5/5), done.[K
remote: Total 9827 (delta 0), reused 2 (delta 0), pack-reused 9822[K
Receiving objects: 100% (9827/9827), 10.15 MiB | 29.70 MiB/s, done.
Resolving deltas: 100% (6822/6822), done.
Checking out files: 100% (104/104), done.
HEAD is now at cce7e78 Created using Colaboratory
  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.[0m


# ***Helpers***

In [None]:
import os
import yaml
from PIL import Image
import random
import numpy as np
from shutil import copyfile
import json
from tqdm import tqdm
import matplotlib.pyplot as plt
import os
import json
from tqdm import tqdm
from PIL import Image
from shutil import copyfile
import cv2
import json

def makedirs(path_to_save_labels, path_to_images, dataset_type2image):
    """Create one sub-directory per dataset split under both root folders.

    Args:
        path_to_save_labels: Root folder that will hold the label files.
        path_to_images: Root folder that will hold the per-split images.
        dataset_type2image: Mapping whose keys are the split names
            (the values are not used here).

    Directories that already exist are left untouched, so the function can be
    called repeatedly.
    """
    for split_type in dataset_type2image:
        for root in (path_to_save_labels, path_to_images):
            # exist_ok avoids the check-then-create race of os.path.exists().
            os.makedirs(os.path.join(root, split_type), exist_ok=True)




class ConvertCOCOToYOLO:
    """Convert COCO-style bounding-box annotations into YOLO label files.

    Reads a COCO annotation JSON and writes one ``<image_id>.txt`` file per
    annotated image, with one line per box in the form
    ``<class> <x_center> <y_center> <width> <height>`` (coordinates normalised
    by the image size).

    Each entry of the annotation list is expected to look like::

        {
            "area": 2304645,
            "id": 1,
            "image_id": 10,
            "category_id": 4,
            "bbox": [704, 620, 1401, 1645]    # x, y, width, height
        }
    """

    # Default (height, width) in pixels used to normalise box coordinates.
    img_size = (1080, 1920)

    def __init__(self, img_folder, json_path, PATH_TO_SAVE_LABELS, img_size=None):
        """Store the conversion settings.

        Args:
            img_folder: Folder prefix used to build image paths, or None.
            json_path: Path to the COCO annotation JSON file.
            PATH_TO_SAVE_LABELS: Root folder the label files are written to.
            img_size: Optional (height, width) overriding the class default.
        """
        self.img_folder = img_folder
        self.json_path = json_path
        self.PATH_TO_SAVE_LABELS = PATH_TO_SAVE_LABELS
        if img_size is not None:
            self.img_size = img_size

    def get_img_shape(self, img_path):
        """Return the ``shape`` of the image read by ``cv2.imread``.

        If the read result has no ``shape`` attribute, a message is printed
        and ``(None, None, None)`` is returned instead.
        """
        img = cv2.imread(img_path)
        try:
            return img.shape
        except AttributeError:
            print('error!', img_path)
            return (None, None, None)

    def convert_labels(self, img_path, x1, y1, x2, y2, size=None):
        """Convert corner coordinates to a normalised YOLO box.

        Args:
            img_path: Kept for interface compatibility; not used.
            x1, y1, x2, y2: Two opposite corners of the box (any order).
            size: Optional (height, width) used for normalisation; defaults
                to ``self.img_size``.

        Returns:
            Tuple ``(x_center, y_center, width, height)`` divided by the
            image width/height.
        """
        if size is None:
            size = self.img_size
        xmin, xmax = min(x1, x2), max(x1, x2)
        ymin, ymax = min(y1, y2), max(y1, y2)
        # size is (height, width): x values scale by width, y values by height.
        dw = 1. / size[1]
        dh = 1. / size[0]
        x = (xmin + xmax) / 2.0 * dw
        y = (ymin + ymax) / 2.0 * dh
        w = (xmax - xmin) * dw
        h = (ymax - ymin) * dh
        return (x, y, w, h)

    def convert(self,annotation_key='annotations',img_id='image_id',cat_id='category_id',bbox='bbox',image2dataset_type = None):
        """Write YOLO label files for every annotation in the JSON file.

        Args:
            annotation_key: Key of the annotation list in the JSON document.
            img_id: Key holding the image id inside each annotation.
            cat_id: Key holding the (1-based) category id inside each annotation.
            bbox: Key holding the ``[x, y, width, height]`` box.
            image2dataset_type: Mapping image id -> split sub-folder name. When
                None, labels are written directly into ``PATH_TO_SAVE_LABELS``.

        Raises:
            KeyError: If an image id is missing from ``image2dataset_type``.
        """
        with open(self.json_path) as json_file:
            data = json.load(json_file)

        # image id -> (label path, lines); dicts keep first-seen order.
        labels_by_image = {}

        for annotation in data[annotation_key]:
            image_id = annotation[img_id]
            # YOLO class indices are 0-based, hence the shift by one.
            category_id = int(annotation[cat_id]) - 1
            box = annotation[bbox]

            if self.img_folder is None:
                image_path = f'{image_id}'
            else:
                image_path = f'./{self.img_folder}{image_id}'

            # COCO stores (x, y, width, height); turn it into two corners first.
            yolo_bbox = self.convert_labels(
                image_path, box[0], box[1], box[2] + box[0], box[3] + box[1]
            )
            content = f"{category_id} {yolo_bbox[0]} {yolo_bbox[1]} {yolo_bbox[2]} {yolo_bbox[3]}"

            if image_id not in labels_by_image:
                split_type = '' if image2dataset_type is None else image2dataset_type[image_id]
                label_path = os.path.join(
                    self.PATH_TO_SAVE_LABELS, split_type, f'{image_id}.txt'
                )
                labels_by_image[image_id] = (label_path, [])
            labels_by_image[image_id][1].append(content)

        # One write per image: lines are newline-separated, no trailing newline.
        for label_path, lines in labels_by_image.values():
            with open(label_path, "w") as label_file:
                label_file.write("\n".join(lines))
            
def get_yolo_labels(
    path_to_bboxes, path_to_save_labels, path_to_images, image2dataset_type, class_names
):
    """Write YOLO label files from per-image JSON box files.

    Every file in ``path_to_bboxes`` is named ``<image file name>.json`` and
    holds a ``"bb_objects"`` list of boxes with ``x1``, ``y1``, ``x2``, ``y2``
    and ``class`` entries. Coordinates are normalised by the size of the
    matching image in ``path_to_images``.

    Args:
        path_to_bboxes: Folder with the JSON box files.
        path_to_save_labels: Root folder for the labels (one sub-folder per split).
        path_to_images: Folder with the source images.
        image2dataset_type: Mapping image file name -> split sub-folder name.
        class_names: List of class names; the list index becomes the class id.

    Label files are opened in append mode, so calling this twice on the same
    output folder duplicates the lines. Images without boxes still get an
    (empty) label file.
    """
    for file_name in os.listdir(path_to_bboxes):
        with open(os.path.join(path_to_bboxes, file_name), "r") as f:
            img_boxes = json.load(f)
        # Strip the trailing ".json" to recover the image file name.
        img_name = os.path.splitext(file_name)[0]
        # Only the size is needed; close the image handle right away.
        with Image.open(os.path.join(path_to_images, img_name)) as im:
            im_width, im_height = im.size
        split_type = image2dataset_type[img_name]

        rows = []
        for box in img_boxes["bb_objects"]:
            x_center = ((box["x1"] + box["x2"]) / 2) / im_width
            y_center = ((box["y1"] + box["y2"]) / 2) / im_height
            width = (box["x2"] - box["x1"]) / im_width
            height = (box["y2"] - box["y1"]) / im_height
            label_class = class_names.index(box["class"])
            rows.append(f"{label_class} {x_center} {y_center} {width} {height}\n")

        # splitext handles any extension length (".jpg", ".jpeg", ...).
        label_path = os.path.join(
            path_to_save_labels, split_type, os.path.splitext(img_name)[0] + ".txt"
        )
        # Opening in append mode also creates an empty file for samples without
        # annotations, so they are not skipped.
        with open(label_path, "a") as label_file:
            label_file.writelines(rows)


def copy_images(image_dic,images_name, image2dataset_type, path_to_images):
    for img_path in tqdm(images_name):
        split_type = image2dataset_type[image_dic[img_path]]
        try:
            copyfile(
                os.path.join(path_to_images, img_path),
                os.path.join(path_to_images, split_type, (str(image_dic[img_path]) + '.jpg')),
            )
        except IsADirectoryError:
            continue

# ***Prepare Data and YOLO Annotations***

In [None]:
# Folder holding the COCO annotation JSON (coco_annotations_train.json is read from it).
PATH_TO_BBOXES = 'data_task2/train/annotations/COCO_json/'
# Folder with the source images; the per-split sub-folders are created inside it.
PATH_TO_IMAGES = 'data_task2/train/images/'
# Root folder for the generated YOLO label files (one sub-folder per split).
PATH_TO_SAVE_LABELS = 'data_task2/train/labels/'

# Class names of the dataset; a single class here.
CLASS_NAMES = ['Human']

In [None]:
# Fixed seed so the shuffle below gives the same split on every run.
random.seed(0)
# os.listdir returns names in arbitrary order, so sort before shuffling to make
# the seeded shuffle reproducible. Keep regular files only: once the split
# sub-folders exist inside PATH_TO_IMAGES, a re-run must not treat them as images.
IMAGES_NAME = sorted(
    name
    for name in os.listdir(PATH_TO_IMAGES)
    if os.path.isfile(os.path.join(PATH_TO_IMAGES, name))
)
random.shuffle(IMAGES_NAME)
split_index = int(len(IMAGES_NAME) * 0.8)
train_images = IMAGES_NAME[:split_index]
val_images = IMAGES_NAME[split_index:]
# NOTE(review): test uses the same 20% slice as val, so there is no
# independent hold-out set — confirm this is intended.
test_images = IMAGES_NAME[split_index:]
len(train_images), len(val_images), len(test_images)

(400, 100, 100)

In [None]:
# Load the COCO annotation document; the context manager closes the file handle.
with open(PATH_TO_BBOXES + 'coco_annotations_train.json') as annotations_file:
    dic = json.load(annotations_file)

In [None]:
# Map every image file name from the COCO document to its numeric image id.
image_dic = {
    image_info['file_name']: image_info['id']
    for image_info in dic['images']
}
    

In [None]:
# Split name -> list of image file names belonging to that split.
DATASET_TYPE2IMAGE = dict(
    train=train_images,
    val=val_images,
    test=test_images,
)

# Image id -> split name. An image listed under several splits keeps the last
# split in iteration order, because later entries overwrite earlier ones.
IMAGE2DATASET_TYPE = {
    image_dic[image_name]: split_name
    for split_name, image_names in DATASET_TYPE2IMAGE.items()
    for image_name in image_names
}

In [None]:
# Lines of the YOLOv5 dataset description: split folders, class count, class names.
evra_dataset = [
    "train: ../data_task2/train/images/train/\n",
    "val: ../data_task2/train/images/test/\n",
    "test: ../data_task2/train/images/test/\n\n",
    "nc: 1\n\n",
    "names: [ 'Human']",
]

# Write the description into the cloned repository's data folder.
with open(r'yolov5/data/evra_dataset.yaml', 'w') as yaml_file:
    yaml_file.write("".join(evra_dataset))

In [None]:
# Create the per-split sub-folders under both the label root and the image root.
makedirs(PATH_TO_SAVE_LABELS, PATH_TO_IMAGES, DATASET_TYPE2IMAGE)

In [None]:
# Convert the COCO annotations into YOLO label files, routed by split.
ConvertCOCOToYOLO(
    img_folder=PATH_TO_IMAGES,
    json_path=PATH_TO_BBOXES + 'coco_annotations_train.json',
    PATH_TO_SAVE_LABELS=PATH_TO_SAVE_LABELS,
).convert(image2dataset_type=IMAGE2DATASET_TYPE)

In [None]:
# Copy every listed image into its split sub-folder, renamed to "<image id>.jpg".
copy_images(image_dic,IMAGES_NAME, IMAGE2DATASET_TYPE, PATH_TO_IMAGES)

100%|██████████| 500/500 [00:13<00:00, 35.76it/s]


In [None]:

# Start from YOLOv5's stock "scratch" hyperparameters ...
with open("yolov5/data/hyps/hyp.scratch.yaml", "r") as stock_hyp_file:
    hyps = yaml.safe_load(stock_hyp_file)

# ... and save them under a project-specific name that later cells can edit.
with open("yolov5/data/hyps/hyp_evra_.yaml", 'w') as project_hyp_file:
    yaml.dump(hyps, project_hyp_file)

# ***Training***

In [None]:
# Train for 30 epochs at image size 1280 and batch size 8, starting from the
# yolov5m6.pt weights, with the project hyperparameter file; run name "exp6".
!cd yolov5 && python train.py --img 1280 --batch 8 --epochs 30 --data evra_dataset.yaml --weights yolov5m6.pt --hyp data/hyps/hyp_evra_.yaml --name exp6 --workers 0

[34m[1mtrain: [0mweights=yolov5m6.pt, cfg=, data=evra_dataset.yaml, hyp=data/hyps/hyp_evra_.yaml, epochs=30, batch_size=8, imgsz=1280, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, adam=False, sync_bn=False, workers=0, entity=None, project=runs/train, name=exp6, exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, upload_dataset=False, bbox_interval=-1, save_period=-1, artifact_alias=latest, local_rank=-1, freeze=0, patience=100
[34m[1mgithub: [0m⚠️ YOLOv5 is out of date by 58 commits. Use `git pull` or `git clone https://github.com/ultralytics/yolov5` to update.
YOLOv5 🚀 v5.0-491-gcce7e78 torch 1.10.0+cu102 CUDA:0 (Tesla T4, 15109.75MB)

[34m[1mhyperparameters: [0manchor_t=4.0, box=0.05, cls=0.5, cls_pw=1.0, copy_paste=0.0, degrees=0.0, fl_gamma=0.0, fliplr=0.5, flipud=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, iou_t=0.2, lr0=0.01, lrf=0.2,

# ***Finetune for bigger image size***

In [None]:

# Re-read the project hyperparameters written earlier.
with open("yolov5/data/hyps/hyp_evra_.yaml", "r") as hyp_in:
    hyps = yaml.safe_load(hyp_in)

# Override the two learning-rate entries for the fine-tuning run.
hyps['lr0'] = 0.0005
hyps['lrf'] = 1

# Write the updated values back to the same file.
with open("yolov5/data/hyps/hyp_evra_.yaml", 'w') as hyp_out:
    yaml.dump(hyps, hyp_out)

In [None]:
# Fine-tune for 10 epochs at image size 2048 and batch size 2, starting from a
# previous run's best checkpoint; run name "exp2048".
# NOTE(review): the checkpoint is read from runs/train/exp66, while the training
# command in this notebook passes --name exp6 — confirm that this directory
# exists when the notebook is run from a clean state.
!cd yolov5 && python train.py --img 2048 --batch 2 --epochs 10 --data evra_dataset.yaml --weights runs/train/exp66/weights/best.pt --hyp data/hyps/hyp_evra_.yaml --name exp2048 --workers 0

[34m[1mtrain: [0mweights=runs/train/exp66/weights/best.pt, cfg=, data=evra_dataset.yaml, hyp=data/hyps/hyp_evra_.yaml, epochs=10, batch_size=2, imgsz=2048, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, adam=False, sync_bn=False, workers=0, entity=None, project=runs/train, name=exp2048, exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, upload_dataset=False, bbox_interval=-1, save_period=-1, artifact_alias=latest, local_rank=-1, freeze=0, patience=100
[34m[1mgithub: [0m⚠️ YOLOv5 is out of date by 58 commits. Use `git pull` or `git clone https://github.com/ultralytics/yolov5` to update.
YOLOv5 🚀 v5.0-491-gcce7e78 torch 1.10.0+cu102 CUDA:0 (Tesla T4, 15109.75MB)

[34m[1mhyperparameters: [0manchor_t=4.0, box=0.05, cls=0.5, cls_pw=1.0, copy_paste=0.0, degrees=0.0, fl_gamma=0.0, fliplr=0.5, flipud=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, iou_t