<a href="https://colab.research.google.com/github/guthi1/mooc-exercises/blob/daffy-project/project/solution/dev/yolo_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Dependencies

In [1]:
import os
import contextlib
@contextlib.contextmanager
def directory(name):
  ret = os.getcwd()
  os.chdir(name)
  yield None
  os.chdir(ret)

import subprocess
def run(input, exception_on_failure=False):
  try:
    program_output = subprocess.check_output(f"{input}", shell=True, universal_newlines=True, stderr=subprocess.STDOUT)
  except Exception as e:
    if exception_on_failure:
      raise e
    program_output = e.output

    return program_output

def runp(input, exception_on_failure=False):
    print(input)
    print(run(input, exception_on_failure))

#make boxes to xywh format:
def xminyminxmaxymax2xywfnormalized(box, image_size):
    xmin, ymin, xmax, ymax = np.array(box, dtype=np.float64)
    center_x = (xmin+xmax)/2
    center_y = (ymin+ymax)/2
    width = xmax-xmin
    height = ymax-ymin

    normalized = np.array([center_x, center_y, width, height])/image_size
    return np.round(normalized, 5)

def train_test_split(filenames, split_percentage, dataset_dir):
    train_txt = np.array(filenames)
    np.random.shuffle(train_txt)
    nb_things = len(train_txt)
    sp = int(split_percentage * nb_things)
    train_txt, val_txt = train_txt[:sp], train_txt[sp:]

    print("ALL IMAGE NAMES TO MOVE DURING THIS SPLIT:", filenames)
    print("DATASET DIRECTORY", dataset_dir)

    def mv(img_name, to_train):
        print("MOVING IMG NAMED", img_name)

        dest = "train" if to_train else "val"
        runp(f"mv {dataset_dir}/images/{img_name}.jpg {dataset_dir}/{dest}/images/{img_name}.jpg")
        runp(f"mv {dataset_dir}/labels/{img_name}.txt {dataset_dir}/{dest}/labels/{img_name}.txt")

    for img in train_txt:
        mv(img, True)
    for img in val_txt:
        mv(img, False)


@contextlib.contextmanager
def makedirs(name):
    try:
        os.makedirs(name)
    except:
        pass
    yield None

@contextlib.contextmanager
def directory(name):
    ret = os.getcwd()
    os.chdir(name)
    yield None
    os.chdir(ret)

def makedirs(name):
    try:
        os.makedirs(name)
    except:
        pass
    yield None

def seed(seed):
    # torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

def launch_env(map):
    import gym_duckietown
    from gym_duckietown.envs import DuckietownEnv
    env = DuckietownEnv(
        map_name=map,
        domain_rand=False,
        max_steps=math.inf,
    )
    return env

import cv2

def _mod_mask(mask):
    temp = mask.copy()
    temp[temp == 1] = 50
    temp[temp == 2] = 100
    temp[temp == 3] = 150
    temp[temp == 4] = 200
    temp = temp.astype("uint8")
    mask = cv2.applyColorMap(temp, cv2.COLORMAP_RAINBOW)
    return mask

def display_img_seg_mask(real_img, seg_img):
    all = np.concatenate(
        (cv2.cvtColor(real_img, cv2.COLOR_RGB2BGR), seg_img),
        axis=1
    )

    cv2.imshow("image", all)
    cv2.waitKey(0)

def prun(input, exception_on_failure=False):
  x = run(input, exception_on_failure)
  print(x)
  return x

In [2]:
DATASET_DIR="/dt_dataset"
IMAGE_SIZE = 416
# this is the percentage of data that will go into the training set (as opposed to the testing set)
SPLIT_PERCENTAGE = 0.8

## download dataset

In [None]:
# Download data:
runp(f"rm -rf {DATASET_DIR}")
runp(f"mkdir {DATASET_DIR}")
runp(f"mkdir {DATASET_DIR}/images")
runp(f"mkdir {DATASET_DIR}/labels")
runp(f"mkdir {DATASET_DIR}/train")
runp(f"mkdir {DATASET_DIR}/val")
runp(f"mkdir {DATASET_DIR}/train/images")
runp(f"mkdir {DATASET_DIR}/train/labels")
runp(f"mkdir {DATASET_DIR}/val/images")
runp(f"mkdir {DATASET_DIR}/val/labels")
!wget -O duckietown_object_detection_dataset.zip https://www.dropbox.com/s/bpd535fzmj1pz5w/duckietown%20object%20detection%20dataset-20201129T162330Z-001.zip?dl=0
runp(f"unzip -q duckietown_object_detection_dataset.zip -d {DATASET_DIR}")
runp(f"mv {DATASET_DIR}/duckietown\ object\ detection\ dataset/* {DATASET_DIR} && rm -rf {DATASET_DIR}/duckietown\ object\ detection\ dataset")
runp(f"rm duckietown_object_detection_dataset.zip")

Resize the images

In [8]:
import json
import os
import cv2
import numpy as np
from tqdm import tqdm

with open(f"{DATASET_DIR}/annotation/final_anns.json") as anns:
    annotations = json.load(anns)

In [None]:
npz_index = 0

all_image_names = []
    
def save_img(img, boxes, classes):
    global npz_index
    cv2.imwrite(f"{DATASET_DIR}/images/real_{npz_index}.jpg", img)
    with open(f"{DATASET_DIR}/labels/real_{npz_index}.txt", "w") as f:
        for i in range(len(boxes)):
            f.write(f"{classes[i]} "+" ".join(map(str,boxes[i]))+"\n")
    npz_index += 1
    all_image_names.append(f"real_{npz_index}")

filenames = tqdm(os.listdir(f"{DATASET_DIR}/frames"))
for filename in filenames:
    img = cv2.imread(f"{DATASET_DIR}/frames/{filename}")

    orig_y, orig_x = img.shape[0], img.shape[1]
    scale_y, scale_x = IMAGE_SIZE/orig_y, IMAGE_SIZE/orig_x

    img = cv2.resize(img, (IMAGE_SIZE,IMAGE_SIZE))

    boxes = []
    classes = []

    if filename not in annotations:
        continue

    for detection in annotations[filename]:
        box = detection["bbox"]
        label = detection["cat_name"]

        if label not in ["duckie", "cone"]:
            continue

        orig_x_min, orig_y_min, orig_w, orig_h = box

        x_min = int(np.round(orig_x_min * scale_x))
        y_min = int(np.round(orig_y_min * scale_y))
        x_max = x_min + int(np.round(orig_w * scale_x))
        y_max = y_min + int(np.round(orig_h * scale_y))

        boxes.append([x_min, y_min, x_max, y_max])
        classes.append(1 if label == "duckie" else 2)

    if len(boxes) == 0:
        continue

    boxes = np.array([xminyminxmaxymax2xywfnormalized(box, IMAGE_SIZE) for box in boxes])
    classes = np.array(classes)-1
    
    save_img(img, boxes, classes)

train_test_split(all_image_names, SPLIT_PERCENTAGE, DATASET_DIR)

Mounts your google drive to move the data

In [10]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [14]:
runp(f"mv {DATASET_DIR} /content/drive/MyDrive/dt_dataset")

mv /dt_dataset /content/drive/MyDrive/dt_dataset
None


## Extract dataset from drive

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
runp(f"cp -r /content/drive/MyDrive/dt_dataset /content{DATASET_DIR} ")

cp -r /content/drive/MyDrive/dt_dataset /content/dt_dataset 
None


In [5]:
os.chdir(f'/content{DATASET_DIR}')

In [6]:
if not os.path.exists("SENTINEL"):
  prun("mkdir duckietown_dataset")
  prun("mv train duckietown_dataset && mv val duckietown_dataset")

None
None


## Clone Yolov5

In [7]:
!git clone https://github.com/guthi1/yolov5.git -b dt-obj-det
!cd yolov5 && pip3 install -r requirements.txt
!pip3 install torch==1.11 torchvision==0.12.0
if not os.path.exists("SENTINEL"):
  run("mv duckietown_dataset yolov5")
!touch SENTINEL

Cloning into 'yolov5'...
remote: Enumerating objects: 6166, done.[K
remote: Counting objects: 100% (4/4), done.[K
remote: Compressing objects: 100% (4/4), done.[K
remote: Total 6166 (delta 0), reused 0 (delta 0), pack-reused 6162[K
Receiving objects: 100% (6166/6166), 8.48 MiB | 13.39 MiB/s, done.
Resolving deltas: 100% (4207/4207), done.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting thop
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: thop
Successfully installed thop-0.1.1.post2209072238
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch==1.11
  Downloading torch-1.11.0-cp38-cp38-manylinux1_x86_64.whl (750.6 MB)
[K     |████████████████████████████████| 750.6 MB 18 kB/s 
[?25hCollecting torchvision==0.12.0
  Downloading torchvision-0.12.0-cp38-cp38-manylinux1_x86_64.whl (21.0 MB)
[K     |████████████████

# Training 

## Logger

Create a loggin system with wandb

In [9]:
%pip install -q wandb 
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mguthi1[0m ([33mxabjuwplb[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

## Training

In [11]:
!mv yolov5/best.pt yolov5/best_old.pt
!cd yolov5 && pip3 install -r requirements.txt && python3 train.py --img 416 --batch 16 --epochs 30 --data duckietown.yaml --weights yolov5s.pt

import numpy as np

all_exps = os.listdir("yolov5/runs/train")
all_exps_filtered = map(lambda x: int(x.replace("exp", "1")), filter(lambda x: x.startswith("exp"), all_exps))
all_exps_filtered = np.array(list(all_exps))
latest_exp_index = np.argmax(all_exps)
latest_exp = all_exps[latest_exp_index]
print(f"Latest exp is {latest_exp}")

prun(f"cp yolov5/runs/train/{latest_exp}/weights/best.pt yolov5/best.pt")

mv: cannot stat 'yolov5/best.pt': No such file or directory
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[34m[1mgithub: [0mup to date with https://github.com/guthi1/yolov5 ✅
YOLOv5 🚀 v5.0-69-g68abb22 torch 1.11.0+cu102 CUDA:0 (Tesla T4, 15109.75MB)

Namespace(adam=False, artifact_alias='latest', batch_size=16, bbox_interval=-1, bucket='', cache_images=False, cfg='', data='./data/duckietown.yaml', device='', entity=None, epochs=30, evolve=False, exist_ok=False, global_rank=-1, hyp='data/hyp.scratch.yaml', image_weights=False, img_size=[416, 416], label_smoothing=0.0, linear_lr=False, local_rank=-1, multi_scale=False, name='exp', noautoanchor=False, nosave=False, notest=False, project='runs/train', quad=False, rect=False, resume=False, save_dir='runs/train/exp2', save_period=-1, single_cls=False, sync_bn=False, total_batch_size=16, upload_dataset=False, weights='yolov5s.pt', workers=8, world_size=1)
[34m[1mtensorboard: [0mStart 

# Results the model

In [17]:
import sys
import torch

def select_device(device='', batch_size=None):
    import torch
    # device = 'cpu' or '0' or '0,1,2,3'
    cpu = device.lower() == 'cpu'
    if cpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force torch.cuda.is_available() = False
    elif device:  # non-cpu device requested
        os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
        assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested'  # check availability

    cuda = not cpu and torch.cuda.is_available()

    return torch.device('cuda:0' if cuda else 'cpu')

In [None]:
# model = torch.load("./yolov5/best.pt", map_location=select_device("cpu"))['model'].float()  # load to FP32
# img = 'https://ultralytics.com/images/zidane.jpg'  # or file, Path, PIL, OpenCV, numpy, list
# # Inference
# results = model(img)

# # Results
# results.print()  # or .show(), .save(), .crop(), .pandas(), etc.


## Upload model to Duckietown's cloud

In [None]:
!pip3 install git+https://github.com/duckietown/lib-dt-mooc-2021

In [22]:
from dt_mooc.cloud import Storage
import sys
import torch

def select_device(device='', batch_size=None):
    import torch
    # device = 'cpu' or '0' or '0,1,2,3'
    cpu = device.lower() == 'cpu'
    if cpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force torch.cuda.is_available() = False
    elif device:  # non-cpu device requested
        os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
        assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested'  # check availability

    cuda = not cpu and torch.cuda.is_available()

    return torch.device('cuda:0' if cuda else 'cpu')

sys.path.insert(0, './yolov5')
model = torch.load("./yolov5/best.pt", map_location=select_device("cpu"))['model'].float()  # load to FP32
model.to(select_device("cpu")).eval()

storage = Storage("dt1-3nT8KSoxVh4Migd7N6Nsjy5q8BHtzjcsyz57x9FyJbx5UhJ-43dzqWFnWd8KBa1yev1g3UKnzVxZkkTbfex5eXnmoSTSmB3YdtDmc5tQuXNDk3cQ74")

storage.upload_yolov5("yolov5", model, "./yolov5/best.pt")

Uploading file `best.pt`...



File `best.pt` successfully uploaded! It will now be found at `courses/mooc/2021/data/nn_models/yolov5.pt`.
Uploading file `best.pt.wts`...

File `best.pt.wts` successfully uploaded! It will now be found at `courses/mooc/2021/data/nn_models/yolov5.wts`.
Uploading file `best.pt.wts.sha256`...

File `best.pt.wts.sha256` successfully uploaded! It will now be found at `courses/mooc/2021/data/nn_models/yolov5.sha256`.


# Done!