# Prep

In [None]:
!test -d yolov5 || git clone https://github.com/ultralytics/yolov5
!test -f tacotrashdataset.zip  && test -d tacotrashdataset || unzip tacotrashdataset.zip  -d tacotrashdataset
!test -f yolov5-taco.zip && test -d yolov5-taco || unzip yolov5-taco.zip -d yolov5-taco
%cd yolov5
!pip install --upgrade pip
!pip install requirements.txt
!pip install pycocotools split-folders wandb torch==1.7.1 tensorboard opencv-python torchvision

# New Training ~ 6 hours on 16 i9 cores

In [None]:
from pycocotools.coco import COCO
import numpy as np
import tqdm
import matplotlib.pyplot as plt
%matplotlib inline
# Load the TACO annotations in COCO format and collect the category records,
# ordered by their COCO category id.
annotation_path = '../tacotrashdataset/data/annotations.json'
data_source = COCO(annotation_file=annotation_path)

catIds = data_source.getCatIds()
categories = sorted(
    data_source.loadCats(catIds),
    key=lambda category: category['id'],
)

### Class dictionary

In [None]:
# Build three lookup tables from the sorted category list:
#   classes             : category name     -> contiguous class index
#   taco_labels         : class index       -> original COCO category id
#   taco_labels_inverse : COCO category id  -> class index
classes, taco_labels, taco_labels_inverse = {}, {}, {}

for category in categories:
    # The next free index is the number of names registered so far.
    class_index = len(classes)
    coco_id = category['id']
    taco_labels[class_index] = coco_id
    taco_labels_inverse[coco_id] = class_index
    classes[category['name']] = class_index

### Split data for training, validation, testing

In [None]:
# Create the staging folders that the next cell fills with images and label files
# (-p: no error if they already exist).
!mkdir -p tmp/labels tmp/images

In [None]:
# Copy every image into a flat staging folder and write one label file per image:
# one line per box, "<class_index> <x_center> <y_center> <width> <height>",
# with all four box values normalised to 0-1 by the image size.
IMAGES_PATH = 'tmp/images/'
LABELS_PATH = 'tmp/labels/'
import shutil
import os
img_ids = data_source.getImgIds()

# Iterate the id list directly so tqdm knows the total and can show real progress.
for img_id in tqdm.tqdm(img_ids):
    img_info = data_source.loadImgs(img_id)[0]
    # img_dir: batch_x/.....jpg ---> batch_x_......jpg
    img_dir = img_info['file_name'].replace('/', '_')

    # Strip only the final extension so names containing extra dots keep their full stem.
    image_name = os.path.splitext(img_dir)[0]
    label_dir = LABELS_PATH + image_name + '.txt'

    height = img_info['height']
    width = img_info['width']

    # get images
    shutil.copy(f"../tacotrashdataset/data/{img_info['file_name']}", os.path.join(IMAGES_PATH, img_dir))

    # get labels
    lines = []
    for annotation in data_source.loadAnns(data_source.getAnnIds(img_id)):
        label = taco_labels_inverse[annotation['category_id']]
        # Unpack into locals instead of writing back into annotation['bbox']:
        # mutating the loaded annotations made a second run of this cell see
        # already-normalised (< 1) boxes and drop every one of them.
        top_x, top_y, box_w, box_h = annotation['bbox']
        # some annotations have basically no width / height (extremely small), skip them
        if box_w < 1 or box_h < 1:
            continue
        # top_x,top_y,width,height ----> cen_x,cen_y,width,height, standardized to 0-1
        yolo_box = (
            round((top_x + box_w / 2) / width, 6),
            round((top_y + box_h / 2) / height, 6),
            round(box_w / width, 6),
            round(box_h / height, 6),
        )
        # line: label x_center y_center width height
        lines.append(' '.join([str(label), *map(str, yolo_box)]) + '\n')

    # Images without usable annotations still get an (empty) label file.
    with open(label_dir, mode='w') as fp:
        fp.writelines(lines)

In [None]:
# Sanity check: print how many entries were staged as images, then as labels.
for staged_dir in (IMAGES_PATH, LABELS_PATH):
    staged_entries = os.listdir(staged_dir)
    print(len(staged_entries))

In [None]:
import splitfolders
# Split the staged 'tmp' tree into 'taco' using an 80/10/10 ratio and a fixed seed.
# NOTE(review): images and labels are split as separate sub-folders; presumably the
# shared seed keeps each image with its label file — confirm after running.
splitfolders.ratio(
    'tmp',
    output='taco',
    seed=1337,
    ratio=(0.8, 0.1, 0.1),
)

Check files:

In [None]:
# Show the first five image names (alphabetical) from each split.
for split_name in ('train', 'val', 'test'):
    split_images = sorted(os.listdir(f'taco/{split_name}/images'))
    print(split_images[:5])

Remove the tmp folder

In [None]:
# Delete the staging directory; the split dataset was written to taco/ by the cell above.
!rm -r tmp

Run this cell if you hit the error "a view of a leaf Variable that requires grad is being used in an in-place operation", then start training again.

In [None]:
import time
from pathlib import Path
import shutil

# Archive any existing training runs into a timestamped folder under prev_runs/.
RUNS_DIR = Path("runs/train")
PRUN_DIR = Path(f'prev_runs/{time.strftime("%Y%m%d_%H%M%S")}')

# Snapshot the entries first: they are moved out of RUNS_DIR below, and the
# directory should not change underneath a live glob iterator.
previous_runs = sorted(RUNS_DIR.glob('*'))

# Create the archive folder only when there is something to put in it, so
# running this cell with no previous runs does not leave empty directories behind.
if previous_runs:
    PRUN_DIR.mkdir(parents=True, exist_ok=True)

for run_dir in previous_runs:
    print(f'moving previous run {run_dir} to {PRUN_DIR}')
    shutil.move(str(run_dir), str(PRUN_DIR))

In [None]:
# !python train.py -h
# !python export.py --help

In [None]:
# Be forewarned, this takes ~6 hours on 16 i9 cores.
# start/end bracket the training command so the following cell can report the elapsed time.
import time
import shutil
start = time.time()
!wandb disabled
!python train.py --multi-scale \
   --img 640 \
   --batch 64 \
   --epochs 50 \
   --data ../yolov5-taco/taco.yaml

# The --weights option below is deliberately left out, to see what training
# looks like without the previously trained "last" checkpoint:
#   --weights ../yolov5-taco/50_epochs_trained_last.pt
end = time.time()

In [None]:
# Wall-clock duration of the training cell, from its start/end timestamps.
elapsed_seconds = end - start
print('Training time:', elapsed_seconds, 'seconds')

### Show the training result

In [None]:
# Switch into the training run's output directory and list what it contains.
# NOTE: %cd changes the kernel's working directory, so the relative paths in the
# following cell resolve inside runs/train/exp until a later cell changes it back.
%cd 'runs/train/exp'
%ls

In [None]:
# Display the results.png image from the current directory
# as one large figure without axis ticks.
res_path = 'results.png'
img = plt.imread(res_path)

fig, ax = plt.subplots(figsize=(20, 20))
ax.imshow(img)
ax.set_xticks([])
ax.set_yticks([])
plt.show()

In [None]:
%cd /home/jovyan/work/yolov5
!python detect.py \
   --weights runs/train/exp/weights/best.pt \
   --img 640 --conf 0.25 --source taco/test/images 

In [None]:
# Display every file found in the detection output folder as its own figure;
# cnt ends up holding the number of files shown (0 when the folder is empty).
OUT_PATH = 'runs/detect/exp'
cnt = 0
for cnt, detection_name in enumerate(os.listdir(OUT_PATH), start=1):
    img = plt.imread(os.path.join(OUT_PATH, detection_name))
    fig, ax = plt.subplots(figsize=(15, 15))
    ax.imshow(img)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.show()

In [None]:
# Generate web model: archive the best checkpoint under prev_runs/<timestamp>/,
# export it to TensorFlow.js with export.py, then delete the training run folder.
import subprocess
import time
from pathlib import Path
import shutil
import os

BN = "trained"
SRC_DIR = Path('runs/train/exp')
SAVE_PT = SRC_DIR.joinpath('weights/best.pt')
PRUN_DIR = Path('prev_runs')
# Same timestamp format as the cell that archives previous runs (was a raw float).
TS = time.strftime("%Y%m%d_%H%M%S")
DEST_DIR = PRUN_DIR.joinpath(TS)
DEST_FILE = DEST_DIR.joinpath(f'{BN}.pt')
# Previously this repeated the timestamp segment (prev_runs/TS/TS/...).
# NOTE(review): export.py is expected to write "<weights stem>_web_model" next to
# the weights file — confirm against the checked-out yolov5 version.
MODEL_DIR = DEST_DIR.joinpath(f'{BN}_web_model')

# Fail before touching the filesystem if training did not produce a checkpoint.
if not SAVE_PT.is_file():
    raise FileNotFoundError(f'no trained weights found at {SAVE_PT}')

DEST_DIR.mkdir(parents=True, exist_ok=True)
print(f'moving previous run {SAVE_PT} to {DEST_FILE}')
shutil.move(str(SAVE_PT), str(DEST_FILE))

# check=True raises CalledProcessError when the export fails, so the training
# run below is only deleted after a successful export. Passing an argument list
# also avoids building a shell command string.
subprocess.run(
    ['python', 'export.py', '--weights', str(DEST_FILE), '--include', 'tfjs'],
    check=True,
)
shutil.rmtree(str(SRC_DIR))
