### Jupyter code for yolov8 training.

In [1]:
# import necessary packages:
import numpy as np
import os
import torch
import pandas as pd
import json
import cv2
from tqdm import tqdm
import shutil
import yaml
import random

In [2]:
def create_empty_yaml_file(file_path):
    """Create (or overwrite) ``file_path`` with an empty YAML mapping.

    Args:
        file_path: Destination path of the YAML file. The file is opened in
            write mode, so any existing content is replaced.
    """
    # Dump an empty dict straight into the freshly opened file.
    with open(file_path, 'w') as yaml_handle:
        yaml.dump({}, yaml_handle)

In [3]:
# Write the dataset yaml for the raw images (default yolov8 normalization & augmentation).
create_raw_yolo8_yaml = False # already created
if create_raw_yolo8_yaml:
    yaml_file_src = r'C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_data.yaml'
    # The paths are raw strings on purpose: inside a normal (f-)string the "\f", "\t" and "\v"
    # of these Windows paths are read as form feed, tab and vertical tab, which corrupted the
    # train/val entries that ended up in the file.
    # NOTE(review): if "fatherserverdw" is a network share, the paths presumably need a
    # leading "\\" (UNC form) -- confirm before relying on the generated file.
    yaml_content = '\n'.join([
        r'train: \fatherserverdw\Kevin\hubmap\yolov8\train\images',
        r'val: \fatherserverdw\Kevin\hubmap\yolov8\val\images',
        '',
        'names:',
        '    0: blood_vessel',
        '',
    ])
    # open(..., 'w') creates/truncates the file itself, so no separate empty-file step is needed.
    with open(yaml_file_src, 'w') as f:
        f.write(yaml_content)

### Then edit .yaml manually to fix!

In [4]:
# Write the dataset yaml for the randstainna-normalized & augmented images (already transformed on disk).
create_raw_yolo8_yaml = False # already created
if create_raw_yolo8_yaml:
    yaml_file_src = r'C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_data_randstain.yaml'
    # The paths are raw strings on purpose: inside a normal (f-)string the "\f", "\t" and "\v"
    # of these Windows paths are read as form feed, tab and vertical tab, which corrupted the
    # train/val entries that ended up in the file.
    # NOTE(review): if "fatherserverdw" is a network share, the paths presumably need a
    # leading "\\" (UNC form) -- confirm before relying on the generated file.
    yaml_content = '\n'.join([
        r'train: \fatherserverdw\Kevin\hubmap\yolov8_randstain\train\images',
        r'val: \fatherserverdw\Kevin\hubmap\yolov8_randstain\val\images',
        '',
        'names:',
        '    0: blood_vessel',
        '',
        'nc: 1',
        '',
    ])
    # open(..., 'w') creates/truncates the file itself, so no separate empty-file step is needed.
    with open(yaml_file_src, 'w') as f:
        f.write(yaml_content)

### Then edit .yaml manually to fix!

### Now to train yolov8:

In [2]:
import ultralytics
from ultralytics import YOLO

# Print the ultralytics environment summary (versions, device, resources).
ultralytics.checks()

Ultralytics YOLOv8.0.113  Python-3.10.11 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3090 Ti, 24564MiB)
Setup complete  (24 CPUs, 127.8 GB RAM, 702.3/833.2 GB disk)


In [3]:
# Detection model, loaded from the yolov8m.pt checkpoint.
model = YOLO("yolov8m.pt")

In [7]:
# %load_ext tensorboard
# %tensorboard --logdir C:\Users\Kevin\PycharmProjects\hubmap\detection_model\runs

In [4]:
# Seed every random number generator used in this notebook so repeated runs are reproducible.
def set_seed(seed = 42):
    """Seed Python, NumPy and PyTorch RNGs and put cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to every generator (default 42).
    """
    np.random.seed(seed)  # numpy global RNG
    random.seed(seed)  # python `random` module (also used by albumentations augmentations)
    torch.manual_seed(seed)  # torch RNG
    # Seed every visible GPU, not only the current device as torch.cuda.manual_seed does.
    torch.cuda.manual_seed_all(seed)
    # When running on the cuDNN backend, two further options must be set:
    torch.backends.cudnn.deterministic = True
    # benchmark must be off, otherwise cuDNN may still pick non-deterministic algorithms.
    torch.backends.cudnn.benchmark = False
    # Fixed hash seed. Python reads PYTHONHASHSEED at interpreter start-up, so this only
    # affects processes launched after this point, not the already running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)

# NOTE(review): the training logs in this notebook report seed=0, so the trainer presumably
# applies its own seed -- confirm whether the seed also has to be set in the training config.
set_seed(seed=42)

In [9]:
# Baseline yolov8 detection run on the raw (no randstain) dataset, using default.yaml as cfg.
model.train(
    data=r'C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_data.yaml',
    device=0,
    batch=16,
    epochs=50,
    imgsz=512,
    verbose=True,
    deterministic=True,
    name='baseline, no randstain',
    cfg='default.yaml',
)

New https://pypi.org/project/ultralytics/8.0.115 available  Update with 'pip install -U ultralytics'
cfg file passed. Overriding default params with default.yaml.
Ultralytics YOLOv8.0.113  Python-3.10.11 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3090 Ti, 24564MiB)
[34m[1myolo\engine\trainer: [0mtask=detect, mode=train, model=None, data=C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_data.yaml, epochs=50, patience=50, batch=16, imgsz=512, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=baseline, no randstain, exist_ok=False, pretrained=False, optimizer=SGD, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, sa

In [10]:
# Create the (initially empty) custom training cfg file inside the ultralytics cfg folder.
create_raw_yolo8_yaml = False # already created
if create_raw_yolo8_yaml:
    yaml_file_src = r'C:\Users\Kevin\.conda\envs\hubmap\Lib\site-packages\ultralytics\yolo\cfg\custom_cfg.yaml'
    # Only the placeholder file is created here. This cell used to also write `yaml_content`,
    # a variable that is only defined when one of the dataset-yaml cells has run its guarded
    # branch: on a fresh kernel that raised NameError, and otherwise it put dataset paths into
    # a file that is meant to hold training settings.
    create_empty_yaml_file(yaml_file_src)

### Then fix the .yaml manually: copy the contents of default.yaml into this cfg file, then edit it by hand in PyCharm.

In [11]:
# Detection run on the randstain images with custom_cfg.yaml (author's note: SGD and no NMS).
# NOTE(review): the saved log for this cell reports optimizer=Adam, not SGD -- the optimizer is
# not passed here, so it presumably comes from whatever custom_cfg.yaml contained at run time.
model.train(
    data=r'C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_data_randstain.yaml',
    device=0,
    batch=16,
    epochs=50,
    imgsz=512,
    verbose=True,
    deterministic=True,
    name='baseline_randstain',
    cfg='custom_cfg.yaml',
)

New https://pypi.org/project/ultralytics/8.0.115 available  Update with 'pip install -U ultralytics'
cfg file passed. Overriding default params with custom_cfg.yaml.
Ultralytics YOLOv8.0.113  Python-3.10.11 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3090 Ti, 24564MiB)
[34m[1myolo\engine\trainer: [0mtask=detect, mode=train, model=None, data=C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_data_randstain.yaml, epochs=50, patience=50, batch=16, imgsz=512, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=baseline_randstain, exist_ok=False, pretrained=False, optimizer=Adam, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf

In [13]:
# Detection run on the randstain images with custom_cfg.yaml
# (author's note: adam and NMS IOU = 0.7; NMS with adam is better vs no NMS with adam).
model.train(
    data=r'C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_data_randstain.yaml',
    device=0,
    batch=16,
    epochs=50,
    imgsz=512,
    verbose=True,
    deterministic=True,
    name='baseline_randstain_adam',
    cfg='custom_cfg.yaml',
)

New https://pypi.org/project/ultralytics/8.0.115 available  Update with 'pip install -U ultralytics'
cfg file passed. Overriding default params with custom_cfg.yaml.
Ultralytics YOLOv8.0.113  Python-3.10.11 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3090 Ti, 24564MiB)
[34m[1myolo\engine\trainer: [0mtask=detect, mode=train, model=None, data=C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_data_randstain.yaml, epochs=50, patience=50, batch=16, imgsz=512, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=baseline_randstain_adam, exist_ok=False, pretrained=False, optimizer=Adam, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save

In [18]:
### Trying segmentation with same conditions as above, but just changing .yaml file's task from detect to segment. We can see here which model is more fitting.

In [12]:
# Segmentation model, loaded from the yolov8x-seg.pt checkpoint.
model2 = YOLO("yolov8x-seg.pt")

Downloading https:\github.com\ultralytics\assets\releases\download\v0.0.0\yolov8x-seg.pt to yolov8x-seg.pt...
100%|██████████| 137M/137M [00:03<00:00, 36.5MB/s] 


In [13]:
# Segmentation counterpart of the baseline run: raw (no randstain) dataset with default.yaml
# (author's note: SGD and no NMS).
model2.train(
    data=r'C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_data.yaml',
    device=0,
    batch=16,
    epochs=50,
    imgsz=512,
    verbose=True,
    deterministic=True,
    name='baseline, no randstain',
    cfg='default.yaml',
)

New https://pypi.org/project/ultralytics/8.0.116 available  Update with 'pip install -U ultralytics'
cfg file passed. Overriding default params with default.yaml.
Ultralytics YOLOv8.0.113  Python-3.10.11 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3090 Ti, 24564MiB)
[34m[1myolo\engine\trainer: [0mtask=segment, mode=train, model=None, data=C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_data.yaml, epochs=50, patience=50, batch=16, imgsz=512, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=baseline, no randstain, exist_ok=False, pretrained=False, optimizer=SGD, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, s

In [6]:
# Segmentation run on the randstain images with custom_cfg.yaml (author's note: SGD and no NMS).
model2.train(
    data=r'C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_data_randstain.yaml',
    device=0,
    batch=16,
    epochs=50,
    imgsz=512,
    verbose=True,
    deterministic=True,
    name='baseline_randstain',
    cfg='custom_cfg.yaml',
)

New https://pypi.org/project/ultralytics/8.0.116 available  Update with 'pip install -U ultralytics'
cfg file passed. Overriding default params with custom_cfg.yaml.
Ultralytics YOLOv8.0.113  Python-3.10.11 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3090 Ti, 24564MiB)
[34m[1myolo\engine\trainer: [0mtask=segment, mode=train, model=None, data=C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_data_randstain.yaml, epochs=50, patience=50, batch=16, imgsz=512, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=baseline_randstain, exist_ok=False, pretrained=False, optimizer=SGD, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf

In [8]:
# Segmentation run on the randstain images with custom_cfg.yaml
# (author's note: adam and NMS IOU = 0.7; NMS with adam is better vs no NMS with adam).
model2.train(
    data=r'C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_data_randstain.yaml',
    device=0,
    batch=16,
    epochs=50,
    imgsz=512,
    verbose=True,
    deterministic=True,
    name='baseline_randstain_adam',
    cfg='custom_cfg.yaml',
)

New https://pypi.org/project/ultralytics/8.0.116 available  Update with 'pip install -U ultralytics'
cfg file passed. Overriding default params with custom_cfg.yaml.
Ultralytics YOLOv8.0.113  Python-3.10.11 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3090 Ti, 24564MiB)
[34m[1myolo\engine\trainer: [0mtask=segment, mode=train, model=None, data=C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_data_randstain.yaml, epochs=50, patience=50, batch=16, imgsz=512, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=baseline_randstain_adam, exist_ok=False, pretrained=False, optimizer=Adam, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, sav

### Below is to train detection model for yolov8_v2 images & labels:

In [7]:
# Write the dataset yaml for the yolov8_v2 images (default yolov8 normalization & augmentation).
create_raw_yolo8_yaml = False # already created
if create_raw_yolo8_yaml:
    yaml_file_src = r'C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_v2_data.yaml'
    # The paths are raw strings on purpose: inside a normal (f-)string the "\f", "\t" and "\v"
    # of these Windows paths are read as form feed, tab and vertical tab, which corrupted the
    # train/val entries that ended up in the file.
    # NOTE(review): if "fatherserverdw" is a network share, the paths presumably need a
    # leading "\\" (UNC form) -- confirm before relying on the generated file.
    yaml_content = '\n'.join([
        r'train: \fatherserverdw\Kevin\hubmap\yolov8_v2\train\images',
        r'val: \fatherserverdw\Kevin\hubmap\yolov8_v2\val\images',
        '',
        'names:',
        '    0: blood_vessel',
        '',
    ])
    # open(..., 'w') creates/truncates the file itself, so no separate empty-file step is needed.
    with open(yaml_file_src, 'w') as f:
        f.write(yaml_content)

### Then edit .yaml manually to fix!

In [5]:
# Baseline yolov8 detection run on the yolov8_v2 dataset (no randstain), using default.yaml as cfg.
model.train(
    data=r'C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_v2_data.yaml',
    device=0,
    batch=16,
    epochs=50,
    imgsz=512,
    verbose=True,
    deterministic=True,
    name='yolov8_v2_baseline, no randstain',
    cfg='default.yaml',
)

New https://pypi.org/project/ultralytics/8.0.117 available  Update with 'pip install -U ultralytics'
cfg file passed. Overriding default params with default.yaml.
Ultralytics YOLOv8.0.113  Python-3.10.11 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3090 Ti, 24564MiB)
[34m[1myolo\engine\trainer: [0mtask=detect, mode=train, model=None, data=C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_v2_data.yaml, epochs=50, patience=50, batch=16, imgsz=512, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=yolov8_v2_baseline, no randstain, exist_ok=False, pretrained=False, optimizer=SGD, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_c

In [7]:
# Detection run on the yolov8_v2 baseline images with custom_cfg.yaml
# (author's note: adam and NMS IOU = 0.7; NMS with adam is better vs no NMS with adam).
# Outcome noted by the author: NMS with adam does worse than no NMS with SGD (the baseline above).
model.train(
    data=r'C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_v2_data.yaml',
    device=0,
    batch=16,
    epochs=50,
    imgsz=512,
    verbose=True,
    deterministic=True,
    name='yolov8_v2_adam_nms, no randstain',
    cfg='custom_cfg.yaml',
)

New https://pypi.org/project/ultralytics/8.0.117 available  Update with 'pip install -U ultralytics'
cfg file passed. Overriding default params with custom_cfg.yaml.
Ultralytics YOLOv8.0.113  Python-3.10.11 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3090 Ti, 24564MiB)
[34m[1myolo\engine\trainer: [0mtask=detect, mode=train, model=None, data=C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_v2_data.yaml, epochs=50, patience=50, batch=16, imgsz=512, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=yolov8_v2_adam_nms, no randstain, exist_ok=False, pretrained=False, optimizer=Adam, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, sa

In [5]:
# Author's observation: for the baseline, randstain does slightly worse somehow.
# NOTE(review): the run name says "randstain", but the original trailing comment said
# "no randstain" and `data` is the same yolov8_v2_data.yaml used by the non-randstain run --
# confirm which image set this run actually trained on.
model.train(
    data=r'C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_v2_data.yaml',
    device=0,
    batch=16,
    epochs=50,
    imgsz=512,
    verbose=True,
    deterministic=True,
    name='yolov8_v2_baseline, randstain',
    cfg='default.yaml',
)

New https://pypi.org/project/ultralytics/8.0.117 available  Update with 'pip install -U ultralytics'
cfg file passed. Overriding default params with default.yaml.
Ultralytics YOLOv8.0.113  Python-3.10.11 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3090 Ti, 24564MiB)
[34m[1myolo\engine\trainer: [0mtask=detect, mode=train, model=None, data=C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_v2_data.yaml, epochs=50, patience=50, batch=16, imgsz=512, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=yolov8_v2_baseline, randstain, exist_ok=False, pretrained=False, optimizer=SGD, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf

In [10]:
# Same baseline setup (yolov8_v2 data, no randstain, default.yaml) but with the yolov8x checkpoint.
model3 = YOLO("yolov8x.pt")
model3.train(
    data=r'C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_v2_data.yaml',
    device=0,
    batch=16,
    epochs=50,
    imgsz=512,
    verbose=True,
    deterministic=True,
    name='yolov8_v2_baseline_yolov8x, no randstain',
    cfg='default.yaml',
)

Downloading https:\github.com\ultralytics\assets\releases\download\v0.0.0\yolov8x.pt to yolov8x.pt...
100%|██████████| 131M/131M [00:02<00:00, 48.7MB/s] 
New https://pypi.org/project/ultralytics/8.0.117 available  Update with 'pip install -U ultralytics'
cfg file passed. Overriding default params with default.yaml.
Ultralytics YOLOv8.0.113  Python-3.10.11 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3090 Ti, 24564MiB)
[34m[1myolo\engine\trainer: [0mtask=detect, mode=train, model=None, data=C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_v2_data.yaml, epochs=50, patience=50, batch=16, imgsz=512, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=yolov8_v2_baseline_yolov8x, no randstain, exist_ok=False, pretrained=False, optimizer=SGD, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, 

In [7]:
# yolov8x on the yolov8_v2 data (no randstain) with custom_cfg.yaml (author's note: adam & nms).
# Author's observation: adam + nms is worse than SGD + no nms.
# NOTE(review): this reuses the run name of the default.yaml yolov8x run, so the two result
# folders are easy to mix up -- consider a distinct name.
model3 = YOLO("yolov8x.pt")
model3.train(
    data=r'C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_v2_data.yaml',
    device=0,
    batch=16,
    epochs=50,
    imgsz=512,
    verbose=True,
    deterministic=True,
    name='yolov8_v2_baseline_yolov8x, no randstain',
    cfg='custom_cfg.yaml',
)

New https://pypi.org/project/ultralytics/8.0.117 available  Update with 'pip install -U ultralytics'
cfg file passed. Overriding default params with custom_cfg.yaml.
Ultralytics YOLOv8.0.113  Python-3.10.11 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3090 Ti, 24564MiB)
[34m[1myolo\engine\trainer: [0mtask=detect, mode=train, model=None, data=C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_v2_data.yaml, epochs=50, patience=50, batch=16, imgsz=512, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=yolov8_v2_baseline_yolov8x, no randstain, exist_ok=False, pretrained=False, optimizer=Adam, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=F

In [None]:
# Object detection produces no masks, so try segmentation on yolov8_v2 (no randstain):
# yolov8x-seg checkpoint, default.yaml as cfg, 300 epochs.
model_seg = YOLO("yolov8x-seg.pt")
model_seg.train(
    data=r'C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_v2_data.yaml',
    device=0,
    batch=16,
    epochs=300,
    imgsz=512,
    verbose=True,
    deterministic=True,
    name='yolov8_v2_seg_yolov8x_, no randstain',
    cfg='default.yaml',
)

New https://pypi.org/project/ultralytics/8.0.117 available  Update with 'pip install -U ultralytics'
cfg file passed. Overriding default params with default.yaml.
Ultralytics YOLOv8.0.113  Python-3.10.11 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3090 Ti, 24564MiB)
[34m[1myolo\engine\trainer: [0mtask=segment, mode=train, model=None, data=C:\Users\Kevin\PycharmProjects\hubmap\detection_model\yolov8_v2_data.yaml, epochs=300, patience=50, batch=16, imgsz=512, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=yolov8_v2_seg_yolov8x_, no randstain, exist_ok=False, pretrained=False, optimizer=SGD, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, 

### Now we run ray tune for the best model configuration, but run it in fine_tune.py