In [1]:
import os
import random
import shutil
import re
from tqdm.notebook import tqdm
import json
import logging
from datetime import datetime
from ultralytics import YOLO

In [2]:
logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s', level=logging.INFO)
random.seed(42)
val_split = 0.2

if not os.path.exists("../data/labels/val"):
    os.mkdir("../data/labels/val")
    labels = os.listdir("../data/labels/train/")
    val_labels = random.sample(labels, int(val_split*len(labels)))
    for val_label in val_labels:
        shutil.move(os.path.join("../data/labels/train", val_label), "../data/labels/val/")
        
if not os.path.exists("../data/images/val"):
    shutil.copytree("../data/images/train", "../data/images/val")

In [3]:
start_time = datetime.now().strftime("%Y-%m-%d_%H-%M")
if os.path.exists("../models/best.pt"):
    model = YOLO("../models/best.pt")
else:
    model = YOLO("../models/yolov8n.pt")
#     train_layers = [
#         "model.22.cv3.2.2.weight",
#         "model.22.cv3.2.2.bias",
#         "model.22.dfl.conv.weight"
#     ]
#     for k, v in model.model.named_parameters():
#         if k in train_layers:
#             v.requires_grad = True
#         else:
#             v.requires_grad = False
#             logging.info("freezing " + k)
    
    model.train(
        task="detect",
        model="../models/yolov8n.pt",
        data='../data/data.yaml',
        epochs=100,
        patience=50,
        batch=16,
        imgsz=1920,
        save=True,
        save_period=5,
        cache=False,
        device=0,
        workers=4,
        project="Vision_Meets_Algae",
        name=start_time,
        exist_ok=False,
        pretrained=True,
        optimizer="auto",
        verbose=False,
        seed=42,
        deterministic=True,
        single_cls=False,
        rect=False,
        cos_lr=True,
        close_mosaic=10,
        resume=False,
        amp=True,
        fraction=1.0,
        profile=False,
        lr0=0.01,
        lrf=1,
        momentum=0.937,
        weight_decay=0,
        warmup_epochs=3.0,
        warmup_momentum=0.8,
        warmup_bias_lr=0.1,
        box=7.5,
        cls=0.5,
        dfl=1.5,
        pose=12.0,
        kobj=2.0,
        label_smoothing=0.0,
        nbs=64,
        dropout=0.0,
        val=True,
        plots=True,
        save_json=True,
        hsv_h=0,  # (float) image HSV-Hue augmentation (fraction)
        hsv_s=0,  # (float) image HSV-Saturation augmentation (fraction)
        hsv_v=0,  # (float) image HSV-Value augmentation (fraction)
        degrees=180,  # (float) image rotation (+/- deg)
        translate=0,  # (float) image translation (+/- fraction)
        scale=0.5,  # (float) image scale (+/- gain)
        shear=0,  # (float) image shear (+/- deg)
        perspective=0,  # (float) image perspective (+/- fraction), range 0-0.001
        flipud=0.5,  # (float) image flip up-down (probability)
        fliplr=0.5,  # (float) image flip left-right (probability)
        mosaic=0.5,  # (float) image mosaic (probability)
        mixup=0,  # (float) image mixup (probability)
        copy_paste=0
    )
    shutil.copyfile(os.path.join("Vision_Meets_Algae", start_time, "weights", "best.pt"), "../models/best.pt")

Ultralytics YOLOv8.0.145  Python-3.7.3 torch-1.12.1+cu113 CUDA:0 (NVIDIA GeForce RTX 4090, 24564MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=../models/yolov8n.pt, data=../data/data.yaml, epochs=100, patience=50, batch=16, imgsz=1920, save=True, save_period=5, cache=False, device=0, workers=4, project=Vision_Meets_Algae, name=2024-01-17_18-09, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=42, deterministic=True, single_cls=False, rect=False, cos_lr=True, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=True, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, k

[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0), 63 bias(decay=0.0)
Image sizes 1920 train, 1920 val
Using 4 dataloader workers
Logging results to [1mVision_Meets_Algae\2024-01-17_18-09[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
      1/100      17.4G      1.845      9.715      1.507         35       1920: 100%|██████████| 44/44 [00:18<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:11<00:00,  1.90it/s]
                   all        700        348    0.00156      0.579     0.0573     0.0177

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
      2/100      17.2G      1.438      7.124      1.269         28       1920: 100%|██████████| 44/44 [00:17<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P        

     19/100      17.2G      1.231      1.351      1.164         35       1920: 100%|██████████| 44/44 [00:17<00:00,  2.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:04<00:00,  4.62it/s]
                   all        700        348      0.182      0.814      0.201      0.111

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
     20/100      17.2G      1.212      1.303      1.137         52       1920: 100%|██████████| 44/44 [00:17<00:00,  2.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:04<00:00,  4.69it/s]
                   all        700        348      0.188      0.815      0.209      0.131

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
     21/100      17.2G      1.197      1.197      1.135         39       1920: 100%|██████████| 44/44 [00:17<00:00,  2.54it/s]
       

     38/100      18.3G      1.137     0.9306      1.112         34       1920: 100%|██████████| 44/44 [00:17<00:00,  2.53it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:04<00:00,  4.86it/s]
                   all        700        348      0.184      0.859      0.213       0.14

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
     39/100      17.2G      1.136     0.9565      1.096         33       1920: 100%|██████████| 44/44 [00:17<00:00,  2.52it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:04<00:00,  4.80it/s]
                   all        700        348      0.182      0.776      0.204      0.102

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
     40/100      18.3G      1.113     0.9114      1.079         56       1920: 100%|██████████| 44/44 [00:17<00:00,  2.53it/s]
       

     57/100      17.2G      1.131     0.8785      1.097         33       1920: 100%|██████████| 44/44 [00:17<00:00,  2.52it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:04<00:00,  4.83it/s]
                   all        700        348      0.174      0.779      0.191      0.087

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
     58/100      17.1G      1.119     0.8326      1.081         31       1920: 100%|██████████| 44/44 [00:17<00:00,  2.53it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:04<00:00,  4.73it/s]
                   all        700        348       0.19      0.756      0.194     0.0751

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
     59/100      18.3G      1.103     0.8639       1.07         26       1920: 100%|██████████| 44/44 [00:17<00:00,  2.51it/s]
       

     76/100      17.2G      1.103     0.8442      1.078         36       1920: 100%|██████████| 44/44 [00:17<00:00,  2.52it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:04<00:00,  4.86it/s]
                   all        700        348       0.18      0.812      0.193     0.0972

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
     77/100      17.2G      1.078     0.8743      1.054         50       1920: 100%|██████████| 44/44 [00:17<00:00,  2.52it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:04<00:00,  4.91it/s]
                   all        700        348      0.176       0.87      0.208      0.111

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
     78/100      17.2G      1.089     0.8416       1.08         27       1920: 100%|██████████| 44/44 [00:17<00:00,  2.53it/s]
       

     95/100      17.2G      1.062     0.8121      1.081         34       1920: 100%|██████████| 44/44 [00:17<00:00,  2.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:04<00:00,  4.56it/s]
                   all        700        348      0.189      0.909      0.208      0.128

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
     96/100      17.2G      1.052     0.8173      1.084         24       1920: 100%|██████████| 44/44 [00:17<00:00,  2.54it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:04<00:00,  4.78it/s]
                   all        700        348      0.191      0.904      0.214      0.152

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
     97/100      17.2G      1.079     0.8282      1.109         26       1920: 100%|██████████| 44/44 [00:17<00:00,  2.55it/s]
       

  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, fla

  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, fla


100 epochs completed in 0.637 hours.
Optimizer stripped from Vision_Meets_Algae\2024-01-17_18-09\weights\last.pt, 6.6MB
Optimizer stripped from Vision_Meets_Algae\2024-01-17_18-09\weights\best.pt, 6.6MB

Validating Vision_Meets_Algae\2024-01-17_18-09\weights\best.pt...
Ultralytics YOLOv8.0.145  Python-3.7.3 torch-1.12.1+cu113 CUDA:0 (NVIDIA GeForce RTX 4090, 24564MiB)
Model summary (fused): 168 layers, 3006818 parameters, 0 gradients
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:05<00:00,  4.36it/s]
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(

  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, fla

  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, fla

Speed: 0.7ms preprocess, 3.2ms inference, 0.0ms loss, 1.0ms postprocess per image
Saving Vision_Meets_Algae\2024-01-17_18-09\predictions.json...
Results saved to [1mVision_Meets_Algae\2024-01-17_18-09[0m


In [4]:
results = model.predict("../data/images/test/", imgsz=1920, save=True, save_txt=True)

Results saved to [1mVision_Meets_Algae\2024-01-17_18-092[0m
300 labels saved to Vision_Meets_Algae\2024-01-17_18-092\labels


In [5]:
submit_json = list()
for result in tqdm(results):
    image_id = int(re.findall(r"(\d*)\.jpg$", result.path)[-1])
    for i, cls in enumerate(result.boxes.cls):
        category_id = int(cls)
        score = round(float(result.boxes.conf[i]), 5)
        bbox_xywh = list(map(lambda x: round(x, 3), result.boxes.xywh[i].tolist()))
        bbox_xyxy = list(map(lambda x: round(x, 3), result.boxes.xyxy[i].tolist()))
        submit_json.append({
            "image_id": image_id,
            "category_id": category_id,
            "bbox": bbox_xyxy[0:2] + bbox_xywh[2:],
            "score": score
        })

  0%|          | 0/300 [00:00<?, ?it/s]

In [6]:
with open("../data/submit_%s.json" % start_time, "w") as f:
    f.write(json.dumps(submit_json))