# YOLO v8 training with Optuna

### Optuna is an open source hyperparameter optimization framework to automate hyperparameter search

### Intall dependencies

In [1]:
import os
HOME = os.path.split(os.getcwd())[0]
print(HOME)

c:\Users\arihs\Documents\Thesis\CowId


In [2]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Looking in indexes: https://download.pytorch.org/whl/cu118


In [3]:
!pip install ultralytics



In [4]:
!pip install optuna



In [5]:
# Check YOLO instalation

import ultralytics
ultralytics.checks()

Ultralytics YOLOv8.0.68  Python-3.8.16 torch-2.0.0+cu118 CUDA:0 (NVIDIA GeForce RTX 4070 Ti, 12282MiB)
Setup complete  (20 CPUs, 31.8 GB RAM, 265.6/465.0 GB disk)


In [6]:
# This is available for systems with nvidia GPUs to enable training accelaration with graphics cards:
# CUDA package: https://developer.nvidia.com/cuda-downloads

import torch
torch.cuda.is_available()

True

### Training with YOLO + Optuna

In [7]:
import optuna
from ultralytics import YOLO

def objective(trial):
    # Load a model
    model = YOLO(f'{HOME}/yolov8s.pt') 
    # Set parameter ranges
    param_grid = {"epochs": trial.suggest_int("epochs", 10, 50, step=10), 
                  'lr': trial.suggest_float("lr", 0.01, 0.04, step=0.005), 
                  'dropout': trial.suggest_float("dropout", 0.0, 0.20),
                  'batch': trial.suggest_int("batch", 10, 50, step=10)}
    # Train with parameter suggestions
    model.train(data=f'{HOME}/data/CowID.v9i.yolov8/data.yaml', 
                          epochs=param_grid['epochs'], 
                          batch=param_grid['batch'],  
                          plots=True, 
                          dropout=param_grid['dropout'],
                          lr0=param_grid['lr'],
                          seed=42)
 
    # Get validation results
    results = model.val() 
    precision = results.results_dict['metrics/precision(B)']
    recall = results.results_dict['metrics/recall(B)']
    return precision, recall

# Select to maximize or minimize parameters
study = optuna.create_study(directions=['maximize', 'maximize'])
study.optimize(objective, n_trials=25)

  from .autonotebook import tqdm as notebook_tqdm
[32m[I 2023-04-19 21:15:38,666][0m A new study created in memory with name: no-name-a4c39380-3180-4c23-ab0c-e6f443ae96a4[0m
New https://pypi.org/project/ultralytics/8.0.83 available  Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.68  Python-3.8.16 torch-2.0.0+cu118 CUDA:0 (NVIDIA GeForce RTX 4070 Ti, 12282MiB)
[34m[1myolo\engine\trainer: [0mtask=detect, mode=train, model=c:\Users\arihs\Documents\Thesis\CowId/yolov8s.pt, data=c:\Users\arihs\Documents\Thesis\CowId/data/CowID.v9i.yolov8/data.yaml, epochs=50, patience=50, batch=50, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=False, optimizer=SGD, verbose=True, seed=42, deterministic=True, single_cls=False, image_weights=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, overlap_mask=True, mask_ratio=4, dropout=0.1661828360209482, val=True, split=val, save_json=False

### Analyze the results

In [8]:
for trial in study.best_trials:
    print(f"Trial number: {trial.number}")
    print(trial.params)
    print(f"Trial precision: {trial.values[0]}, Trial recall: {trial.values[1]}")

Trial number: 5
{'epochs': 40, 'lr': 0.02, 'dropout': 0.18880872997279013, 'batch': 20}
Trial precision: 0.8795615389572623, Trial recall: 0.9341323511101481
Trial number: 9
{'epochs': 50, 'lr': 0.035, 'dropout': 0.12578487952851136, 'batch': 40}
Trial precision: 1.0, Trial recall: 0.8017007802064144
Trial number: 10
{'epochs': 40, 'lr': 0.025, 'dropout': 0.17398433369916277, 'batch': 10}
Trial precision: 0.946994360572071, Trial recall: 0.8372093023255814
Trial number: 11
{'epochs': 50, 'lr': 0.01, 'dropout': 0.08923767565885676, 'batch': 30}
Trial precision: 0.9372398179186505, Trial recall: 0.872093023255814
Trial number: 17
{'epochs': 30, 'lr': 0.015, 'dropout': 0.1592328410888584, 'batch': 40}
Trial precision: 0.9666764527304846, Trial recall: 0.8255813953488372
Trial number: 20
{'epochs': 50, 'lr': 0.015, 'dropout': 0.17522641389370708, 'batch': 50}
Trial precision: 0.9156262042486611, Trial recall: 0.8833088805920063
Trial number: 24
{'epochs': 40, 'lr': 0.025, 'dropout': 0.1752

In [9]:
for trial in study.trials:
    print(f"Trial number: {trial.number}")
    print(trial.params)
    print(f"Trial precision: {trial.values[0]}, Trial recall: {trial.values[1]}")

Trial number: 0
{'epochs': 50, 'lr': 0.03, 'dropout': 0.1661828360209482, 'batch': 50}
Trial precision: 0.9258113461617059, Trial recall: 0.8372093023255814
Trial number: 1
{'epochs': 50, 'lr': 0.04, 'dropout': 0.048652304471813436, 'batch': 20}
Trial precision: 0.9307501718339336, Trial recall: 0.8488372093023255
Trial number: 2
{'epochs': 10, 'lr': 0.02, 'dropout': 0.04102142800086559, 'batch': 40}
Trial precision: 0.7271433896523403, Trial recall: 0.7209302325581395
Trial number: 3
{'epochs': 10, 'lr': 0.03, 'dropout': 0.042498786145794436, 'batch': 40}
Trial precision: 0.5944929010328405, Trial recall: 0.45348837209302323
Trial number: 4
{'epochs': 20, 'lr': 0.025, 'dropout': 0.03817313168855494, 'batch': 10}
Trial precision: 0.8523821923063792, Trial recall: 0.8057388772276852
Trial number: 5
{'epochs': 40, 'lr': 0.02, 'dropout': 0.18880872997279013, 'batch': 20}
Trial precision: 0.8795615389572623, Trial recall: 0.9341323511101481
Trial number: 6
{'epochs': 10, 'lr': 0.04, 'dropo

In [13]:
# Save the parameters
import pickle

# save the list to a file
with open('trials2.pickle', 'wb') as f:
    pickle.dump(study.best_trials, f)

In [None]:
# load the list from the file
with open('trials2.pickle', 'rb') as f:
    trials = pickle.load(f)

### Prediction

In [23]:
!pip install opencv-python



In [3]:
from ultralytics import YOLO
from PIL import Image
import os
import cv2
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Layer, Conv2D, Dense, MaxPooling2D, Input, Flatten


HOME = os.path.split(os.getcwd())[0]
print(HOME)

c:\Users\arihs\Documents\Thesis\CowId


In [5]:
# Load the model

model = YOLO(f"{HOME}/notebooks/runs/detect/train6/weights/best.pt")

# Make predictions based on images

# results = model.predict(source=f"{HOME}/test_4.mp4", save=True)

class L1Dist(Layer):
    
    # Init method - inheritance
    def __init__(self, **kwargs):
        super().__init__()
       
    # Similarity calculation
    def call(self, input_embedding, validation_embedding):
        return tf.math.abs(input_embedding - validation_embedding)

siamese_model = tf.keras.models.load_model('siamesemodelv2.h5', custom_objects={'L1Dist':L1Dist, 'BinaryCrossentropy':tf.losses.BinaryCrossentropy})

In [6]:
def verify(model, detection_threshold, verification_threshold):
    # Build results array
    results = []
    for image in os.listdir(os.path.join('..', 'data', 'verify')):
        input_img = preprocess(os.path.join('..', 'data', 'b.jpg'))
        validation_img = preprocess(os.path.join('..', 'data', 'verify', image))
        
        # Make Predictions 
        result = model.predict(list(np.expand_dims([input_img, validation_img], axis=1)))
        results.append(result)
    
    return results

def expand_rectangle_to_square(x1, y1, x2, y2):
    width = abs(x2 - x1)
    height = abs(y2 - y1)
    size = max(width, height)
    center_x = (x1 + x2) / 2
    center_y = (y1 + y2) / 2
    new_x1 = center_x - size / 2
    new_y1 = center_y - size / 2
    new_x2 = center_x + size / 2
    new_y2 = center_y + size / 2
    return new_x1, new_y1, new_x2, new_y2

def crop_to_square(image, x1, y1, x2, y2):
    image_to_rgb = np.uint8(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    cropped_image = Image.fromarray(image_to_rgb).crop((x1, y1, x2, y2)).resize((250, 250))
    return cropped_image

def video_to_frames_with_prediction(model, input_loc, output_loc, filename):
    # Create folder if not exists
    if not os.path.exists(output_loc):
        os.makedirs(output_loc)
    vidcap = cv2.VideoCapture(input_loc)
    fps = vidcap.get(cv2.CAP_PROP_FPS)
    success = True
    success, image = vidcap.read()
    count = 0
    while success:
        res = model(image, conf=0.5)
        boxes = res[0].boxes.xyxy.tolist()
        cow_number = 0
        for inner_list in boxes:
            cow_number += 1
            x1, y1, x2, y2 = inner_list
            x1, y1, x2, y2 = expand_rectangle_to_square(x1, y1, x2, y2)
            cropped_image = crop_to_square(image, x1, y1, x2, y2)
            time = count/fps
            output_filename = f"{os.path.splitext(filename)[0]}-{count:08d}-{cow_number}-{time}.jpg"
            output_path = os.path.join(HOME, output_loc, output_filename)
            cropped_image.save(output_path)
        success,image = vidcap.read()
        count += 1

In [7]:
# Load the model

model = YOLO(f"{HOME}/notebooks/runs/detect/train6/weights/best.pt")

# Make predictions based on images

VIDEO_NAME = 'CC001'

video_to_frames_with_prediction(model, f"{os.path.join(HOME, VIDEO_NAME)}.mp4", f"{os.path.join(HOME, VIDEO_NAME)}", VIDEO_NAME)

VIDEO_NAME = 'CC002'

video_to_frames_with_prediction(model, f"{os.path.join(HOME, VIDEO_NAME)}.mp4", f"{os.path.join(HOME, VIDEO_NAME)}", VIDEO_NAME)

VIDEO_NAME = 'CC003'

video_to_frames_with_prediction(model, f"{os.path.join(HOME, VIDEO_NAME)}.mp4", f"{os.path.join(HOME, VIDEO_NAME)}", VIDEO_NAME)


0: 384x640 2 cows, 98.5ms
Speed: 2.0ms preprocess, 98.5ms inference, 1286.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 cows, 7.0ms
Speed: 0.0ms preprocess, 7.0ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 cows, 9.2ms
Speed: 1.0ms preprocess, 9.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 cows, 19.0ms
Speed: 0.0ms preprocess, 19.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 cows, 13.0ms
Speed: 0.0ms preprocess, 13.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 cows, 8.0ms
Speed: 1.0ms preprocess, 8.0ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 cows, 7.0ms
Speed: 1.0ms preprocess, 7.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 cows, 7.5ms
Speed: 0.0ms preprocess, 7.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 c