# Solemne 2: Fish's Head and Tail Detection

In [None]:
import torch
import os
import pandas as pd
import random
import shutil
import numpy as np
from ultralytics import YOLO

### Clean names

In [None]:
# Strip everything up to and including the first "-" from every file name
# found under the base folder, e.g. "abc-img01.jpg" becomes "img01.jpg".
base_folder = "content"
prefix = "-"
for root, dirs, files in os.walk(base_folder):
    for filename in files:
        # partition() returns an empty separator when the marker is absent,
        # in which case the file is left untouched.
        head, marker, new_name = filename.partition(prefix)
        if not marker:
            continue

        old_path = os.path.join(root, filename)
        new_path = os.path.join(root, new_name)

        # Never overwrite a file that already carries the target name.
        if os.path.exists(new_path):
            print(f"Archive already exist, next: {new_name}")
            continue

        os.rename(old_path, new_path)

### Create YOLO files structure

In [12]:
# Create dataset_yolo/<split>/{images,labels} for the train, val and test
# splits; existing folders are left as they are.
for split_name in ("train", "val", "test"):
    for kind in ("images", "labels"):
        os.makedirs(f"dataset_yolo/{split_name}/{kind}", exist_ok=True)

### Obtain train, test and val image names

In [None]:
# Folder whose file names identify the "singular" images; these are split
# separately from the rest of the dataset in the cells below.
singular_dir = "singular_images"
# os.listdir() returns entries in arbitrary order. Sorting gives the seeded
# sampling below the same input sequence on every machine and every run.
singular_names = sorted(os.listdir(singular_dir))
len_singular_names = len(singular_names)

images_path = "content/images"
labels_path = "content/labels"

images_names = sorted(os.listdir(images_path))
labels_names = sorted(os.listdir(labels_path))

In [18]:
def select_random(names, amount, seed):
    """Randomly split *names* into a selected sample and the remainder.

    Args:
        names: Sequence of hashable items (file names in this notebook).
        amount: Desired sample size; clamped to the range [0, len(names)].
        seed: Value passed to ``random.seed`` so the draw is repeatable.

    Returns:
        A ``(selected, remaining)`` tuple of lists. ``remaining`` contains
        the unique items of *names* that were not selected, kept in their
        original order.
    """
    random.seed(seed)
    count = max(0, min(amount, len(names)))
    selected = random.sample(names, count)

    chosen = set(selected)
    # A plain set difference would yield a hash-dependent order for strings,
    # which changes between interpreter runs and breaks reproducibility when
    # the remainder is sampled again. dict.fromkeys() de-duplicates while
    # preserving the input order.
    remaining = [name for name in dict.fromkeys(names) if name not in chosen]
    return selected, remaining

In [21]:
# Fractions assigned to train and test respectively; whatever is left after
# both draws goes to validation.
percentages = [0.75, 0.15]
images_distribution = {"train_list": [], "test_list": [], "val_list": []}
# Remove the singular images from the general pool. An order-preserving
# filter is used instead of a set difference: set iteration order for strings
# changes between interpreter runs, which would make the seeded sampling of
# this pool non-reproducible.
singular_set = set(singular_names)
images_names = [name for name in dict.fromkeys(images_names) if name not in singular_set]
len_images_remaining = len(images_names)
# Singular images selection: the sizes of both draws are computed from the
# full singular count, not from what remains after the first draw.
train_selected, remaining_names = select_random(singular_names, int(len_singular_names * percentages[0]), 1)
images_distribution["train_list"] = train_selected
test_selected, remaining_names = select_random(remaining_names, int(len_singular_names * percentages[1]), 1)
images_distribution["test_list"] = test_selected
images_distribution["val_list"] = remaining_names


In [22]:
# Distribute the other (non-singular) images with the same proportions,
# appending them to the lists already holding the singular images.
train_count = int(len_images_remaining * percentages[0])
test_count = int(len_images_remaining * percentages[1])

train_selected, remaining_names = select_random(images_names, train_count, 1)
images_distribution["train_list"].extend(train_selected)

test_selected, remaining_names = select_random(remaining_names, test_count, 1)
images_distribution["test_list"].extend(test_selected)

# Whatever was not drawn for train or test becomes validation data.
images_distribution["val_list"].extend(remaining_names)

### Move files based on the generated lists

In [27]:
def move_files(file_list, split):
    """Move each listed image and its matching label into a dataset split.

    Args:
        file_list: Image file names expected inside ``images_path``.
        split: Sub-folder of ``dataset_yolo`` that receives the files.

    The label for an image is looked up in ``labels_path`` under the image's
    base name with a ``.txt`` extension. A missing image or label is reported
    with a printed message and skipped; the other file of the pair is still
    moved if it exists.
    """
    for img_name in file_list:
        label_name = os.path.splitext(img_name)[0] + ".txt"

        # (source dir, destination kind, file name, message when absent)
        transfers = (
            (images_path, "images", img_name, f"Image not found: {img_name}"),
            (labels_path, "labels", label_name, f"Label not found: {label_name}"),
        )
        for src_dir, kind, file_name, missing_msg in transfers:
            src = os.path.join(src_dir, file_name)
            if not os.path.exists(src):
                print(missing_msg)
                continue
            shutil.move(src, os.path.join("dataset_yolo", split, kind, file_name))

# Apply the split: move every distribution list into its matching folder.
for list_key, target_split in (("train_list", "train"), ("test_list", "test"), ("val_list", "val")):
    move_files(images_distribution[list_key], target_split)

## Reproducibility

In [2]:
def fix_seed(seed):
    """Seed the Python, NumPy and PyTorch random generators with *seed*.

    Also switches cuDNN to deterministic mode and turns off its benchmark
    mode, then prints a confirmation line.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    print(f"Seed fixed: {seed}")


fix_seed(0)

Seed fixed: 0


## Training 

### Learning rate Variation

In [4]:
# Learning-rate sweep: for every candidate value a new model is built from
# the yolov8n.yaml architecture (pretrained=False) and trained with Adam.
data_yaml = "data.yaml"
base_model = "yolov8n.yaml"
device = "cuda" if torch.cuda.is_available() else "cpu"

learning_rates = [0.00001, 0.00005, 0.0001, 0.0005, 0.001]
results_list = []

for lr in learning_rates:
    # Re-create the model on each iteration so runs do not share weights.
    model = YOLO(base_model)

    results = model.train(
        data=data_yaml,
        epochs=30,
        imgsz=300,
        batch=15,
        device=device,
        pretrained=False,
        optimizer="Adam",
        lr0=lr,
        project="lr_search_results",
        name=f"lr_{lr}",
    )

    # Record this run's final metrics; previously results_list was created
    # but never filled, so every run's metrics were discarded.
    # getattr() guards against train() returning an object without metrics.
    run_metrics = dict(getattr(results, "results_dict", None) or {})
    results_list.append({"lr": lr, **run_metrics})

New https://pypi.org/project/ultralytics/8.3.221 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.213 🚀 Python-3.12.3 torch-2.8.0+cu128 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=15, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=300, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=1e-05, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.yaml, momentum=0.937, mosaic=1.0, multi_scale=False, name=lr_1e-05, nbs=64, 

#### Save LR variation and K-Fold

In [None]:
# Persist the collected metrics as CSV and show the table in the notebook.
# NOTE(review): `all_results` is not defined in the visible cells — confirm
# it is populated before this cell runs.
metrics_csv = "lr_kfold_metrics.csv"
df = pd.DataFrame(all_results)
df.to_csv(metrics_csv, index=False)

print("\n Results saved in 'lr_kfold_metrics.csv'")
display(df)