# Solemne 2 Fish´s Head and Tail detection

In [1]:
import torch
import os
import pandas as pd
import random
import shutil
import yaml
import re
import numpy as np
from sklearn.model_selection import KFold
from ultralytics import YOLO

### Clean names

In [None]:
base_folder = "content"
prefix = "-"
for root, dirs, files in os.walk(base_folder):
    for filename in files:
        old_path = os.path.join(root, filename)
    
        if prefix not in filename:
            continue
    
        new_name = filename.split(prefix, 1)[1]
        new_path = os.path.join(root, new_name)
    
        if os.path.exists(new_path):
            print(f"Archive already exist, next: {new_name}")
            continue
    
        os.rename(old_path, new_path)

### Create YOLO files structure

In [12]:
os.makedirs("dataset_yolo/train/images", exist_ok=True)
os.makedirs("dataset_yolo/train/labels", exist_ok=True)
os.makedirs("dataset_yolo/val/images", exist_ok=True)
os.makedirs("dataset_yolo/val/labels", exist_ok=True)
os.makedirs("dataset_yolo/test/images", exist_ok=True)
os.makedirs("dataset_yolo/test/labels", exist_ok=True)

### Obtain test, train and val images names

In [None]:
singular_dir = "singular_images"
singular_names = os.listdir(singular_dir)
len_singular_names = len(singular_names)

images_path = "content/images"
labels_path = "content/labels"

images_names = os.listdir(images_path)
labels_names = os.listdir(labels_path)

In [18]:
def select_random(names, amount, seed):
    random.seed(seed)
    selected = random.sample(names, min(amount, len(names)))
    remaining = list(set(names) - set(selected))
    return selected, remaining

In [21]:
percentages = [0.75, 0.15]
images_distribution = {"train_list": [], "test_list": [], "val_list": []}
images_names = list(set(images_names) - set(singular_names))
len_images_remaining = len(images_names)
# Singular_images selection
train_selected, remaining_names = select_random(singular_names, int(len_singular_names * percentages[0]), 1)
images_distribution["train_list"] = train_selected
test_selected, remaining_names = select_random(remaining_names, int(len_singular_names * percentages[1]), 1)
images_distribution["test_list"] = test_selected
images_distribution["val_list"] = remaining_names


In [22]:
# Distribute the others images
train_selected, remaining_names = select_random(images_names, int(len_images_remaining * percentages[0]), 1)
images_distribution["train_list"] += train_selected
test_selected, remaining_names = select_random(remaining_names, int(len_images_remaining * percentages[1]), 1)
images_distribution["test_list"] += test_selected
images_distribution["val_list"] += remaining_names

### Move Files based on generated list

In [27]:
def move_files(file_list, split):
    for img_name in file_list:
        name_base, ext = os.path.splitext(img_name)
        label_name = f"{name_base}.txt"

        img_src = os.path.join(images_path, img_name)
        lbl_src = os.path.join(labels_path, label_name)

        img_dst = os.path.join("dataset_yolo", split, "images", img_name)
        lbl_dst = os.path.join("dataset_yolo", split, "labels", label_name)

        if os.path.exists(img_src):
            shutil.move(img_src, img_dst)
        else:
            print(f"Image not found: {img_name}")

        if os.path.exists(lbl_src):
            shutil.move(lbl_src, lbl_dst)
        else:
            print(f"Label not found: {label_name}")

move_files(images_distribution["train_list"], "train")
move_files(images_distribution["test_list"], "test")
move_files(images_distribution["val_list"], "val")

## Reproducibility

In [6]:
def fix_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    print(f"Seed fixed: {seed}")

fix_seed(0)

Seed fixed: 0


## Training 

### Learning rate Variation

In [None]:
data_yaml = "data.yaml"
base_model = "yolov8n.yaml"
device = "cuda" if torch.cuda.is_available() else "cpu"

learning_rates= [0.00001, 0.00005, 0.0001, 0.0005, 0.001]
results_list= []

for lr in learning_rates:
    model = YOLO(base_model)

    results = model.train(
        data=data_yaml,       
        epochs=30,             
        imgsz=300,
        batch=15,
        device=device,              
        pretrained=False,
        optimizer="Adam",
        lr0=lr,
        project="lr_search_results",
        name=f"lr_{lr}"
    )

In [4]:
data_yaml = "data.yaml"
base_model = "yolov8n.pt"
device = "cuda" if torch.cuda.is_available() else "cpu"

learning_rates = [0.00001, 0.00005, 0.0001]
k_folds = 3

with open(data_yaml, 'r') as f:
    data_dict = yaml.safe_load(f)
print(data_dict)

images_names = os.listdir(data_dict["train"])
images= []
for img in images_names:
    images.append(data_dict["train"]+f"/{img}")

kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)
results_list = []
for lr in learning_rates:
    for fold, (train_idx, val_idx) in enumerate(kf.split(images)):
        print(f"\nLR={lr} | Fold {fold+1}/{k_folds}")

        train_files = [images[i] for i in train_idx]
        val_files = [images[i] for i in val_idx]

        fold_folder = f"kfold_lr_{lr}_fold_{fold}"
        os.makedirs(fold_folder, exist_ok=True)
        train_file = os.path.join(fold_folder, "train.txt")
        val_file = os.path.join(fold_folder, "val.txt")
        
        train_files_abs = [os.path.abspath(f) for f in train_files]
        val_files_abs = [os.path.abspath(f) for f in val_files]

        print(f"Train_files: {train_files_abs[:10]}")

        with open(train_file, "w") as f:
            f.write("\n".join(train_files_abs))
        with open(val_file, "w") as f:
            f.write("\n".join(val_files_abs))

        temp_yaml = os.path.join(fold_folder, "data.yaml")
        temp_data = data_dict.copy()
        temp_data["train"] = os.path.abspath(train_file)
        temp_data["val"] = os.path.abspath(val_file)

        with open(temp_yaml, "w") as f:
            yaml.dump(temp_data, f)

        model = YOLO(base_model)

        results = model.train(
            data=temp_yaml,
            epochs=30,
            imgsz=300,
            batch=10,
            device=device,
            pretrained=False,
            optimizer="Adam",
            lr0=lr,
            project="kfold_lr",
            name=f"lr{lr}_fold{fold}",
        )

        metrics_dict = results.results_dict
        results_list.append({
            "lr": lr,
            "fold": fold,
            "precision": metrics_dict.get("precision", None),
            "recall": metrics_dict.get("recall", None),
            "mAP50": metrics_dict.get("map50", None),
            "mAP50-95": metrics_dict.get("map", None)
        })

{'train': 'dataset_yolo/train/images', 'val': 'dataset_yolo/val/images', 'test': 'dataset_yolo/test/images', 'nc': 2, 'names': ['head', 'tail']}

LR=1e-05 | Fold 1/3
Train_files: ['/home/vinbu/University/Semester_8/Unit_2_Ai/AI_Solemne_2/dataset_yolo/train/images/seq_0004_00033.jpeg', '/home/vinbu/University/Semester_8/Unit_2_Ai/AI_Solemne_2/dataset_yolo/train/images/seq_0006_01258.jpeg', '/home/vinbu/University/Semester_8/Unit_2_Ai/AI_Solemne_2/dataset_yolo/train/images/seq_0006_00546.jpeg', '/home/vinbu/University/Semester_8/Unit_2_Ai/AI_Solemne_2/dataset_yolo/train/images/seq_0002_00194.jpeg', '/home/vinbu/University/Semester_8/Unit_2_Ai/AI_Solemne_2/dataset_yolo/train/images/seq_0006_00496.jpeg', '/home/vinbu/University/Semester_8/Unit_2_Ai/AI_Solemne_2/dataset_yolo/train/images/seq_0002_00259.jpeg', '/home/vinbu/University/Semester_8/Unit_2_Ai/AI_Solemne_2/dataset_yolo/train/images/seq_0002_00058.jpeg', '/home/vinbu/University/Semester_8/Unit_2_Ai/AI_Solemne_2/dataset_yolo/train/i

In [7]:
df_results = pd.read_csv("kfold_lr/lr0.0001_fold0/results.csv")
df_results

Unnamed: 0,epoch,time,train/box_loss,train/cls_loss,train/dfl_loss,metrics/precision(B),metrics/recall(B),metrics/mAP50(B),metrics/mAP50-95(B),val/box_loss,val/cls_loss,val/dfl_loss,lr/pg0,lr/pg1,lr/pg2
0,1,9.709,2.99274,3.8019,1.28086,0.10752,0.50952,0.18982,0.07405,2.37502,3.48425,1.08544,0.067205,3.3e-05,3.3e-05
1,2,17.5879,2.45273,2.28554,1.0947,0.39597,0.39139,0.30283,0.1159,2.47963,2.17515,1.12522,0.033902,6.4e-05,6.4e-05
2,3,25.7054,2.35057,1.93069,1.05636,0.39761,0.58764,0.42436,0.17707,2.21089,1.65495,1.0385,0.000598,9.3e-05,9.3e-05
3,4,33.634,2.28305,1.80621,1.03031,0.38808,0.60736,0.42473,0.16717,2.15252,1.57963,1.00287,9e-05,9e-05,9e-05
4,5,41.4022,2.24501,1.75505,1.02424,0.45029,0.59265,0.47597,0.18666,2.19832,1.51163,1.02202,8.7e-05,8.7e-05,8.7e-05
5,6,49.1524,2.21027,1.65442,1.02834,0.48312,0.56416,0.55642,0.22863,2.08745,1.48064,0.99512,8.3e-05,8.3e-05,8.3e-05
6,7,57.6532,2.1538,1.6192,0.98931,0.56416,0.57199,0.55523,0.22877,2.03976,1.44693,0.97816,8e-05,8e-05,8e-05
7,8,67.5566,2.0866,1.5761,0.98444,0.63493,0.57587,0.62441,0.2538,2.04814,1.40905,0.97589,7.7e-05,7.7e-05,7.7e-05
8,9,75.9511,2.12222,1.48233,0.99283,0.67525,0.60202,0.66212,0.27069,2.03985,1.35611,0.97535,7.4e-05,7.4e-05,7.4e-05
9,10,83.9011,2.08406,1.46397,0.98962,0.7415,0.62687,0.68694,0.28159,2.03149,1.32673,0.97523,7e-05,7e-05,7e-05
