# Test data generation

We will generate test images with uniformly distributed noise, clustering and background-signal contrast properties.

In [1]:
import numpy as np
from src.image_generator import generate_multiple_images

In [2]:
np.random.seed(0)

SAVING = True
IMAGE_LIST_PATH = '../data/datasets/source_dataset/test.txt'
IMAGE_SAVE_DIR = '../data/datasets/source_dataset/images/test'
LABEL_SAVE_DIR = '../data/datasets/source_dataset/labels/test'

generate_multiple_images(num_images=1000,
                         start_index=2200,
                         image_size=(2472, 2472),
                         snr_range=(2.5, 12.5),
                         clustering_range=(0, 10),
                         bg_color_range=(30, 220),
                         bg_sn_ratio=(1.2, 1.8),
                         fibers=(45, 15),
                         save=SAVING,
                         save_format='all',
                         image_list_file=IMAGE_LIST_PATH,
                         save_dir=IMAGE_SAVE_DIR,
                         label_dir=LABEL_SAVE_DIR,
                         gui=False)

In [None]:
from src.data_utils import resize_images_dir

image_path = f"../data/datasets/yolo_dataset/images/test/"
json_path = f"../data/datasets/yolo_dataset/images/test/"

resize_images_dir(image_path, 1024)

# Model Evaluation

In [1]:
from ultralytics import YOLO
from src.model_eval import evaluate_model

In [4]:
model = YOLO('../models/yolo11large.pt')
evaluate_model(model,
               test_image_dir='../data/datasets/sam_dataset/test/images',
               test_annotation_dir='../data/datasets/sam_dataset/test/images',
               batch=16,
               use_sam_annots=True,
               output_file='../data/results/yolo11large.csv')

  0%|          | 0/63 [00:01<?, ?it/s]


RuntimeError: MPS backend out of memory (MPS allocated: 17.76 GB, other allocations: 21.78 MB, max allowed: 18.13 GB). Tried to allocate 1024.00 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

# Sorting Data

In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv("../data/results/yolo11large.csv")

# Example binning
df['snr_bin'] = pd.cut(df['snr'], bins=[2.5, 5, 7.5, 10, 12.5])
df['cluster_bin'] = pd.cut(df['clustering'], bins=[0, 2, 4, 6, 8, 10])
df['fibre_bin'] = pd.cut(df['num_fibers'], bins=[0, 30, 60, 90, 120])

In [3]:
snr_summary = df.groupby('snr_bin', observed=False)[['f1_score', 'precision', 'recall']].mean().round(3)

# Group by Clustering bin
cluster_summary = df.groupby('cluster_bin', observed=False)[['f1_score', 'precision', 'recall']].mean().round(3)

# Group by Fibre Count bin
fibre_summary = df.groupby('fibre_bin', observed=False)[['f1_score', 'precision', 'recall']].mean().round(3)

# Display
print("SNR Summary:\n", snr_summary)
print("\nClustering Summary:\n", cluster_summary)
print("\nFibre Count Summary:\n", fibre_summary)

SNR Summary:
               f1_score  precision  recall
snr_bin                                  
(2.5, 5.0]       0.560      0.557   0.564
(5.0, 7.5]       0.526      0.519   0.535
(7.5, 10.0]      0.560      0.556   0.564
(10.0, 12.5]     0.561      0.556   0.567

Clustering Summary:
              f1_score  precision  recall
cluster_bin                             
(0, 2]          0.647      0.649   0.645
(2, 4]          0.602      0.598   0.606
(4, 6]          0.529      0.522   0.538
(6, 8]          0.474      0.465   0.485
(8, 10]         0.429      0.419   0.443

Fibre Count Summary:
            f1_score  precision  recall
fibre_bin                             
(0, 30]       0.623      0.626   0.620
(30, 60]      0.548      0.542   0.555
(60, 90]      0.476      0.465   0.489
(90, 120]     0.291      0.269   0.319
