# Test data generation

We will generate test images whose noise level, fiber clustering, and background-to-signal contrast are each uniformly distributed.

In [None]:
import numpy as np
from src.image_generator import generate_multiple_images

In [2]:
# Seed NumPy's global RNG so the run is repeatable
# (assumes generate_multiple_images draws from np.random — TODO confirm).
np.random.seed(0)

# Output switch and destinations for the generated test split.
SAVING = True
IMAGE_LIST_PATH = '../data/datasets/source_dataset/test.txt'
IMAGE_SAVE_DIR = '../data/datasets/source_dataset/images/test'
LABEL_SAVE_DIR = '../data/datasets/source_dataset/labels/test'

# Image-content settings forwarded unchanged to generate_multiple_images.
# NOTE(review): the meaning of each range/tuple (e.g. whether `fibers` is
# (mean, std)) is defined in src.image_generator — confirm there.
generation_settings = dict(
    num_images=1000,
    start_index=2200,
    image_size=(2472, 2472),
    snr_range=(2.5, 12.5),
    clustering_range=(0, 10),
    bg_color_range=(30, 220),
    bg_sn_ratio=(1.2, 1.8),
    fibers=(45, 15),
)

# Persistence / UI settings forwarded unchanged to generate_multiple_images.
output_settings = dict(
    save=SAVING,
    save_format='all',
    image_list_file=IMAGE_LIST_PATH,
    save_dir=IMAGE_SAVE_DIR,
    label_dir=LABEL_SAVE_DIR,
    gui=False,
)

generate_multiple_images(**generation_settings, **output_settings)

In [None]:
from src.data_utils import resize_images_dir

# Directory of test images that will be resized.
# (Plain string literals: the original f-strings had no placeholders.)
image_path = "../data/datasets/yolo_dataset/images/test/"
# NOTE(review): json_path is not used in this cell and points at the same
# directory as image_path — confirm whether it is still needed.
json_path = "../data/datasets/yolo_dataset/images/test/"

# Resize the images under image_path; 1024 is presumably the target size in
# pixels — confirm against src.data_utils.resize_images_dir.
resize_images_dir(image_path, 1024)

# Choosing the best SAM checkpoint

In [None]:
import os
from ultralytics import SAM
from src.model_eval import evaluate_model

In [None]:
# Directory containing the training checkpoints to evaluate.
ckpt_dir = '/workspace/pixi-sam2/sam2_logs/configs/sam2.1_training/fiber_segmentation.yaml/checkpoints'

# Evaluate every '.pt' file found in ckpt_dir (order is whatever os.listdir returns).
for ckpt in (entry for entry in os.listdir(ckpt_dir) if entry.endswith('.pt')):
    if not ckpt.endswith('_sam2.1_t.pt'):
        # Rename on disk so the filename ends with '_sam2.1_t.pt'.
        # NOTE(review): presumably needed because ultralytics selects the SAM
        # architecture from the checkpoint filename — confirm.
        renamed = f'{os.path.splitext(ckpt)[0]}_sam2.1_t.pt'
        os.rename(os.path.join(ckpt_dir, ckpt), os.path.join(ckpt_dir, renamed))
        ckpt = renamed

    print("CURRENT CHECKPOINT:", ckpt)
    model = SAM(os.path.join(ckpt_dir, ckpt))

    # Images and annotations are read from the same validation directory.
    # The per-checkpoint CSV name is relative, so it is written wherever
    # evaluate_model resolves relative paths (presumably the working directory).
    evaluate_model(
        model,
        test_image_dir='/workspace/datasets/sam_dataset/val/images',
        test_annotation_dir='/workspace/datasets/sam_dataset/val/images',
        batch=1,
        use_sam_annots=True,
        output_file=f'{ckpt}_val.csv',
        verbose=False,
    )

### Evaluating each checkpoint to choose the best one

In [10]:
import pandas as pd
import os

In [16]:
# Folder holding one results CSV per evaluated SAM checkpoint.
ckpt_folder = '../data/results/sam ckpts'

# Only CSV files are considered, visited in sorted filename order.
csv_names = [name for name in sorted(os.listdir(ckpt_folder)) if name.endswith('.csv')]

for ckpt_results in csv_names:
    sam_df = pd.read_csv(os.path.join(ckpt_folder, ckpt_results))

    # Mean of each metric column over all rows of this checkpoint's CSV.
    avg_f1 = sam_df['f1_score'].mean()
    avg_prec = sam_df['precision'].mean()
    avg_recall = sam_df['recall'].mean()

    print(ckpt_results)
    print("F1:", avg_f1, "\n",
          "Precision:", avg_prec, "\n",
          "Recall:", avg_recall, "\n")

# Model Evaluation

In [1]:
from ultralytics import YOLO
from src.model_eval import evaluate_model

In [14]:
# Load the YOLO checkpoint to evaluate.
model = YOLO('../model_checkpoints/yolo11large-fibersegmentation.pt')

# The same directory is passed for both images and annotations.
test_dir = '../data/datasets/sam_dataset/test/images'

# Evaluation settings forwarded unchanged to evaluate_model; results go to
# the CSV named in output_file.
eval_settings = dict(
    test_image_dir=test_dir,
    test_annotation_dir=test_dir,
    batch=1,
    use_sam_annots=True,
    output_file='../data/results/yolo11large.csv',
)

evaluate_model(model, **eval_settings)

# Sorting Data

In [11]:
import numpy as np
import pandas as pd

In [15]:
# Load per-image evaluation results.
# NOTE(review): the evaluation cell above writes yolo11large.csv, while this
# reads yolo11nano.csv — confirm which model is meant to be analysed.
df = pd.read_csv("../data/results/yolo11nano.csv")

# New column -> (source column, bin edges). pd.cut uses right-closed
# intervals by default, so the lowest edges 2.4 and -1 are presumably chosen
# to keep snr == 2.5 and clustering == 0 inside the first bin; a num_fibers
# value of 0 falls outside the first fibre bin.
bin_specs = {
    'snr_bin': ('snr', [2.4, 5, 7.5, 10, 12.5]),
    'cluster_bin': ('clustering', [-1, 2, 4, 6, 8, 10]),
    'fibre_bin': ('num_fibers', [0, 30, 60, 90, 120]),
}

for bin_col, (source_col, edges) in bin_specs.items():
    df[bin_col] = pd.cut(df[source_col], bins=edges)

In [16]:
# Metric columns averaged within each bin.
metric_cols = ['f1_score', 'precision', 'recall']

# Mean metrics per SNR, clustering and fibre-count bin, rounded to 3 decimals.
# observed=False keeps bins that contain no rows in the output.
snr_summary, cluster_summary, fibre_summary = (
    df.groupby(bin_col, observed=False)[metric_cols].mean().round(3)
    for bin_col in ('snr_bin', 'cluster_bin', 'fibre_bin')
)

# Display each table under its heading.
for heading, table in (
    ("SNR Summary:\n", snr_summary),
    ("\nClustering Summary:\n", cluster_summary),
    ("\nFibre Count Summary:\n", fibre_summary),
):
    print(heading, table)