# Test data generation

We will generate test images whose noise level, clustering and background-to-signal contrast are uniformly distributed.

In [1]:
import numpy as np
from src.image_generator import generate_multiple_images

In [2]:
# Seed NumPy's global RNG before generating.
# NOTE(review): presumably generate_multiple_images draws from it — confirm.
np.random.seed(0)

# Output switch and destinations handed to the generator below.
SAVING = True
IMAGE_LIST_PATH = '../data/datasets/source_dataset/test.txt'
IMAGE_SAVE_DIR = '../data/datasets/source_dataset/images/test'
LABEL_SAVE_DIR = '../data/datasets/source_dataset/labels/test'

# All generator arguments are gathered in one mapping so the configuration
# can be read (and inspected) separately from the call itself.
generation_settings = {
    'num_images': 1000,
    'start_index': 2200,
    'image_size': (2472, 2472),
    'snr_range': (2.5, 12.5),
    'clustering_range': (0, 10),
    'bg_color_range': (30, 220),
    'bg_sn_ratio': (1.2, 1.8),
    'fibers': (45, 15),
    'save': SAVING,
    'save_format': 'all',
    'image_list_file': IMAGE_LIST_PATH,
    'save_dir': IMAGE_SAVE_DIR,
    'label_dir': LABEL_SAVE_DIR,
    'gui': False,
}

generate_multiple_images(**generation_settings)

In [None]:
from src.data_utils import resize_images_dir

# Size value passed as the second argument of resize_images_dir.
TARGET_SIZE = 1024

# Directory holding the test images to be resized.
image_path = "../data/datasets/yolo_dataset/images/test/"
# Same location as image_path; not used by the call below.
json_path = image_path

resize_images_dir(image_path, TARGET_SIZE)

(264/1000) Resizing snr9.197_cluster7_bg152_sn116_n47_image_2369.png to 1024

# Sorting Data

In [1]:
import numpy as np
import pandas as pd

In [1]:
# Load the results table; the binning below expects the columns
# 'snr', 'clustering' and 'num_fibers' to be present.
df = pd.read_csv("../src/output.csv")

# Example binning.
# pd.cut builds right-closed intervals and, by default, leaves the very first
# edge open, so rows sitting exactly on the lowest edge (e.g. clustering == 0
# or snr == 2.5) would be assigned NaN and silently vanish from any grouped
# summary. include_lowest=True closes the first interval on the left.
df['snr_bin'] = pd.cut(df['snr'], bins=[2.5, 5, 7.5, 10, 12.5],
                       include_lowest=True)
df['cluster_bin'] = pd.cut(df['clustering'], bins=[0, 2, 4, 6, 8, 10],
                           include_lowest=True)
df['fibre_bin'] = pd.cut(df['num_fibers'], bins=[0, 30, 60, 90, 120],
                         include_lowest=True)

In [2]:
# Metric columns averaged within every bin.
metric_columns = ['f1_score', 'precision', 'recall']


def _mean_metrics_by(bin_column):
    """Return the per-bin mean of the metric columns, rounded to 3 decimals."""
    grouped = df.groupby(bin_column, observed=False)
    return grouped[metric_columns].mean().round(3)


# One summary table per binned parameter.
snr_summary = _mean_metrics_by('snr_bin')
cluster_summary = _mean_metrics_by('cluster_bin')
fibre_summary = _mean_metrics_by('fibre_bin')

# Display each table under its heading.
for heading, table in (
    ("SNR Summary:\n", snr_summary),
    ("\nClustering Summary:\n", cluster_summary),
    ("\nFibre Count Summary:\n", fibre_summary),
):
    print(heading, table)