In [6]:
import sys
from pathlib import Path

# Make the project's `src` package importable by putting the parent of the current
# working directory at the front of sys.path.
# NOTE(review): that parent is the repo root only when the kernel's working directory
# is a direct child of the repo (e.g. a notebooks/ folder) — confirm before relocating.
# The membership check keeps re-runs of this cell from inserting duplicate entries.
PROJECT_ROOT = Path.cwd().resolve().parents[0]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

import numpy as np
import pandas as pd

from src.universe_sim import run_simulation
from src.metrics import (
    nearest_neighbor_distance,
    largest_cluster_fraction,
    density_variance_grid,
    number_of_clusters,
)

In [7]:
def compute_metrics_from_history(history, box_size=1.0, eps=0.06, bins=20, min_size=3, burn_frac=0.4):
    """
    Average the four structure metrics over the post-burn-in frames of one run.

    history: (T, N, 2)
    box_size, eps, bins, min_size: forwarded to the metric functions
        (eps and min_size go to the cluster metrics, bins to the density grid).
    burn_frac: fraction of leading frames to discard; burn_frac=0.4 means drop the
        first 40% of frames, compute metrics on the remaining frames, then average
        over time. Must be >= 0. If it would discard every frame (burn_frac >= 1),
        all frames are used instead.

    Returns a dict of floats with keys "nn_mean_time", "lcf_mean_time",
    "densvar_mean_time" and "nclusters_mean_time". If history contains no frames
    the averages are NaN.

    Raises ValueError if burn_frac is negative or NaN.
    """
    # A negative fraction would yield a negative slice start, which silently keeps
    # only the *last* few frames instead of dropping a leading burn-in window.
    # `not (x >= 0)` also rejects NaN.
    if not (burn_frac >= 0):
        raise ValueError(f"burn_frac must be >= 0, got {burn_frac!r}")

    T = history.shape[0]
    start = int(np.floor(T * burn_frac))
    # Fall back to the full history rather than averaging over an empty slice.
    frames = history[start:] if start < T else history

    nn_list, lcf_list, dv_list, nc_list = [], [], [], []

    for pos in frames:
        nn_list.append(nearest_neighbor_distance(pos, box_size))
        lcf_list.append(largest_cluster_fraction(pos, eps=eps, box_size=box_size))
        dv_list.append(density_variance_grid(pos, box_size, bins=bins, normalized=True))
        nc_list.append(number_of_clusters(pos, eps=eps, box_size=box_size, min_size=min_size))

    return {
        "nn_mean_time": float(np.mean(nn_list)),
        "lcf_mean_time": float(np.mean(lcf_list)),
        "densvar_mean_time": float(np.mean(dv_list)),
        "nclusters_mean_time": float(np.mean(nc_list)),
    }

To reduce the influence of initial conditions, we discard an initial transient (“burn-in”) period of each simulation.
Observables are computed only on the latter part of the time series (typically the final 60% of the saved configurations) and then averaged over time.
Provided the system has relaxed within the discarded window, the reported measurements reflect the stationary behaviour of the system rather than transient dynamics.

In [8]:
# Simulation settings shared by every run; run_one() passes them to
# run_simulation as keyword arguments.
BASE = {
    "N": 200,
    "steps": 1500,
    "box_size": 1.0,
    "dt": 1.0,
    "repulsion": 0.02,
    "repulsion_radius": 0.05,
    "save_every": 30,  # saved frames ~ steps/save_every
}

def run_one(seed, attraction, interaction_range, noise):
    """
    Simulate one parameter combination and return its time-averaged metrics.

    Calls run_simulation with the shared BASE settings plus the given seed and
    parameters, reduces the returned history with compute_metrics_from_history
    (eps=0.06, bins=20, min_size=3, burn_frac=0.4), and returns the metric dict
    extended with the "seed", "attraction", "interaction_range" and "noise"
    values that produced it.
    """
    swept = {
        "attraction": attraction,
        "interaction_range": interaction_range,
        "noise": noise,
    }
    history = run_simulation(**BASE, seed=seed, **swept)
    metrics = compute_metrics_from_history(
        history,
        box_size=BASE["box_size"],
        eps=0.06,
        bins=20,
        min_size=3,
        burn_frac=0.4,
    )
    # Metrics first, then the identifying columns, so the resulting row keeps
    # the same key order as before.
    return {**metrics, "seed": seed, **swept}

In [9]:
def sweep(param_name, values, *, seeds, fixed):
    """
    Run one simulation per (value, seed) pair and aggregate the metrics over seeds.

    param_name: one of "attraction", "interaction_range", "noise"
    values: list of floats to assign to param_name
    seeds: list of ints; every value is simulated once per seed
    fixed: dict with the other two params fixed, e.g. {"noise":0.01,"interaction_range":0.6}
        (an entry for param_name itself is overridden by each swept value)

    Returns (df, agg):
        df  - one row per run, as returned by run_one
        agg - one row per swept value with the mean and std over seeds of each
              time-averaged metric, plus "runs", the number of runs per value.
              std is pandas' default (sample) standard deviation, so it is NaN
              when only one seed is used.

    Raises ValueError if param_name is not a sweepable parameter, and KeyError
    if fixed is missing one of the other two parameters.
    """
    sweepable = ("attraction", "interaction_range", "noise")
    # Fail before any simulation runs: an unknown name would otherwise be stored
    # in params and ignored, so every run would use the fixed values and the
    # error would only surface in groupby after all the compute was spent.
    if param_name not in sweepable:
        raise ValueError(f"param_name must be one of {sweepable}, got {param_name!r}")

    rows = []
    for v in values:
        # The parameter set depends only on the swept value, not on the seed.
        params = dict(fixed)
        params[param_name] = v
        for s in seeds:
            rows.append(run_one(
                seed=s,
                attraction=params["attraction"],
                interaction_range=params["interaction_range"],
                noise=params["noise"],
            ))
    df = pd.DataFrame(rows)

    # aggregate over seeds
    group_cols = [param_name]
    agg = df.groupby(group_cols).agg(
        nn_mean=("nn_mean_time", "mean"),
        nn_std=("nn_mean_time", "std"),
        lcf_mean=("lcf_mean_time", "mean"),
        lcf_std=("lcf_mean_time", "std"),
        densvar_mean=("densvar_mean_time", "mean"),
        densvar_std=("densvar_mean_time", "std"),
        nclusters_mean=("nclusters_mean_time", "mean"),
        nclusters_std=("nclusters_mean_time", "std"),
        runs=("seed", "count"),
    ).reset_index()

    return df, agg

In [10]:
seeds = list(range(10))  # 10 independent runs per attraction value
values = [0.0, 0.01, 0.02, 0.03, 0.04]  # attraction strengths to sweep

# Only the two non-swept parameters belong in `fixed`: sweep() sets "attraction"
# from `values` on every run, so a fixed "attraction" entry would never be used.
raw, agg = sweep(
    "attraction",
    values,
    seeds=seeds,
    fixed={"interaction_range": 0.6, "noise": 0.01},
)

agg

Unnamed: 0,attraction,nn_mean,nn_std,lcf_mean,lcf_std,densvar_mean,densvar_std,nclusters_mean,nclusters_std,runs
0,0.0,0.036571,0.000261,0.11715,0.00731,1.875867,0.025407,22.696667,0.569698,10
1,0.01,0.014382,0.000186,0.9882,0.001879,9.5666,0.092731,1.04,0.026294,10
2,0.02,0.010971,8.3e-05,0.997883,0.000445,14.845,0.127468,1.0,0.0,10
3,0.03,0.009503,8e-05,0.999467,0.000312,18.961667,0.169909,1.0,0.0,10
4,0.04,0.008564,3.7e-05,0.999717,0.000236,22.568867,0.091263,1.0,0.0,10


Summary of findings

As the attraction strength increases, the system undergoes a clear transition from a near-homogeneous state to a globally clustered phase.
Even a small attraction (≈ 0.01) is sufficient to produce a dominant cluster, as indicated by the rapid increase of the largest cluster fraction from about 0.12 to about 0.99. This transition is accompanied by a monotonic decrease in nearest-neighbour distance (from about 0.037 to 0.0086 across the sweep), a strong increase in density variance (from about 1.9 to 22.6), and a collapse of the number of clusters from roughly 23 small components to a single connected structure.
The low variability across independent runs confirms that this behaviour is robust and not driven by stochastic fluctuations or initial conditions.