In [1]:
import torch

# Install Optuna quietly.
!pip install optuna -q
# Remove any already-installed PyG packages so the installs below start clean.
!pip uninstall torch-scatter torch-sparse torch-geometric torch-cluster  --y -q
# Install the PyG companion packages from the wheel page whose name embeds the
# running torch version ({torch.__version__} is interpolated by IPython).
!pip install torch-scatter -f https://data.pyg.org/whl/torch-{torch.__version__}.html -q
!pip install torch-sparse -f https://data.pyg.org/whl/torch-{torch.__version__}.html -q
!pip install torch-cluster -f https://data.pyg.org/whl/torch-{torch.__version__}.html -q
# Install torch-geometric itself straight from its GitHub repository (unpinned).
!pip install git+https://github.com/pyg-team/pytorch_geometric.git -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/386.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m378.9/386.6 kB[0m [31m29.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/242.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.5/242.5 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m78.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies .

In [3]:
# ────────────────────────────────────────────────────────────
# 1) Core Imports & Drive Mount
# ────────────────────────────────────────────────────────────
import numpy as np
import pandas as pd
import geopandas as gpd
import os

import networkx as nx

from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scipy.stats import pearsonr

from tqdm.auto import tqdm
import random
from collections import Counter

import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, GATConv

import optuna
from optuna.samplers import TPESampler
from optuna.pruners import HyperbandPruner

# Colab-only: mount Google Drive at /content/drive; the data and output paths
# used further down all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# ────────────────────────────────────────────────────────────
# 2) Data Prep & Graph + Outlier Detection
# ────────────────────────────────────────────────────────────

# 2.1) Read the road-segment GeoJSON, reproject it to EPSG:28992 and renumber
#      the rows 0..N-1 so that index labels and row positions coincide
fp = '/content/drive/MyDrive/Universiteit Utrecht/Thesis/data/road_network_lufeature.geojson'
gdf = gpd.read_file(fp)
gdf = gdf.to_crs(epsg=28992)
gdf = gdf.reset_index(drop=True)

# Store each segment's centroid as a column and as an (N, 2) array of x/y
gdf['centroid'] = gdf.geometry.centroid
coords = np.column_stack((gdf['centroid'].x, gdf['centroid'].y))

# 2.2) Target column, plus every column whose name contains one of the
#      feature keywords below (plain substring match)
target_col = 'NO2d'
FEATURE_KEYWORDS = (
    'AGRI', 'INDUS', 'NATUR', 'PORT', 'RES', 'TRANS', 'URBG', 'WATER',
    'POP', 'EEA', 'HHOLD', 'RDL', 'TLOA', 'HLOA', 'MRDL', 'TMLOA', 'HMLOA',
    'TRAF', 'DINV',
)


def _is_feature_column(col_name):
    """True when `col_name` contains at least one feature keyword."""
    for keyword in FEATURE_KEYWORDS:
        if keyword in col_name:
            return True
    return False


feature_cols = list(filter(_is_feature_column, gdf.columns))
feature_matrix = gdf[feature_cols].to_numpy()

# 2.3) Initial NetworkX graph: one node per segment (carrying every
#      non-geometry column as an attribute), one edge per touching pair
G = nx.Graph()
sidx = gdf.sindex

for node_id, record in gdf.iterrows():
    node_attrs = record.drop('geometry').to_dict()
    G.add_node(node_id, **node_attrs)

for src, shape in enumerate(gdf.geometry):
    # The spatial index only pre-filters by bounding box; `touches` decides
    for dst in sidx.intersection(shape.bounds):
        if src == dst:
            continue
        if shape.touches(gdf.geometry[dst]):
            G.add_edge(src, dst)

# 2.4) Flag segments whose TRAFMAJOR exceeds 20000 as highways and give each
#      class its own MAD multiplier (used by the outlier detector)
gdf['is_highway'] = gdf['TRAFMAJOR'].gt(20000)
group_thresh = dict([(True, 9.0), (False, 5.0)])

# 2.5) Outlier detection function
def detect_outliers(G, gdf, target_col='NO2d', hop=1, thresholds=None, default_factor=3.0):
    """
    Detect spatial outliers from neighbourhood residuals.

    For every node with a non-missing target value, the residual is the value
    minus the mean target of all nodes reachable within `hop` graph steps
    (missing neighbour values are ignored). A node is flagged when
    |residual - median(residuals)| exceeds `factor * MAD`, where MAD is the
    median absolute deviation of the residuals and `factor` is looked up per
    node from `thresholds` using its `is_highway` value.

    Parameters
    ----------
    G : graph whose node ids are index labels of `gdf`.
    gdf : frame holding `target_col` and an `is_highway` column.
    target_col : name of the column to check.
    hop : neighbourhood radius in graph steps.
    thresholds : optional mapping `is_highway -> MAD multiplier`; when None the
        module-level `group_thresh` is used (the original behaviour).
    default_factor : multiplier for nodes whose `is_highway` value is not a
        key of `thresholds`.

    Returns
    -------
    list of node ids flagged as outliers. Empty when no node has both a value
    and at least one neighbour with a value.
    """
    nodes = list(G.nodes())
    vals = np.full(len(nodes), np.nan)
    neigh_means = np.full(len(nodes), np.nan)

    for pos, n in enumerate(nodes):
        v = gdf.at[n, target_col]
        if pd.isna(v):
            continue
        vals[pos] = v
        reachable = nx.single_source_shortest_path_length(G, n, cutoff=hop)
        neigh = [m for m in reachable if m != n]
        nbr_vals = gdf.loc[neigh, target_col].dropna().values
        if nbr_vals.size:
            neigh_means[pos] = nbr_vals.mean()

    valid = ~np.isnan(vals) & ~np.isnan(neigh_means)
    # Nothing to compare: avoid taking the median of an empty array
    if not valid.any():
        return []

    if thresholds is None:
        thresholds = group_thresh

    residuals = vals[valid] - neigh_means[valid]
    valid_nodes = np.array(nodes)[valid]
    med = np.median(residuals)
    mad = np.median(np.abs(residuals - med))
    # Per-node cutoff. NOTE: if mad == 0 every residual that differs from the
    # median is flagged, exactly as before.
    cutoffs = np.array([
        thresholds.get(gdf.at[n, 'is_highway'], default_factor) * mad
        for n in valid_nodes
    ])
    is_outlier = np.abs(residuals - med) > cutoffs
    return valid_nodes[is_outlier].tolist()

# 2.6) Flag outlier segments on the touching graph (1-hop neighbourhoods of
#      the 'NO2d' column) and report how many were found
error_segs = detect_outliers(G, gdf, target_col='NO2d', hop=1)
print(f"⚠️ Detected {len(error_segs)} outlier segments.")

# ────────────────────────────────────────────────────────────
# 3) Graph Augmentation + Data Builder
# ────────────────────────────────────────────────────────────

def augment_grouped_far_knn(
    G, gdf, groups, coords, feature_matrix, feature_cols,
    top_n, neighbors, sim_thresh, min_dist, max_dist,
    hop_thresh, max_edges, per_node_cap,
    road_id_col="ROAD_FID", suffix='grp_far_knn', seed=None
):
    """
    Add long-range "feature similarity" edges between alike segments.

    1) For each group, pick the `top_n` segments by summed group intensity.
    2) In cosine feature space (group columns only), find up to `neighbors`
       nearest peers with similarity >= `sim_thresh`.
    3) Keep pairs whose centroid distance lies in [min_dist, max_dist], that
       are more than `hop_thresh` graph hops apart and that have different
       `road_id_col` values.
    4) Shuffle the candidates and enforce `per_node_cap` new edges per node
       and at most `max_edges` new edges overall.

    Groups referencing a column that is not in `feature_cols` are skipped.
    `gdf` index labels are used as row positions of `feature_matrix` and
    `coords`, so the frame is expected to carry a 0..N-1 index.

    Parameters added for reproducibility
    ------------------------------------
    seed : optional int. When given, candidates are sorted and shuffled with a
        private `random.Random(seed)`; when None the global `random` state is
        used, as before.

    Returns: (augmented copy of `G`, list of new `(u, v)` edges). New edges
    carry the attribute `feature_sim=suffix`.
    """
    road_ids = gdf[road_id_col].to_numpy()
    col_to_idx = {c: i for i, c in enumerate(feature_cols)}
    candidates = set()

    for cols in groups.values():
        # Skip the group if any of its features is missing
        if any(c not in col_to_idx for c in cols):
            continue
        intensity = gdf[cols].sum(axis=1)
        top_idx = intensity.nlargest(top_n).index.to_numpy()
        if top_idx.size < 2:
            continue

        idxs = [col_to_idx[c] for c in cols]
        # Cast to float so the in-place normalisation also works for
        # integer (or object) feature matrices
        subF = np.asarray(feature_matrix[top_idx][:, idxs], dtype=float)
        subF /= np.linalg.norm(subF, axis=1, keepdims=True).clip(1e-6)

        nbr = NearestNeighbors(
            n_neighbors=min(neighbors + 1, len(top_idx)),
            metric='cosine', n_jobs=-1
        ).fit(subF)
        dists, nn_idxs = nbr.kneighbors(subF)
        sims = 1 - dists  # cosine similarity, descending along each row

        for ii, src in enumerate(top_idx):
            close = None  # nodes within hop_thresh of src, computed lazily
            for rank, dst_j in enumerate(nn_idxs[ii]):
                # Skip the query point itself wherever it appears; with tied
                # distances it is not guaranteed to be in column 0
                if dst_j == ii:
                    continue
                if sims[ii, rank] < sim_thresh:
                    break
                dst = top_idx[dst_j]
                if road_ids[src] == road_ids[dst]:
                    continue
                dxy = np.hypot(*(coords[src] - coords[dst]))
                if dxy < min_dist or dxy > max_dist:
                    continue
                if close is None:
                    close = set(nx.single_source_shortest_path_length(
                        G, int(src), cutoff=hop_thresh))
                if dst in close:
                    continue
                candidates.add(tuple(sorted((int(src), int(dst)))))

    # Prune to budgets, visiting candidates in random order
    if seed is None:
        shuffled = random.sample(list(candidates), len(candidates))
    else:
        shuffled = random.Random(seed).sample(sorted(candidates), len(candidates))

    final, counts = [], Counter()
    for u, v in shuffled:
        # Check the global budget first so max_edges=0 really adds nothing
        if len(final) >= max_edges:
            break
        if counts[u] < per_node_cap and counts[v] < per_node_cap:
            final.append((u, v))
            counts[u] += 1
            counts[v] += 1

    G2 = G.copy()
    G2.add_edges_from(final, feature_sim=suffix)
    return G2, final

def build_data_mask_missing(G, gdf, feature_cols, target_col, outliers=None, seed=42):
    """
    Constructs a PyG Data object from a NetworkX graph + GeoDataFrame.

    - Standardizes the `feature_cols` (scaler fitted on all rows)
    - Relabels nodes to row positions and builds a bi-directional edge_index
    - Splits the rows with a non-missing target 80/20 into train/test masks
    - Removes `outliers` from the train mask only (they may stay in test)

    Parameters
    ----------
    G : graph whose node ids are the index labels of `gdf`.
    gdf : frame with the feature and target columns.
    feature_cols, target_col : column names for `x` and `y`.
    outliers : optional sequence of row positions to drop from training.
    seed : seed of the private generator used for the split. The default makes
        every call produce the same split, so models trained on differently
        augmented graphs are scored on the same test nodes. Pass None to draw
        the split from torch's global RNG instead (the previous behaviour).

    Returns
    -------
    torch_geometric.data.Data with `x`, `edge_index`, `y`, `train_mask`,
    `test_mask`.
    """
    gdf2 = gdf.reset_index(drop=True)
    G2 = nx.relabel_nodes(G, {old: new for new, old in enumerate(gdf.index)})

    X = StandardScaler().fit_transform(gdf2[feature_cols].values)
    y = gdf2[target_col].values.reshape(-1, 1)

    # reshape(-1, 2) keeps a well-formed (2, 0) array for graphs without edges
    edges = np.asarray(list(G2.edges()), dtype=np.int64).reshape(-1, 2).T
    edge_index = torch.tensor(
        np.concatenate([edges, edges[::-1]], axis=1),  # add reversed direction
        dtype=torch.long
    )

    data = Data(
        x=torch.tensor(X, dtype=torch.float),
        edge_index=edge_index,
        y=torch.tensor(y, dtype=torch.float)
    )

    # Only rows with an observed target take part in the split
    valid_idx = np.where(~np.isnan(y.flatten()))[0]
    generator = None
    if seed is not None:
        generator = torch.Generator().manual_seed(seed)
    perm = torch.randperm(len(valid_idx), generator=generator).numpy()
    n_train = int(0.8 * len(valid_idx))
    train_idx = valid_idx[perm[:n_train]]
    test_idx = valid_idx[perm[n_train:]]

    train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    train_mask[train_idx] = True
    test_mask[test_idx] = True

    if outliers is not None and len(outliers) > 0:
        train_mask[torch.as_tensor(outliers, dtype=torch.long)] = False

    data.train_mask = train_mask
    data.test_mask = test_mask
    return data

# 3.1) Feature groups used for augmentation: group name -> the columns that
#      are summed into that group's intensity
groups = dict([
    ('industrial',         ['INDUS_300', 'INDUS_1000']),
    ('residential',        ['RES_300', 'RES_1000']),
    ('agriculture',        ['AGRI_300', 'AGRI_1000']),
    ('natural',            ['NATUR_300', 'NATUR_1000']),
    ('port',               ['PORT_300', 'PORT_1000']),
    ('urb_built',          ['URBG_300', 'URBG_1000']),
    ('water',              ['WATER_300', 'WATER_1000']),
    ('traffic',            ['TRAFNEAR', 'TRAFMAJOR']),
    ('pop',                ['POP_300', 'POP_1000']),
    ('population_density', ['EEA_300', 'EEA_1000']),
])

# ────────────────────────────────────────────────────────────
# 4) Utility functions: eval_rmse & train_and_eval (revised)
# ────────────────────────────────────────────────────────────

def eval_rmse(model, data):
    """
    Return the RMSE of `model` on the nodes selected by `data.test_mask`.

    The model is switched to eval mode and run once over the whole graph
    without gradient tracking; predictions and targets are flattened before
    the error is computed.
    """
    model.eval()
    held_out = data.test_mask
    with torch.no_grad():
        scores = model(data.x, data.edge_index)
        y_hat = scores[held_out].cpu().numpy().flatten()
        y_ref = data.y[held_out].cpu().numpy().flatten()
    mse = mean_squared_error(y_ref, y_hat)
    return np.sqrt(mse)

def train_and_eval(model_cls, data, device, model_init_args, optim_args, epochs=50):
    """
    Train a fresh model full-batch and return its test RMSE.

    Parameters
    ----------
    model_cls : callable building the model from `**model_init_args`.
    data : graph data with `x`, `edge_index`, `y`, `train_mask`, `test_mask`.
    device : torch device the model and data are moved to.
    model_init_args : keyword arguments for `model_cls`.
    optim_args : dict with 'learning_rate' and 'weight_decay' for Adam.
    epochs : number of full-batch optimisation steps (default 50, the value
        that used to be hard-coded).

    Returns
    -------
    Test RMSE as computed by `eval_rmse`.
    """
    model = model_cls(**model_init_args).to(device)
    data = data.to(device)
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=optim_args['learning_rate'],
        weight_decay=optim_args['weight_decay']
    )
    # Nothing switches the model to eval mode during training, so setting the
    # mode once is enough
    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        # MSE on training nodes only
        loss = F.mse_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()
    return eval_rmse(model, data)

# ────────────────────────────────────────────────────────────
# 5) Define Baseline GCN & GAT classes
# ────────────────────────────────────────────────────────────

# GCN Model Definition (baseline)
class GCN(nn.Module):
    """
    Three-layer GCN: in_channels -> hidden_channels -> hidden_channels ->
    out_channels, with a ReLU after every layer.

    NOTE(review): the ReLU is also applied to the output layer, so predictions
    are never negative — confirm this is intended for the regression target
    (the GAT below returns its last layer without an activation).
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Run the three convolutions in order, each followed by ReLU."""
        h = x
        for conv in (self.conv1, self.conv2, self.conv3):
            h = F.relu(conv(h, edge_index))
        return h

# GAT Model Definition (unchanged)
class GAT(nn.Module):
    """
    Three-layer GAT. The first two layers use `heads` attention heads (so the
    next layer receives `h_c * heads` inputs) followed by ELU; the last layer
    uses one head with concat=False and has no activation.
    """

    def __init__(self, in_c, h_c, out_c, heads=2):
        super().__init__()
        width = h_c * heads  # input size of layers fed by a multi-head layer
        self.g1 = GATConv(in_c, h_c, heads=heads)
        self.g2 = GATConv(width, h_c, heads=heads)
        self.g3 = GATConv(width, out_c, heads=1, concat=False)

    def forward(self, x, e):
        """Apply g1 and g2 with ELU, then return the raw output of g3."""
        hidden = F.elu(self.g1(x, e))
        hidden = F.elu(self.g2(hidden, e))
        out = self.g3(hidden, e)
        return out

# ────────────────────────────────────────────────────────────
# 6) Optuna Tuning: GCN first, then GAT
# ────────────────────────────────────────────────────────────

# 6.1) Objective for GCN: tune only graph‐augmentation parameters,
#      with baseline GCN hyperparameters fixed
# Fixed baseline GCN architecture (constructor keyword arguments)
fixed_gcn_init = dict(zip(
    ('in_channels', 'hidden_channels', 'out_channels'),
    (len(feature_cols), 64, 1),
))
# Fixed Adam settings used for every GCN run
fixed_gcn_optim = dict(zip(
    ('learning_rate', 'weight_decay'),
    (0.009136733981799275, 4.291139762395118e-05),
))

def objective_gcn(trial):
    """
    Optuna objective for the GCN: sample graph-augmentation parameters, build
    the augmented graph and its Data object, train the fixed baseline GCN and
    return the RMSE reported by `train_and_eval`.
    """
    # Sample the augmentation parameters (same names, ranges and order)
    sampled = dict(
        top_n=trial.suggest_int('top_n',        500, 1500, step=500),
        neighbors=trial.suggest_int('neighbors',    10, 200,   step=10),
        sim_thresh=trial.suggest_float('sim_thresh', 0.80, 0.9999),
        min_dist=trial.suggest_int('min_dist',     50,  500),
        max_dist=trial.suggest_int('max_dist',     500, 5000, log=True),
        hop_thresh=trial.suggest_int('hop_thresh',   1,    5),
        max_edges=trial.suggest_int('max_edges',    500, 5000, step=500),
        per_node_cap=trial.suggest_int('per_node_cap',  1,   10),
    )

    # Augment the base graph, then turn it into a PyG Data object
    augmented_graph, _ = augment_grouped_far_knn(
        G, gdf, groups, coords, feature_matrix, feature_cols, **sampled
    )
    graph_data = build_data_mask_missing(
        augmented_graph, gdf, feature_cols, target_col, outliers=error_segs
    )

    # Train and score with the fixed GCN architecture and optimizer settings
    run_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return train_and_eval(GCN, graph_data, run_device, fixed_gcn_init, fixed_gcn_optim)

# Run the GCN study: 200 TPE-sampled trials minimising the objective
study_gcn = optuna.create_study(
    direction='minimize',
    sampler=TPESampler(),
)
study_gcn.optimize(objective_gcn, n_trials=200)
best_params_gcn = study_gcn.best_params
print("🔍 Best augmentation params for GCN:", best_params_gcn)

# 6.2) Keep the graph-augmentation parameters of the best GCN trial
best_graph_params_gcn = {
    key: best_params_gcn[key]
    for key in (
        'top_n', 'neighbors', 'sim_thresh', 'min_dist',
        'max_dist', 'hop_thresh', 'max_edges', 'per_node_cap',
    )
}

# 6.3) Objective for GAT: tune augmentation except top_n (use same top_n as GCN)
# Fixed GAT architecture (constructor keyword arguments)
fixed_gat_init = dict(zip(
    ('in_c', 'h_c', 'out_c', 'heads'),
    (len(feature_cols), 16, 1, 2),
))
# Fixed Adam settings used for every GAT run
fixed_gat_optim = dict(zip(
    ('learning_rate', 'weight_decay'),
    (0.009136733981799275, 4.291139762395118e-05),
))

def objective_gat(trial):
    """
    Optuna objective for the GAT: `top_n` is taken from the best GCN graph
    parameters, the remaining augmentation parameters are sampled; the fixed
    GAT is trained on the augmented graph and the RMSE reported by
    `train_and_eval` is returned.
    """
    # top_n is reused from the GCN study; everything else is sampled
    sampled = dict(
        top_n=best_graph_params_gcn['top_n'],
        neighbors=trial.suggest_int('neighbors',    10, 200,   step=10),
        sim_thresh=trial.suggest_float('sim_thresh', 0.80, 0.9999),
        min_dist=trial.suggest_int('min_dist',     50,  500),
        max_dist=trial.suggest_int('max_dist',     500, 5000, log=True),
        hop_thresh=trial.suggest_int('hop_thresh',   1,    5),
        max_edges=trial.suggest_int('max_edges',    500, 5000, step=500),
        per_node_cap=trial.suggest_int('per_node_cap',  1,   10),
    )

    # Augment the base graph, then turn it into a PyG Data object
    augmented_graph, _ = augment_grouped_far_knn(
        G, gdf, groups, coords, feature_matrix, feature_cols, **sampled
    )
    graph_data = build_data_mask_missing(
        augmented_graph, gdf, feature_cols, target_col, outliers=error_segs
    )

    # GAT is called with the same keyword arguments the wrapper lambda took
    run_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return train_and_eval(GAT, graph_data, run_device, fixed_gat_init, fixed_gat_optim)

# Run the GAT study: 200 TPE-sampled trials minimising the objective
study_gat = optuna.create_study(
    direction='minimize',
    sampler=TPESampler(),
)
study_gat.optimize(objective_gat, n_trials=200)
best_params_gat_partial = study_gat.best_params
print("🔍 Best augmentation params (except top_n) for GAT:", best_params_gat_partial)

# 6.4) GAT graph parameters: top_n from the GCN study, the rest from the
#      best GAT trial
best_graph_params_gat = {
    'top_n': best_graph_params_gcn['top_n'],
    **{
        key: best_params_gat_partial[key]
        for key in (
            'neighbors', 'sim_thresh', 'min_dist', 'max_dist',
            'hop_thresh', 'max_edges', 'per_node_cap',
        )
    },
}

# ────────────────────────────────────────────────────────────
# 7) External Validation & Final Predictions
# ────────────────────────────────────────────────────────────

# 7.1) Read the Palmes measurements and bring them into the CRS of the
#      road-segment frame
palmes_fp  = '/content/drive/MyDrive/Universiteit Utrecht/Thesis/data/road_palmes_25m.geojson'
palmes_gdf = gpd.read_file(palmes_fp)
palmes_gdf = palmes_gdf.to_crs(gdf.crs)

def eval_external(params, model_cls, model_init, optim_init, epochs=200, radius=50):
    """
    Re-augment the graph, build the data (outliers excluded from training),
    train `model_cls(**model_init)` and score it against the Palmes tubes.

    For every Palmes point the prediction is the mean predicted value of all
    road segments within `radius` of the point; if none lies that close, the
    single nearest segment is used instead.

    Parameters
    ----------
    params : keyword arguments for `augment_grouped_far_knn`.
    model_cls : callable building the model.
    model_init : keyword arguments for `model_cls`.
    optim_init : dict with 'learning_rate' and 'weight_decay' for Adam.
    epochs : number of full-batch training steps (default 200, as before).
    radius : search distance around each tube in CRS units (default 50, as
        before).

    Returns
    -------
    RMSE between the 'mean_annual_palmes_no2' values and the aggregated
    predictions.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # a) Augment graph
    G_aug, _ = augment_grouped_far_knn(
        G, gdf, groups, coords,
        feature_matrix, feature_cols,
        **params
    )
    # b) Build Data object
    data_aug = build_data_mask_missing(
        G_aug, gdf, feature_cols, target_col,
        outliers=error_segs
    ).to(device)

    # c) Train fresh model
    model = model_cls(**model_init).to(data_aug.x.device)
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=optim_init['learning_rate'],
        weight_decay=optim_init['weight_decay']
    )
    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        out = model(data_aug.x, data_aug.edge_index)
        loss = F.mse_loss(out[data_aug.train_mask], data_aug.y[data_aug.train_mask])
        loss.backward()
        optimizer.step()

    # d) Predict on all nodes; preds[i] belongs to the i-th row of gdf
    model.eval()
    with torch.no_grad():
        preds = model(data_aug.x, data_aug.edge_index).detach().cpu().numpy().flatten()

    # e) Aggregate at Palmes locations. The spatial index returns row
    #    POSITIONS, so everything below is indexed positionally and does not
    #    depend on gdf carrying a 0..N-1 index.
    geoms = gdf.geometry
    sindex = gdf.sindex
    actuals, pred_means = [], []
    for _, row in palmes_gdf.iterrows():
        pt = row.geometry
        cand = np.asarray(list(sindex.intersection(pt.buffer(radius).bounds)), dtype=int)
        near = cand
        if cand.size:
            # Bounding-box hits are only candidates; keep the truly close ones
            cand_dist = geoms.iloc[cand].distance(pt).to_numpy()
            near = cand[cand_dist <= radius]
        if near.size == 0:
            # Fall back to the single nearest segment
            near = np.array([np.nanargmin(geoms.distance(pt).to_numpy())])
        pred_means.append(float(preds[near].mean(dtype=np.float64)))
        actuals.append(row['mean_annual_palmes_no2'])
    return np.sqrt(mean_squared_error(actuals, pred_means))

# 7.2) External GCN RMSE (baseline architecture + optimizer settings)
gcn_ext_init = {'in_channels': len(feature_cols), 'hidden_channels': 64, 'out_channels': 1}
gcn_ext_optim = {'learning_rate': 0.009136733981799275, 'weight_decay': 4.291139762395118e-05}
gcn_ext_rmse = eval_external(best_graph_params_gcn, GCN, gcn_ext_init, gcn_ext_optim)
print("▶ External GCN RMSE:", gcn_ext_rmse)

# 7.3) External GAT RMSE (baseline architecture + optimizer settings)
gat_ext_init = {'in_c': len(feature_cols), 'h_c': 16, 'out_c': 1, 'heads': 2}
gat_ext_optim = {'learning_rate': 0.009136733981799275, 'weight_decay': 4.291139762395118e-05}
gat_ext_rmse = eval_external(best_graph_params_gat, GAT, gat_ext_init, gat_ext_optim)
print("▶ External GAT RMSE:", gat_ext_rmse)

# 7.4) Final retrain & save predictions on road segments.
#      Each entry: name -> (graph params, model constructor, model kwargs,
#      optimizer settings). Predictions land in gdf['NO2_pred_<name>'].
final_runs = [
    ('GCN', (best_graph_params_gcn, GCN,
             {'in_channels': len(feature_cols), 'hidden_channels': 64, 'out_channels': 1},
             {'learning_rate': 0.009136733981799275, 'weight_decay': 4.291139762395118e-05})),
    ('GAT', (best_graph_params_gat, GAT,
             {'in_c': len(feature_cols), 'h_c': 16, 'out_c': 1, 'heads': 2},
             {'learning_rate': 0.009136733981799275, 'weight_decay': 4.291139762395118e-05})),
]

for name, (params, model_ctor, model_init, optim_init) in final_runs:
    # Rebuild the augmented graph and its Data object for this model
    G_fin, _ = augment_grouped_far_knn(
        G, gdf, groups, coords, feature_matrix, feature_cols, **params
    )
    target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_fin = build_data_mask_missing(
        G_fin, gdf, feature_cols, target_col, outliers=error_segs
    ).to(target_device)

    # Train the final model for 200 full-batch steps on the training nodes
    model = model_ctor(**model_init).to(data_fin.x.device)
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=optim_init['learning_rate'],
        weight_decay=optim_init['weight_decay'],
    )
    model.train()
    for _ in range(200):
        optimizer.zero_grad()
        fitted = model(data_fin.x, data_fin.edge_index)
        loss = F.mse_loss(fitted[data_fin.train_mask], data_fin.y[data_fin.train_mask])
        loss.backward()
        optimizer.step()

    # Predict every segment and attach the result as a new column
    model.eval()
    with torch.no_grad():
        preds_all = model(data_fin.x, data_fin.edge_index).cpu().numpy().flatten()
    gdf[f'NO2_pred_{name}'] = preds_all

# 7.5) Save final GeoJSON with predictions
# The helper 'centroid' column is removed before writing (presumably because
# the output should carry a single geometry column — confirm). errors='ignore'
# keeps this step re-runnable once the column is already gone.
gdf = gdf.drop(columns='centroid', errors='ignore')
out_fp = '/content/drive/MyDrive/Universiteit Utrecht/Thesis/outputs/road_segments_with_predictions_optuna.geojson'
# Create the output folder if needed so a missing directory cannot discard
# the results of the long tuning run
os.makedirs(os.path.dirname(out_fp), exist_ok=True)
gdf.to_file(out_fp, driver='GeoJSON')
print("✅ All predictions saved to:", out_fp)

[I 2025-06-03 09:43:37,258] A new study created in memory with name: no-name-284fe83c-0a6e-491f-92ea-d2c1590928c1


⚠️ Detected 2172 outlier segments.


[I 2025-06-03 09:44:19,065] Trial 0 finished with value: 9.361431016999227 and parameters: {'top_n': 1500, 'neighbors': 140, 'sim_thresh': 0.9045428335139303, 'min_dist': 362, 'max_dist': 1300, 'hop_thresh': 1, 'max_edges': 2000, 'per_node_cap': 6}. Best is trial 0 with value: 9.361431016999227.
[I 2025-06-03 09:44:46,202] Trial 1 finished with value: 9.14056406897427 and parameters: {'top_n': 500, 'neighbors': 200, 'sim_thresh': 0.9400459152127869, 'min_dist': 174, 'max_dist': 2068, 'hop_thresh': 3, 'max_edges': 3500, 'per_node_cap': 7}. Best is trial 1 with value: 9.14056406897427.
[I 2025-06-03 09:45:10,743] Trial 2 finished with value: 9.516810752013956 and parameters: {'top_n': 500, 'neighbors': 60, 'sim_thresh': 0.9041314051664475, 'min_dist': 196, 'max_dist': 4348, 'hop_thresh': 2, 'max_edges': 1500, 'per_node_cap': 2}. Best is trial 1 with value: 9.14056406897427.
[I 2025-06-03 09:45:37,355] Trial 3 finished with value: 9.301831989943736 and parameters: {'top_n': 500, 'neighbor

KeyboardInterrupt: 