# In [None]:

import numpy as np

# ---------------- FEATURE KEYS ----------------
# Names of the per-node input features, in column order.
FEATURE_KEYS = [
    'Distance',
    'Speed',
    'Load',
    'Capacity',
    'TempRequirement',
    'Containerization',
]

# One mask row per node (10 nodes); a 1 keeps the feature at that position active.
FEATURE_TARGET = [[1 for _ in FEATURE_KEYS] for _ in range(10)]

def default_feature_values():
    """Return a new dict that maps every name in FEATURE_KEYS to 0.5."""
    return dict.fromkeys(FEATURE_KEYS, 0.5)


# Names of the metrics produced for every node, in formula order.
METRIC_KEYS = ['Wait', 'Utilization', 'Throughput', 'Patience', 'Energy']

# One mask row per node; a 1 keeps the metric at that position active.
# Sized for 10 nodes so it covers the same node indices as the 10-row
# FEATURE_TARGET table (a 5-row table raised IndexError for nodes 5-9).
METRIC_TARGET = [[1] * len(METRIC_KEYS) for _ in range(10)]  # extend as needed

# metric formulas, positionally aligned with METRIC_KEYS
METRIC_FORMULAS = [
    lambda x: np.tanh(x),
    lambda x: np.sqrt(x + 0.1),   # NaN for x < -0.1
    lambda x: np.exp(-0.5 * x),
    lambda x: 1 / (1 + x),        # singular at x == -1
    lambda x: x * 0.7
]

# ---------------- LSS OPTIMIZER ----------------
class LSS:
    """List Sorting Search (beam-style optimizer using metric formulas).

    Random candidate values are drawn around a base value, scored with the
    masked sum of METRIC_FORMULAS, sorted, and the top-ranked one is returned.
    The candidates come from NumPy's global RNG, so results vary between calls
    unless that RNG is seeded.
    """
    def __init__(self, num_candidates=10, beam_width=15, minimize=False):
        self.num_candidates = num_candidates
        self.beam_width = beam_width
        self.minimize = minimize

    def optimize(self, features, y=None, metric_mask=None):
        """Return the best-scoring candidate value.

        Args:
            features: mapping of feature name -> numeric value; its mean
                anchors the candidates.
            y: optional sequence; only the first three entries are used and
                their mean is added to the base.
            metric_mask: per-formula flags (truthy = formula contributes).
                ``None`` means every formula is active.

        Returns:
            The candidate value ranked first (lowest score when ``minimize``
            is true, highest otherwise).

        Raises:
            IndexError: if no candidate survives (``num_candidates`` or
                ``beam_width`` is 0).
        """
        # The signature advertises None as a valid mask; previously that
        # crashed in zip(). Treat it as "all metrics active".
        if metric_mask is None:
            metric_mask = [1] * len(METRIC_FORMULAS)

        y = np.zeros(3) if y is None else np.array(y[:3])

        # Avoid NaN (mean of an empty sequence) when nothing is supplied.
        feature_values = list(features.values())
        feature_mean = np.mean(feature_values) if feature_values else 0.0
        y_mean = np.mean(y) if y.size else 0.0
        base = feature_mean + y_mean

        # generate initial candidate values, shifted by the active-metric count
        candidates = base + 0.1 * np.random.randn(self.num_candidates)
        candidates += sum(metric_mask) * 0.01

        # beam entries are (candidate_value, metric_score)
        beam = [
            (val, sum(f(val) if m else 0.0 for f, m in zip(METRIC_FORMULAS, metric_mask)))
            for val in candidates
        ]

        # sort by metric-derived score and keep top beam_width
        beam.sort(key=lambda entry: entry[1], reverse=not self.minimize)
        beam = beam[:self.beam_width]

        # the head of the sorted beam is the winner
        return beam[0][0]


# ---------------- METRICS EVALUATOR ----------------
class MetricsEvaluator:
    """Turn one row of a data matrix into masked metric values via the LSS optimizer."""

    def __init__(self, data_matrix, num_candidates=10, beam_width=5, minimize=True):
        self.data_matrix = data_matrix
        self.num_nodes = data_matrix.shape[0]
        self.optimizer = LSS(
            num_candidates=num_candidates,
            beam_width=beam_width,
            minimize=minimize,
        )

    def extract_features(self, node_idx):
        """Return {feature name: value} for the features active on ``node_idx``."""
        row = self.data_matrix[node_idx, :]
        fallback = default_feature_values()
        n_cols = len(row)

        scaled = {}
        for pos, name in enumerate(FEATURE_KEYS):
            # columns missing from the row fall back to the default value
            value = row[pos] if pos < n_cols else fallback[name]
            # NOTE(review): v / (v + 1e-8) is ~1 for any positive v, so this
            # "rough normalization" flattens all positive inputs - confirm intent.
            if value > 0:
                scaled[name] = value / (value + 1e-8)
            else:
                scaled[name] = fallback[name]

        # keep only the features whose mask entry is truthy
        node_mask = FEATURE_TARGET[node_idx]
        selected = {}
        for name, value, keep in zip(FEATURE_KEYS, scaled.values(), node_mask):
            if keep:
                selected[name] = value
        return selected

    def compute_node_metrics(self, node_idx, y=None):
        """Return {metric name: value} plus a 'score' entry holding their sum."""
        node_features = self.extract_features(node_idx)
        node_mask = METRIC_TARGET[node_idx]
        best = self.optimizer.optimize(node_features, y=y, metric_mask=node_mask)

        # masked-out metrics are reported as 0.0
        report = {
            name: (fn(best) if active else 0.0)
            for name, fn, active in zip(METRIC_KEYS, METRIC_FORMULAS, node_mask)
        }
        report['score'] = sum(report.values())
        return report


# In [None]:
import numpy as np
import pandas as pd

# Builds a synthetic table of N ship-log rows from five groups of randomly
# drawn columns, stacks them into one matrix and min-max scales each column.
# The draws come from NumPy's global RNG in the order written below.
np.random.seed()  # no explicit seed, so each run produces different data
N = 1000   # number of synthetic ship logs

# ============================
# Domain 1: Route & Navigation
# ============================
route_nav = pd.DataFrame({
    'distance_nm': np.random.uniform(50, 5000, N),         # nautical miles
    'planned_speed_kn': np.random.uniform(10, 25, N),      # knots
    'actual_speed_kn': np.random.uniform(8, 26, N),
    'eta_hours': np.random.uniform(5, 500, N),
    'route_risk_score': np.random.uniform(0, 1, N),        # piracy/weather risk
    'fuel_capacity_tons': np.random.uniform(50, 300, N),
    'fuel_used_tons': np.random.uniform(20, 290, N),       # drawn independently of capacity
})

# Derived column: positive when the vessel was slower than planned.
route_nav['speed_variance'] = (
    route_nav['planned_speed_kn'] - route_nav['actual_speed_kn']
)

# ============================
# Domain 2: Weather Conditions
# ============================
weather = pd.DataFrame({
    'wave_height_m': np.random.uniform(0, 12, N),
    'wind_speed_kn': np.random.uniform(0, 60, N),
    'wind_direction_deg': np.random.uniform(0, 360, N),
    'visibility_km': np.random.uniform(1, 20, N),
    'storm_probability': np.random.uniform(0, 1, N),
    'precipitation_mm': np.random.uniform(0, 100, N),
})

# ============================
# Domain 3: Vessel Load & Capacity
# ============================
vessel_load = pd.DataFrame({
    'max_capacity_tons': np.random.uniform(1000, 30000, N),
    'current_load_tons': np.random.uniform(200, 29000, N),
    'num_containers': np.random.randint(50, 2000, N),
    'reefer_containers': np.random.randint(0, 300, N),
    'bulk_cargo_tons': np.random.uniform(0, 5000, N),
    'ballast_water_tons': np.random.uniform(0, 10000, N),
})

# Derived column; load and capacity are independent draws, so the ratio can exceed 1.
vessel_load['load_ratio'] = (
    vessel_load['current_load_tons'] / vessel_load['max_capacity_tons']
)

# ============================
# Domain 4: Cargo Storage & Loading
# ============================
cargo_types = [
    "containers", "bulk", "liquid", "hazardous", "reefer",
    "vehicles", "general_goods"
]

storage_loading = pd.DataFrame({
    'cargo_type': np.random.choice(cargo_types, N),
    'cargo_weight_tons': np.random.uniform(1, 500, N),
    'loading_speed_tph': np.random.uniform(20, 200, N),     # tons per hour
    'unloading_speed_tph': np.random.uniform(20, 200, N),
    'storage_temp_req_C': np.random.uniform(-20, 30, N),    # some cargo needs cooling
    'containerized': np.random.choice([0, 1], N),
})

# Convert cargo_type categorical → numeric index
storage_loading['cargo_type_id'] = pd.factorize(storage_loading['cargo_type'])[0]

# ============================
# Domain 5: Temporal & Port Activity
# ============================
temporal_port = pd.DataFrame({
    'arrival_hour': np.random.randint(0, 24, N),
    'arrival_day': np.random.randint(0, 7, N),
    'port_congestion_level': np.random.uniform(0, 1, N),
    'docking_delay_hours': np.random.uniform(0, 48, N),
    'tugboat_availability': np.random.choice([0, 1], N),
})

# ============================
# Combine & Normalize
# ============================
datasets = [
    route_nav,
    weather,
    vessel_load,
    storage_loading.drop(columns=['cargo_type']),  # use numeric features only
    temporal_port
]

# Stack all domains column-wise: one row per ship log, one column per feature.
DATA_MATRIX = np.hstack([df.values for df in datasets])
# Column-wise min-max scaling; the 1e-8 guards against a zero range.
DATA_MATRIX = (DATA_MATRIX - DATA_MATRIX.min(axis=0)) / (np.ptp(DATA_MATRIX, axis=0) + 1e-8)

# ============================
# Graph-based Multi-node Targets
# (multi-node target generator)
# ============================

D_graph = 5  # number of graph nodes
# Target length per node, as plain ints here; this name is rebound to
# single-element lists after the targets have been generated.
candidate_dims = [7, 6, 6, 7, 6]  # updated target dimensions for 5 nodes

def generate_targets_per_node(DATA_MATRIX, candidate_dims, D_graph):
    """Build one target vector per graph node from rows of ``DATA_MATRIX``.

    Node ``k`` uses row ``k % len(DATA_MATRIX)``, truncated to its requested
    dimension, or right-padded with 0.5 when the row is shorter.

    Args:
        DATA_MATRIX: 2-D array-like of numeric rows.
        candidate_dims: per-node target length, either a plain int or a
            single-element sequence such as ``[7]`` (both forms occur in
            this file).
        D_graph: number of nodes to generate targets for.

    Returns:
        A list of ``{'target': ndarray}`` dicts, one per node. Each array is
        an independent float copy, so changing a target never writes back
        into ``DATA_MATRIX``.
    """
    targets = []
    n_rows = len(DATA_MATRIX)
    for node_idx in range(D_graph):
        # float dtype keeps the 0.5 padding from being truncated on int rows
        row = np.asarray(DATA_MATRIX[node_idx % n_rows], dtype=float)

        dim = candidate_dims[node_idx]
        if isinstance(dim, (list, tuple, np.ndarray)):
            dim = dim[0]
        dim = int(dim)

        if len(row) >= dim:
            sampled = row[:dim].copy()  # copy: a bare slice would alias the matrix
        else:
            sampled = np.pad(row, (0, dim - len(row)), constant_values=0.5)
        targets.append({'target': sampled})
    return targets

# One target entry per graph node, taken from the normalized data matrix.
synthetic_targets = generate_targets_per_node(DATA_MATRIX, candidate_dims, D_graph)

# Report the length of every node's target vector.
for i, t in enumerate(synthetic_targets):
    target_vector = t['target']
    print(f"Node {i} target size: {len(target_vector)}")

# From here on the per-node dimensions are single-element lists; later code in
# this file reads them as candidate_dims[node][0].
candidate_dims = [[7], [6], [6], [7], [6]]


# In [None]:
import numpy as np

# ---------------- FEATURE KEYS ----------------
# Per-node input feature names, in column order.
FEATURE_KEYS = [
    'Distance', 'Speed', 'Load',
    'Capacity', 'TempRequirement', 'Containerization',
]

# 10 nodes, every feature flagged active (1).
FEATURE_TARGET = [[1 for _ in FEATURE_KEYS] for _ in range(10)]

def default_feature_values():
    """Build a fresh mapping in which each FEATURE_KEYS entry has the value 0.5."""
    defaults = {}
    for name in FEATURE_KEYS:
        defaults[name] = 0.5
    return defaults


# Metric names, positionally aligned with METRIC_FORMULAS.
METRIC_KEYS = ['Wait', 'Utilization', 'Throughput', 'Patience', 'Energy']

# 10 node rows, each flagging all five metrics as active.
METRIC_TARGET = [[1] * 5 for _ in range(10)]  # extend as needed

# metric formulas: each maps one scalar (or array) to a metric value
METRIC_FORMULAS = [
    lambda value: np.tanh(value),
    lambda value: np.sqrt(value + 0.1),
    lambda value: np.exp(-0.5 * value),
    lambda value: 1 / (1 + value),
    lambda value: value * 0.7,
]

# ---------------- GRID SEARCH OPTIMIZER ----------------
class GridSearchOptimizer:
    """Deterministic, non-recursive optimizer that scores an evenly spaced grid.

    The grid spans ``value_range`` shifted by a base value derived from the
    inputs; the candidate with the best masked sum of METRIC_FORMULAS wins.
    """
    def __init__(self, num_candidates=10, minimize=True, value_range=(0.0, 5.0)):
        self.num_candidates = num_candidates
        self.minimize = minimize
        self.value_range = value_range  # (min_value, max_value)

    def optimize(self, features, y=None, metric_mask=None):
        """Return the grid value with the best score.

        Args:
            features: mapping of feature name -> numeric value; its mean
                shifts the grid.
            y: optional sequence; the mean of its first three entries is
                added to the shift.
            metric_mask: per-formula flags (truthy = formula contributes).
                ``None`` means every formula is active.

        Returns:
            The first candidate reaching the minimum score (``minimize``) or
            the maximum score otherwise.

        Raises:
            IndexError: if ``num_candidates`` is 0 (empty grid).
        """
        # None is the declared default; previously it crashed inside zip().
        if metric_mask is None:
            metric_mask = [1] * len(METRIC_FORMULAS)

        y = np.zeros(3) if y is None else np.array(y[:3])

        # Empty inputs contribute 0 instead of NaN (mean of nothing).
        feature_values = list(features.values())
        feature_mean = np.mean(feature_values) if feature_values else 0.0
        y_mean = np.mean(y) if y.size else 0.0
        base = feature_mean + y_mean

        # evenly spaced candidates over the shifted range
        min_val, max_val = self.value_range
        candidates = np.linspace(min_val + base, max_val + base, self.num_candidates)

        best_score = np.inf if self.minimize else -np.inf
        best_value = candidates[0]

        for val in candidates:
            score = sum(f(val) if m else 0.0 for f, m in zip(METRIC_FORMULAS, metric_mask))
            # strict comparison: on ties the earliest candidate is kept
            improved = score < best_score if self.minimize else score > best_score
            if improved:
                best_score = score
                best_value = val

        return best_value


# ---------------- METRICS EVALUATOR ----------------
class MetricsEvaluator:
    """Evaluate masked node metrics, picking the operating value with GridSearchOptimizer."""

    def __init__(self, data_matrix, num_candidates=10, minimize=False, value_range=(0.0,5.0)):
        self.data_matrix = data_matrix
        self.num_nodes = data_matrix.shape[0]
        self.optimizer = GridSearchOptimizer(
            num_candidates=num_candidates,
            minimize=minimize,
            value_range=value_range,
        )

    def extract_features(self, node_idx):
        """Return the active features of row ``node_idx`` as {name: value}."""
        row = self.data_matrix[node_idx, :]
        fallback = default_feature_values()
        width = len(row)

        # raw values, with defaults for columns the row does not have
        raw = {
            name: (row[pos] if pos < width else fallback[name])
            for pos, name in enumerate(FEATURE_KEYS)
        }

        # NOTE(review): v / (v + 1e-8) is ~1 for every positive v - confirm
        # this is the intended "rough" normalization.
        scaled = {}
        for name, value in raw.items():
            scaled[name] = value / (value + 1e-8) if value > 0 else fallback[name]

        # drop features whose mask entry is falsy
        node_mask = FEATURE_TARGET[node_idx]
        return {
            name: value
            for name, value, keep in zip(FEATURE_KEYS, scaled.values(), node_mask)
            if keep
        }

    def compute_node_metrics(self, node_idx, y=None):
        """Return per-metric values for the node plus their sum under 'score'."""
        node_features = self.extract_features(node_idx)
        node_mask = METRIC_TARGET[node_idx]
        chosen = self.optimizer.optimize(node_features, y=y, metric_mask=node_mask)

        results = {}
        for name, fn, active in zip(METRIC_KEYS, METRIC_FORMULAS, node_mask):
            if active:
                results[name] = fn(chosen)
            else:
                results[name] = 0.0  # masked-out metric

        results['score'] = sum(results.values())
        return results


# In [None]:
import numpy as np

class BBOptimizer:
    """
    Recursive interval-bisection optimizer for 1D continuous problems.

    The interval is halved until ``max_depth`` or ``tol`` is reached; every
    sub-interval is visited (no pruning), and the best sampled point wins.
    """
    def __init__(self, minimize=False, tol=1e-3, max_depth=10, value_range=(0.0, 5.0)):
        self.minimize = minimize
        self.tol = tol
        self.max_depth = max_depth
        self.value_range = value_range

    def optimize(self, func):
        """
        func: callable f(x) -> score
        returns x_opt, the best sampled input (the score itself is not returned)
        """
        lower, upper = self.value_range
        x_opt, _ = self._branch(func, lower, upper, depth=0)
        return x_opt

    def _branch(self, func, a, b, depth):
        """Return (x, f(x)) for the best point sampled inside [a, b]."""
        midpoint = (a + b) / 2

        # leaf: interval is small enough or the depth budget is spent
        if depth >= self.max_depth or (b - a) < self.tol:
            return midpoint, func(midpoint)

        # endpoints first; on a tie the right endpoint is preferred
        val_a = func(a)
        val_b = func(b)
        if self.minimize:
            prefer_a = val_a < val_b
        else:
            prefer_a = val_a > val_b
        incumbent = (a, val_a) if prefer_a else (b, val_b)

        # the midpoint replaces the incumbent only when strictly better
        val_mid = func(midpoint)
        if self.minimize:
            mid_better = val_mid < incumbent[1]
        else:
            mid_better = val_mid > incumbent[1]
        if mid_better:
            incumbent = (midpoint, val_mid)

        # explore both halves
        left = self._branch(func, a, midpoint, depth + 1)
        right = self._branch(func, midpoint, b, depth + 1)

        # first-listed entry wins ties: incumbent, then left, then right
        choose = min if self.minimize else max
        best_x, best_f = choose([incumbent, left, right], key=lambda pair: pair[1])
        return best_x, best_f
class MetricsEvaluator:
    """Evaluate masked node metrics at the point chosen by BBOptimizer."""

    def __init__(self, data_matrix, minimize=False, value_range=(0.0,5.0), tol=1e-3):
        self.data_matrix = data_matrix
        self.num_nodes = data_matrix.shape[0]
        self.optimizer = BBOptimizer(
            minimize=minimize,
            value_range=value_range,
            tol=tol,
        )

    def extract_features(self, node_idx):
        """Return the active features of row ``node_idx`` as {name: value}."""
        row = self.data_matrix[node_idx, :]
        fallback = default_feature_values()
        width = len(row)

        scaled = {}
        for pos, name in enumerate(FEATURE_KEYS):
            # default when the row has no column for this feature
            value = row[pos] if pos < width else fallback[name]
            # NOTE(review): v / (v + 1e-8) is ~1 for every positive v - confirm
            # this is the intended "rough" normalization.
            scaled[name] = value / (value + 1e-8) if value > 0 else fallback[name]

        # apply the node's feature mask
        node_mask = FEATURE_TARGET[node_idx]
        kept = {}
        for name, value, keep in zip(FEATURE_KEYS, scaled.values(), node_mask):
            if keep:
                kept[name] = value
        return kept

    def compute_node_metrics(self, node_idx, y=None):
        """Return per-metric values for the node plus their sum under 'score'.

        NOTE(review): the extracted features and ``y`` do not influence the
        result here; the optimizer searches its fixed value_range only.
        """
        features = self.extract_features(node_idx)
        node_mask = METRIC_TARGET[node_idx]

        def score_func(x):
            # masked sum of all metric formulas at x
            return sum([fn(x) if active else 0.0
                        for fn, active in zip(METRIC_FORMULAS, node_mask)])

        best_point = self.optimizer.optimize(score_func)

        results = {
            name: (fn(best_point) if active else 0.0)
            for name, fn, active in zip(METRIC_KEYS, METRIC_FORMULAS, node_mask)
        }
        results['score'] = sum(results.values())
        return results


# In [None]:


import numpy as np
import matplotlib.pyplot as plt
import networkx as nx


# ---------------- CONFIG ----------------

# archive / offspring sizes
inner_archive_size, inner_offspring = 80, 40
outer_archive_size, outer_offspring = 40, 40

# iteration budget
inner_iters_per_outer = 50
outer_generations = 10
outer_cost_limit = 10000

# remaining hyper-parameters
inner_learning = 0.1
gamma_interlayer = 1
top_k = 21

# Reseed NumPy's global RNG without an explicit seed. np.random.seed() returns
# None, so `seed` holds None rather than a seed value.
np.random.seed()
seed = None

# Random matrix with one row per graph node and DATA_MATRIX's column count.
new_DATA_MATRIX = np.random.rand(D_graph, DATA_MATRIX.shape[1])



class InterLayer:
    """Directed pairwise layer between graph nodes.

    Each ordered node pair (i, j), i != j, owns a small weight matrix and
    bias. An edge activation is the sigmoid of that affine map applied to the
    normalized concatenation of the two nodes' representations.
    """

    def __init__(self, D_graph, max_inner_dim, inter_dim=None, edge_threshold=0.02, gamma=1.0, seed=42):
        """
        Args:
            D_graph: number of graph nodes.
            max_inner_dim: largest node representation length (int, or a
                sequence whose first element is that int).
            inter_dim: activation length per edge (int or sequence);
                defaults to ``max_inner_dim``.
            edge_threshold: edges with |Gmat[i, j]| at or below this are inactive.
            gamma: multiplier applied in ``mi_for_graph``.
            seed: passed to ``np.random.seed``.
        """
        # NOTE: this reseeds NumPy's *global* RNG, so every later np.random
        # draw in the process is affected by constructing an InterLayer.
        np.random.seed(seed)
        self.D_graph = D_graph
        self.edge_threshold = edge_threshold
        self.gamma = gamma

        inner_dim = self._first_if_sequence(max_inner_dim)
        self.inter_dim = inner_dim if inter_dim is None else self._first_if_sequence(inter_dim)
        # inputs are two concatenated node representations
        self.max_input = 2 * inner_dim

        # small random weights and zero bias for every ordered node pair
        self.weights = {}
        self.bias = {}
        for i in range(D_graph):
            for j in range(D_graph):
                if i != j:
                    self.weights[(i, j)] = np.random.uniform(-0.1, 0.1, (self.inter_dim, self.max_input))
                    self.bias[(i, j)] = np.zeros(self.inter_dim)

    @staticmethod
    def _first_if_sequence(dim):
        """Return ``dim[0]`` for a list/tuple, otherwise ``dim`` unchanged."""
        return dim[0] if isinstance(dim, (list, tuple)) else dim

    def compute_edge_activation(self, i, j, nested_reps):
        """Return the sigmoid activation vector (length ``inter_dim``) of edge i -> j."""
        concat = np.concatenate([nested_reps[i], nested_reps[j]])
        # zero-pad or truncate to the fixed input width
        concat = np.pad(concat, (0, max(0, self.max_input - len(concat))))[:self.max_input]

        # standardize the input (zero mean, unit std)
        concat = (concat - np.mean(concat)) / (np.std(concat) + 1e-12)

        v = self.weights[(i, j)].dot(concat) + self.bias[(i, j)]

        # damp the pre-activation by the mean absolute input, capped at 1
        input_strength = np.clip(np.mean(np.abs(concat)), 0, 1)
        v = v * input_strength

        return 1 / (1 + np.exp(-v))

    def build_activations(self, Gmat, nested_reps):
        """Return {(i, j): activation} for every edge with |Gmat[i, j]| above the threshold."""
        acts = {}
        for i in range(self.D_graph):
            for j in range(self.D_graph):
                if i == j:
                    continue
                if abs(Gmat[i, j]) > self.edge_threshold:
                    acts[(i, j)] = self.compute_edge_activation(i, j, nested_reps)
        return acts

    @staticmethod
    def pairwise_squared_corr(acts):
        """Return the sum of squared off-diagonal correlations between activation vectors.

        Returns 0.0 when fewer than two vectors are given, or when the vectors
        have fewer than two entries (sample variance is undefined there and
        the n-1 denominator would be zero).
        """
        if len(acts) < 2:
            return 0.0
        A = np.stack(list(acts.values()))
        n_dims = A.shape[1]
        if n_dims < 2:
            return 0.0
        A_centered = A - A.mean(axis=1, keepdims=True)
        stds = np.sqrt(np.sum(A_centered**2, axis=1) / (n_dims - 1) + 1e-12)
        cov = A_centered @ A_centered.T / (n_dims - 1)
        corr = cov / (np.outer(stds, stds) + 1e-12)
        np.fill_diagonal(corr, 0)
        return float((corr**2).sum())

    def mi_for_graph(self, Gmat, nested_reps):
        """Return ``gamma`` times the pairwise squared correlation of the active edges (0.0 if none)."""
        acts = self.build_activations(Gmat, nested_reps)
        if not acts:
            return 0.0
        return self.gamma * self.pairwise_squared_corr(acts)

    def correlate_shrink_interlayer(self, fmt_bounds=None, interaction_tensor=None, metrics_keys=None, verbose=True):
        """
        Compute Pearson correlation per node & metric between:
            - shrink factor (adaptive FMT)
            - mean outgoing inter-layer activations
        Returns: {node_idx: {metric: {'r':..., 'p':...}}}

        NOTE(review): this method uses ``self.MK``,
        ``self.compute_fmt_with_bounds_adaptive``, ``self.print_interactions``
        and ``self.compute_fmt_shrink_factor``, none of which are defined on
        this class in the visible source, plus the module-level ``top_k``.
        It looks like it belongs on the multiplex class - confirm.

        NOTE(review): the shrink vector is one scalar repeated for every
        outgoing edge, so it has no variance; such cases are reported as
        r=0.0, p=1.0 instead of handing a constant vector to ``pearsonr``.
        """
        from scipy.stats import pearsonr

        if metrics_keys is None:
            metrics_keys = self.MK

        D = self.D_graph

        # 1. Compute FMT bounds if not given
        if fmt_bounds is None:
            fmt_bounds = self.compute_fmt_with_bounds_adaptive(top_k=top_k)

        # 2. Get inter-layer activations if not provided
        if interaction_tensor is None:
            interaction_tensor = self.print_interactions(return_tensor=True, verbose=False)

        inter_mean = interaction_tensor.mean(axis=2)  # (D,D)
        shrink_factors = self.compute_fmt_shrink_factor(fmt_bounds, metrics_keys)  # (D, num_metrics)

        correlations = {}

        for i in range(D):
            correlations[i] = {}
            # drop the self-loop entry
            mask = np.arange(D) != i
            inter_vec = inter_mean[i, :][mask]
            for k, key in enumerate(metrics_keys):
                # FMT shrink for node i (broadcast across outgoing edges)
                shrink_vec = (shrink_factors[i, k] * np.ones(D))[mask]

                # Pearson r is undefined when either input is constant
                if np.std(inter_vec) > 1e-8 and np.std(shrink_vec) > 1e-8:
                    r, p = pearsonr(shrink_vec, inter_vec)
                else:
                    r, p = 0.0, 1.0  # no variability

                correlations[i][key] = {'r': r, 'p': p}
                if verbose:
                    print(f"Node {i} | {key} shrink vs inter-layer: r={r:.3f}, p={p:.3e}")

        return correlations



# ---------------- UNIFIED ACOR MULTIPLEX ----------------
class Fuzzy_Hierarchical_Multiplex:
    def __init__(self, candidate_dims, D_graph, inner_archive_size, inner_offspring,
                 outer_archive_size, outer_offspring, synthetic_targets, inner_learning,
                 gamma_interlayer=1.0, causal_flag=True,metrics=METRIC_KEYS):
        """Store the configuration and create the per-node state.

        Args:
            candidate_dims: per-node dimensions as single-element sequences
                (read as ``candidate_dims[node][0]``).
            D_graph: number of graph nodes.
            inner_archive_size, inner_offspring, outer_archive_size,
                outer_offspring, inner_learning, causal_flag: stored as given.
            synthetic_targets: per-node entries, either ``{'target': vector}``
                dicts or bare vectors.
            gamma_interlayer: gamma handed to the InterLayer.
            metrics: metric names; 'score' is appended for ``self.MKI``.
        """
        self.candidate_dims = candidate_dims
        self.D_graph = D_graph
        self.inner_archive_size = inner_archive_size
        self.inner_offspring = inner_offspring
        self.outer_archive_size = outer_archive_size
        self.outer_offspring = outer_offspring
        self.synthetic_targets = synthetic_targets
        self.inner_learning = inner_learning
        self.causal_flag = causal_flag

        # Index of the last element of each node's target vector. The entries
        # are {'target': vector} dicts, so measure the vector itself: len() of
        # the dict is always 1, which made every value 0.
        self.best_dim_per_node = [
            len(t['target'] if isinstance(t, dict) else t) - 1
            for t in synthetic_targets
        ]

        self.MK = metrics
        self.MKI = list(metrics) + ['score']
        self.nested_reps = [np.zeros(c[0]) for c in candidate_dims]
        # NOTE: InterLayer seeds NumPy's global RNG (default seed 42), so the
        # random graph matrix drawn right after it is the same on every run.
        self.inter_layer = InterLayer(D_graph, max_inner_dim=max(candidate_dims), gamma=gamma_interlayer)
        self.chosen_Gmat = np.random.uniform(-0.5,0.5,(D_graph,D_graph))
        np.fill_diagonal(self.chosen_Gmat,0)
        # L2 distances to target, appended by every run_inner call
        self.l2_before, self.l2_after = [], []

    # ---------- INNER LOOP (FCM) ----------
    def run_inner(self, node_idx, target, D_fcm,
              steps=100, lr_x=0.001, lr_y=0.001, lr_W=0.001,
              decorrelate_metrics=True):
        """Fit one node's activations to ``target`` and derive its metrics.

        Runs ``steps`` clipped gradient updates on x, y and W so that
        ``y @ W + x`` approaches ``target``, stores the padded x as the node's
        nested representation, records the L2 distance before and after, and
        evaluates the masked metric formulas on an activation/feature mix.

        Returns:
            (x, y, W, mi_score, metric_values); ``metric_values`` also holds
            'score', 'x' (padded activation) and 'feat_vals'.

        NOTE(review): ``decorrelate_metrics`` is accepted but not used here.
        """
        rep_len = len(self.nested_reps[node_idx])

        def pad_to_rep(vec):
            # right-pad with 0.5 up to the nested representation length
            return np.pad(vec, (0, rep_len - len(vec)),
                          mode='constant', constant_values=0.5)

        # --- starting point: x from the target, random y ---
        x = target.copy()
        y = np.random.uniform(-0.6, 0.6, D_fcm)

        target_padded = pad_to_rep(target)
        self.l2_before.append(np.linalg.norm(self.nested_reps[node_idx] - target_padded))

        # --- random coupling matrix without self-connections ---
        W = np.random.uniform(-0.6, 0.6, (D_fcm, D_fcm))
        np.fill_diagonal(W, 0)

        for _ in range(steps):
            residual = (y.dot(W) + x) - target
            grad_y = residual.dot(W.T)
            grad_W = np.outer(y, residual)

            # clipped gradient steps (the gradient w.r.t. x is the residual)
            x -= lr_x * np.clip(residual, -0.05, 0.05)
            y -= lr_y * np.clip(grad_y, -0.05, 0.05)
            W -= lr_W * np.clip(grad_W, -0.01, 0.01)

            # project back into the allowed ranges
            x = np.clip(x, 0, 1)
            y = np.clip(y, 0, 1)
            np.fill_diagonal(W, 0)
            W = np.clip(W, -1, 1)

        # --- publish the padded activation as this node's representation ---
        x_padded = pad_to_rep(x)
        self.nested_reps[node_idx] = x_padded
        self.l2_after.append(np.linalg.norm(x_padded - target_padded))

        # --- node features from the global data matrix ---
        evaluator = MetricsEvaluator(data_matrix=DATA_MATRIX)
        feat_vals = np.array(list(evaluator.extract_features(node_idx).values()))

        # --- metrics: every active formula sees the same weighted input ---
        metric_mask = METRIC_TARGET[node_idx]
        metric_values = {}
        weighted_input = None
        for key, formula, mask in zip(METRIC_KEYS, METRIC_FORMULAS, metric_mask):
            if not mask:
                metric_values[key] = 0.0
                continue
            if weighted_input is None:
                # mean of activation * feature, scaled by the outer weight
                # (when one exists) and offset by a small per-node bias
                weighted_input = np.mean(x[:len(feat_vals)] * feat_vals)
                if hasattr(self, 'best_node_weights'):
                    weighted_input *= self.best_node_weights[node_idx]
                else:
                    weighted_input *= 1.0
                weighted_input += 0.05 * node_idx
            metric_values[key] = formula(weighted_input)

        metric_values['score'] = sum(metric_values.values())

        # keep the inner state the outer loop reads back
        metric_values['x'] = x_padded.copy()
        metric_values['feat_vals'] = feat_vals.copy()

        mi_score = self.inter_layer.mi_for_graph(self.chosen_Gmat, self.nested_reps)

        return x, y, W, mi_score, metric_values





    def run_outer(self, outer_cost_limit=1000):
        """Weight the nodes and score the current generation.

        Reads ``self.capped_node_metrics`` (one dict per node with 'x' and
        'feat_vals'), solves a sum-to-one constrained problem for the node
        weights, and combines the weighted node contributions with a
        correlation penalty between the fuzzy metric tensor and the
        inter-layer activations.

        Args:
            outer_cost_limit: cap on the sum of raw node scores; larger sums
                are rescaled down to it.

        Returns:
            (node_metrics_list, combined_score, node_contributions).

        Side effects:
            Sets ``node_score_contributions``, ``correlation_penalty``,
            ``weighted_fmt`` and ``best_node_weights`` on ``self``.
        """
        from scipy.optimize import minimize

        node_metrics_list = self.capped_node_metrics
        D = self.D_graph
        gamma = self.inter_layer.gamma
        lambda_reg = 0.05
        has_prior_weights = hasattr(self, 'best_node_weights')

        # --- Compute raw scores using inner activations ---
        raw_scores = np.zeros(D)
        for node_idx, metrics in enumerate(node_metrics_list):
            feat_vals = metrics['feat_vals']
            act_vals = metrics['x'][:len(feat_vals)]

            weighted_input = np.mean(act_vals * feat_vals)
            weighted_input += 0.05 * node_idx  # small per-node bias

            # reuse the weights of the previous outer run, if any
            weighted_input *= self.best_node_weights[node_idx] if has_prior_weights else 1.0

            raw_scores[node_idx] = weighted_input

        # --- Apply cap if needed ---
        total_raw = raw_scores.sum()
        if total_raw > outer_cost_limit:
            raw_scores *= outer_cost_limit / total_raw

        # --- Compute Fuzzy Metric Tensor ---
        fuzzy_tensor = self.compute_fuzzy_metric_tensor(normalize=True)
        fmt_node_sums = np.array([fuzzy_tensor[i,:,:].sum() - fuzzy_tensor[i,i,:].sum() for i in range(D)])

        node_gain = raw_scores + gamma * fmt_node_sums

        # --- Node contribution optimization ---
        def objective(weights):
            reg_penalty = lambda_reg * np.sum((weights - 1.0/D)**2)
            return - ((weights * node_gain).sum() - reg_penalty)

        # Weights in [0.2, 0.4] can only sum to 1 for 3 <= D <= 5 (and for
        # D == 5 the only feasible point is 0.2 each). For any other D fall
        # back to bounds around the uniform weight so the problem is feasible.
        lo, hi = 0.2, 0.4
        if not (lo * D <= 1.0 <= hi * D):
            lo, hi = 0.5 / max(D, 1), min(1.0, 2.0 / max(D, 1))
        bounds = [(lo, hi)] * D
        cons = {'type': 'eq', 'fun': lambda w: np.sum(w) - 1.0}
        x0 = np.ones(D)/D

        result = minimize(objective, x0=x0, bounds=bounds, constraints=cons, method='SLSQP')
        # an unconverged result may violate the constraints; use uniform weights then
        node_weights = result.x if result.success else x0

        # --- Compute node contributions ---
        node_contributions = node_gain * node_weights

        # --- Weighted FMT, rescaled to [0, 1] for visualization ---
        weighted_fmt = fuzzy_tensor.copy()
        for i in range(D):
            weighted_fmt[i,:,:] *= node_weights[i]
        weighted_fmt = (weighted_fmt - weighted_fmt.min()) / (weighted_fmt.max() - weighted_fmt.min() + 1e-12)

        # --- Correlation penalty ---
        interaction_tensor = self.print_interactions(return_tensor=True, verbose=False)
        fmt_mean = fuzzy_tensor.mean(axis=2)
        inter_mean = interaction_tensor.mean(axis=2)
        corr_penalty = 0.0
        for i in range(D):
            fmt_vec = fmt_mean[i,:] * node_weights[i]
            inter_vec = inter_mean[i,:] * node_weights[i]
            # correlation is undefined for constant vectors; skip those rows
            if np.std(fmt_vec) > 1e-8 and np.std(inter_vec) > 1e-8:
                corr_penalty += abs(np.corrcoef(fmt_vec, inter_vec)[0,1])**2
        corr_penalty /= D

        combined_score = node_contributions.sum() - corr_penalty

        # --- Save attributes ---
        self.node_score_contributions = node_contributions
        self.correlation_penalty = corr_penalty
        self.weighted_fmt = weighted_fmt
        self.best_node_weights = node_weights

        return node_metrics_list, combined_score, node_contributions









    def run(self, outer_generations=outer_generations):
        """Run the inner loop for every node, then the outer loop, per generation.

        A report is printed for each generation whose outer score strictly
        improves on the best seen so far. (Previously the report sat in the
        ``else`` branch, so "NEW BEST" was printed only for generations that
        did not improve.)

        Args:
            outer_generations: number of generations; the default is bound to
                the module-level value at definition time.

        Returns:
            The best outer score seen (``-inf`` if no generation ran).
        """
        best_score = -np.inf

        for gen in range(outer_generations):
            node_metrics_list = []

            for node_idx in range(self.D_graph):
                full_target = self.synthetic_targets[node_idx]['target']
                D_fcm = self.candidate_dims[node_idx][0]
                target = full_target[:D_fcm]

                _, _, _, _, metrics = self.run_inner(node_idx, target, D_fcm)
                node_metrics_list.append(metrics)

            # Outer loop reads the metrics from this attribute
            self.capped_node_metrics = node_metrics_list
            _, capped_score, node_contributions = self.run_outer()

            if capped_score > best_score:
                best_score = capped_score
                self._report_new_best(gen, node_metrics_list, node_contributions, capped_score)

        return best_score

    def _report_new_best(self, gen, node_metrics_list, node_contributions, capped_score):
        """Print the per-node metrics, contributions and score of a new best generation."""
        print(f"\n--- Generation {gen} Metrics (NEW BEST) ---")
        for i, m in enumerate(node_metrics_list):
            out_str = []
            for k, v in m.items():
                if np.isscalar(v):
                    out_str.append(f"{k}: {v:.2f}")
                elif isinstance(v, np.ndarray):
                    out_str.append(f"{k}: mean {v.mean():.2f}, shape {v.shape}")
                else:
                    out_str.append(f"{k}: {v}")
            print(f"Node {i} | " + " | ".join(out_str))

        print(f"\n--- Generation {gen} Node Contributions (NEW BEST) ---")
        for i, c in enumerate(node_contributions):
            print(f"Node {i}: Contribution = {c:.4f}")

        print(f"Outer Score (capped): {capped_score:.3f} <-- NEW BEST")




    # ---------- VISUALIZATIONS ----------
    def plot_pointwise_minmax_elite(self, top_k=21):
        """Plot, per node, the estimate, the true target and a jittered min/max band.

        The band is the pointwise min/max over ``top_k`` noisy copies of the
        estimate (Gaussian noise, std 0.05, clipped to [0, 1]).
        """
        n_nodes = self.D_graph
        plt.figure(figsize=(14,3))
        for i in range(n_nodes):
            # estimate restricted to this node's own dimension
            node_dim = self.candidate_dims[i][0]
            estimate = self.nested_reps[i][:node_dim]
            n_points = len(estimate)

            noise = np.random.normal(0,0.05,(top_k,n_points))
            samples = np.clip(estimate + noise,0,1)
            band_low = samples.min(axis=0)
            band_high = samples.max(axis=0)

            # true target cut to the estimate's length, zero-padded if shorter
            truth = self.synthetic_targets[i]['target'][:n_points]
            shortfall = n_points - len(truth)
            if shortfall > 0:
                truth = np.pad(truth, (0, shortfall), "constant")

            plt.subplot(1,n_nodes,i+1)
            plt.fill_between(range(len(band_low)),band_low,band_high,color='skyblue',alpha=0.4,label='Elite Interval')
            plt.plot(estimate,'k-',lw=2,label='Estimated')
            plt.plot(truth,'r--',lw=2,label='True')
            plt.ylim(0,1.05)
            plt.title(f"Node {i+1}")
            if i == 0:
                plt.legend()
        plt.tight_layout()
        plt.show()


    def plot_nested_activations(self):
        """Draw one bar chart per node of its nested representation, cut to the node's dimension."""
        n_nodes = self.D_graph
        plt.figure(figsize=(12,3))
        for i, rep in enumerate(self.nested_reps):
            node_dim = self.candidate_dims[i][0]
            values = rep[:node_dim]
            positions = range(len(values))

            plt.subplot(1,n_nodes,i+1)
            # bar colour follows the activation value (plasma colormap)
            plt.bar(positions, values, color=plt.cm.plasma(values))
            plt.ylim(0,1)
            plt.title(f"Node {i+1}")
        plt.tight_layout()
        plt.show()


    def plot_outer_fuzzy_graph(self):
        """Draw the directed graph described by ``self.chosen_Gmat``.

        An edge i -> j is drawn when |Gmat[i, j]| exceeds the inter-layer's
        edge threshold (the same rule the activations use). Positive weights
        are green, negative red, and edge width scales with |weight|. Node
        size scales with ``best_dim_per_node``, floored at 1 so a node with
        value 0 is still visible.
        """
        threshold = self.inter_layer.edge_threshold
        G = nx.DiGraph()
        for i in range(self.D_graph):
            G.add_node(i)
        for i in range(self.D_graph):
            for j in range(self.D_graph):
                if i != j and abs(self.chosen_Gmat[i,j]) > threshold:
                    G.add_edge(i,j,weight=self.chosen_Gmat[i,j])

        node_sizes = [max(self.best_dim_per_node[i], 1)*200 for i in range(self.D_graph)]
        edge_weights = [d['weight'] for _,_,d in G.edges(data=True)]
        edge_colors = ['green' if w > 0 else 'red' for w in edge_weights]
        edge_widths = [abs(w)*3 for w in edge_weights]

        pos = nx.circular_layout(G)
        plt.figure(figsize=(6,6))
        nx.draw(G,pos,node_size=node_sizes,node_color='skyblue',
                edge_color=edge_colors,width=edge_widths,arrows=True,with_labels=True)
        plt.title("Outer Fuzzy Multiplex Graph")
        plt.show()
# ---------------- INTERACTIONS INSPECTOR ----------------

    def print_interactions(self, return_tensor=True, verbose=True):
        """
        Collect inter-layer activations into a dense tensor and optionally print them.

        The activations come from ``self.inter_layer.build_activations`` called
        with ``self.chosen_Gmat`` and ``self.nested_reps``; the result is
        iterated as a mapping ``{(i, j): vector}``.

        Args:
            return_tensor: when true, return the tensor; otherwise return None.
            verbose: when true, print one line per edge, or a notice when
                there are no activations.

        Returns:
            Array of shape (D_graph, D_graph, inter_dim) with ``[i, j, :]``
            set for every returned edge and zeros elsewhere, or None when
            ``return_tensor`` is false.
        """
        layer = self.inter_layer
        n_nodes = self.D_graph
        tensor = np.zeros((n_nodes, n_nodes, layer.inter_dim))

        activations = layer.build_activations(self.chosen_Gmat, self.nested_reps)

        if activations:
            for (src, dst), vector in activations.items():
                tensor[src, dst, :] = vector
                if verbose:
                    formatted = ", ".join([f"{v:.3f}" for v in vector])
                    print(f"Node {src} -> Node {dst}: [{formatted}]")
        elif verbose:
            print("No active edges above threshold.")

        if return_tensor:
            return tensor
        return None

    # The methods below are class-level siblings of print_interactions, not nested inside it.
    def print_l2_summary(self):
        """
        Print the paired entries of ``self.l2_before`` and ``self.l2_after``.

        One line is written per node index, with both values formatted to
        four decimals. Pairing stops at the shorter of the two sequences.
        """
        print("\nL2 Distances to Target per Node:")
        node = 0
        for dist_before, dist_after in zip(self.l2_before, self.l2_after):
            print(f"Node {node}: Before={dist_before:.4f}, After={dist_after:.4f}")
            node += 1

    def compute_fuzzy_metric_tensor(self, normalize=True, verbose=False):
        """
        Build the Fuzzy Metric Tensor of shape (D_graph, D_graph, num_metrics).

        Node metrics are obtained from
        ``MetricsEvaluator(DATA_MATRIX).compute_node_metrics(i, y=rep)`` for
        each entry of ``self.nested_reps`` and read in ``self.MK`` order.
        Entry ``[i, j, :]`` is node j's metric vector: unscaled on the
        diagonal, and multiplied by ``|chosen_Gmat[i, j]|`` clipped to
        [0, 1] everywhere else.

        Args:
            normalize: when true, min-max rescale the whole tensor at once.
            verbose: when true, print the tensor shape.

        Returns:
            The (optionally normalized) tensor.
        """
        keys = self.MK
        n_nodes = self.D_graph
        fmt = np.zeros((n_nodes, n_nodes, len(keys)))

        evaluator = MetricsEvaluator(DATA_MATRIX)

        # One metric vector per node, stacked into a (nodes, metrics) array.
        rows = []
        for node, rep in enumerate(self.nested_reps):
            result = evaluator.compute_node_metrics(node, y=rep)
            rows.append(np.array([result[name] for name in keys]))
        per_node = np.array(rows)

        # Column j of the tensor always carries node j's metrics.
        for dst in range(n_nodes):
            vector = per_node[dst]
            for src in range(n_nodes):
                if src == dst:
                    fmt[src, dst, :] = vector
                    continue
                strength = np.clip(abs(self.chosen_Gmat[src, dst]), 0, 1)
                fmt[src, dst, :] = strength * vector

        if normalize:
            lowest = fmt.min()
            highest = fmt.max()
            fmt = (fmt - lowest) / (highest - lowest + 1e-12)

        if verbose:
            print("Fuzzy Metric Tensor shape:", fmt.shape)

        return fmt



    def compute_fmt_shrink_factor(self, fmt_bounds, metrics_keys=None):
        """
        Return ``1 - width`` of each node's own (diagonal) bound interval.

        For node i and metric k the width is
        ``fmt_bounds[i, i, k, 1] - fmt_bounds[i, i, k, 0] + 1e-12``; no
        original interval is consulted, so the value is only a fraction
        of shrinkage if the bounds live on a unit scale (assumption, not
        checked here). Narrower intervals give larger values.

        Args:
            fmt_bounds: array indexed as [i, j, metric, 0 or 1].
            metrics_keys: metric names; only their count is used.
                Defaults to ``self.MK``.

        Returns:
            Array of shape (D_graph, num_metrics).
        """
        keys = self.MK if metrics_keys is None else metrics_keys
        n_nodes = self.D_graph
        n_metrics = len(keys)

        result = np.zeros((n_nodes, n_metrics))
        for node in range(n_nodes):
            own = fmt_bounds[node, node]  # this node's [metric, lower/upper] slice
            for metric in range(n_metrics):
                width = own[metric, 1] - own[metric, 0] + 1e-12
                result[node, metric] = 1 - width

        return result

    def compute_fmt_with_bounds_adaptive(self, top_k=21, max_shrink=0.5, metrics_keys=None):
        """
        Compute FMT bounds from random perturbations and shrink them adaptively.

        For every node i, ``top_k`` Gaussian perturbations (sigma 0.05,
        clipped to [0, 1]) of ``self.nested_reps[i]`` are scored with
        ``MetricsEvaluator(DATA_MATRIX).compute_node_metrics``. The pointwise
        min/max of each metric form the raw interval, which is then narrowed
        symmetrically around its midpoint.

        The fraction of the half-width that is removed is
        ``max_shrink * (1 - var_norm)`` with
        ``var_norm = min(1, std / (upper - lower))``. Intervals therefore
        shrink most where variability is low, and ``max_shrink=0`` returns
        the raw min/max bounds unchanged. (Previously this fraction was
        applied as the part to *keep*, which inverted both properties.)

        Args:
            top_k: number of perturbed samples per node.
            max_shrink: largest fraction of the interval half-width that may
                be removed; values outside [0, 1] are clamped.
            metrics_keys: metric names to read from the evaluator output.
                Defaults to ``self.MK``.

        Returns:
            Array of shape (D, D, num_metrics, 2) holding [lower, upper].
            Row i's bounds are repeated for every j.
        """
        if metrics_keys is None:
            metrics_keys = self.MK

        D = self.D_graph
        num_metrics = len(metrics_keys)
        tensor_bounds = np.zeros((D, D, num_metrics, 2))
        metrics_evaluator = MetricsEvaluator(DATA_MATRIX)

        # Keep the removed fraction inside [0, 1] so the bounds can never cross.
        max_shrink = min(1.0, max(0.0, max_shrink))

        for i in range(D):
            # Step 1: sample perturbations around the node's representation.
            base = self.nested_reps[i]
            reps = np.clip(base + np.random.normal(0, 0.05, (top_k, len(base))), 0, 1)
            metrics_matrix = np.zeros((top_k, num_metrics))
            for idx, rep in enumerate(reps):
                m = metrics_evaluator.compute_node_metrics(i, y=rep)
                metrics_matrix[idx, :] = [m[k] for k in metrics_keys]

            lower_i = metrics_matrix.min(axis=0)
            upper_i = metrics_matrix.max(axis=0)
            spread_i = metrics_matrix.std(axis=0)

            # Step 2: adaptive shrinking. The bounds do not depend on j, so
            # each (i, k) pair is computed once and written to every j.
            for k in range(num_metrics):
                lower, upper = lower_i[k], upper_i[k]
                midpoint = (lower + upper) / 2
                half_width = (upper - lower) / 2
                var_norm = min(1.0, spread_i[k] / (upper - lower + 1e-12))
                shrink_fraction = max_shrink * (1 - var_norm)
                keep = 1 - shrink_fraction
                tensor_bounds[i, :, k, 0] = midpoint - keep * half_width
                tensor_bounds[i, :, k, 1] = midpoint + keep * half_width

        return tensor_bounds



    def plot_fuzzy_metric_tensor_heatmaps(self, fuzzy_tensor=None, metrics_keys=None):
        """
        Plot a heatmap panel for each metric slice of the FMT.

        Rows: source node i
        Columns: target node j

        Args:
            fuzzy_tensor: array indexed as [i, j, metric]. When omitted it is
                computed with ``compute_fuzzy_metric_tensor(normalize=True)``.
            metrics_keys: panel titles, one per metric slice in axis-2 order.
                Defaults to ``self.MK``, the same key list the tensor is
                built from, so titles and slices stay aligned. The earlier
                hard-coded four-label default was a shared mutable list with
                no visible link to ``self.MK``. Keys beyond the number of
                available slices are ignored.
        """
        if metrics_keys is None:
            metrics_keys = self.MK
        if fuzzy_tensor is None:
            fuzzy_tensor = self.compute_fuzzy_metric_tensor(normalize=True)

        D = self.D_graph
        # Never request more panels than the tensor has metric slices.
        metrics_keys = list(metrics_keys)[:fuzzy_tensor.shape[2]]
        num_metrics = len(metrics_keys)
        if num_metrics == 0:
            return  # nothing to draw

        fig, axes = plt.subplots(1, num_metrics, figsize=(4*num_metrics, 4))
        if num_metrics == 1:
            axes = [axes]  # subplots returns a bare Axes for a single panel

        for k, key in enumerate(metrics_keys):
            data = fuzzy_tensor[:, :, k]
            # Color scale is fixed to [0, 1], i.e. it expects a normalized tensor.
            im = axes[k].imshow(data, cmap='viridis', vmin=0, vmax=1)
            for i in range(D):
                for j in range(D):
                    axes[k].text(j, i, f"{data[i,j]:.2f}", ha='center', va='center', color='white', fontsize=9)
            axes[k].set_xticks(range(D))
            axes[k].set_yticks(range(D))
            axes[k].set_xticklabels([f'Node {j}' for j in range(D)])
            axes[k].set_yticklabels([f'Node {i}' for i in range(D)])
            axes[k].set_title(f'FMT - {key}')

        fig.colorbar(im, ax=axes, orientation='vertical', fraction=0.025, pad=0.04, label='Normalized Metric Value')
        plt.tight_layout()
        plt.show()

    def compute_fmt_with_elite_bounds(self, top_k=21):
        """
        Compute per-metric [lower, upper] bounds from perturbed representations.

        For each node i, ``top_k`` Gaussian perturbations (sigma 0.05,
        clipped to [0, 1]) of ``self.nested_reps[i]`` are scored with
        ``MetricsEvaluator(DATA_MATRIX).compute_node_metrics``; the pointwise
        minimum and maximum of every metric in ``self.MK`` become the bounds.

        Args:
            top_k: number of perturbed samples drawn per node.

        Returns:
            Array of shape (D, D, num_metrics, 2). The bounds of node i are
            written identically to every ``[i, j]`` entry.
        """
        keys = self.MK
        n_nodes = self.D_graph
        n_metrics = len(keys)
        bounds = np.zeros((n_nodes, n_nodes, n_metrics, 2))

        evaluator = MetricsEvaluator(DATA_MATRIX)

        for node in range(n_nodes):
            center = self.nested_reps[node]
            noise = np.random.normal(0, 0.05, (top_k, len(center)))
            samples = np.clip(center + noise, 0, 1)

            # Score every perturbed sample on every metric.
            scores = np.zeros((top_k, n_metrics))
            for row, sample in enumerate(samples):
                values = evaluator.compute_node_metrics(node, y=sample)
                scores[row, :] = [values[name] for name in keys]

            lowest = scores.min(axis=0)
            highest = scores.max(axis=0)

            # Same interval for every target j of this source node.
            bounds[node, :, :, 0] = lowest
            bounds[node, :, :, 1] = highest

        return bounds


    def plot_fmt_with_bounds(self, fmt_tensor_bounds):
        """
        Draw a one-row heatmap of the FMT bounds collapsed to one value per metric.

        The midpoint of every [lower, upper] pair is averaged over axis 1 and
        then over axis 0. When both ``best_alpha`` and ``best_w_contrib``
        exist on the instance, the row is scaled by the mean of their
        product. Cell colors use a min-max normalized copy of the row; the
        numbers printed in the cells are the un-normalized values.

        Args:
            fmt_tensor_bounds: array indexed as [i, j, metric, 0 or 1] with
                lower bounds at index 0 and upper bounds at index 1.
        """
        labels = self.MK
        n_metrics = len(labels)

        # Midpoint of each interval, then collapse both node axes in turn.
        midpoints = (fmt_tensor_bounds[:, :, :, 0] + fmt_tensor_bounds[:, :, :, 1]) / 2
        row = midpoints.mean(axis=1).mean(axis=0, keepdims=True)

        if hasattr(self, 'best_alpha') and hasattr(self, 'best_w_contrib'):
            row = row * (self.best_alpha * self.best_w_contrib).mean()

        low, high = row.min(), row.max()
        row_scaled = (row - low) / (high - low + 1e-12)

        fig, ax = plt.subplots(figsize=(1.2*n_metrics + 4, 2))
        im = ax.imshow(row_scaled, cmap='viridis', aspect='auto', vmin=0, vmax=1)

        # The collapsed array always has exactly one row.
        for col in range(n_metrics):
            ax.text(col, 0, f"{row[0, col]:.2f}", ha='center', va='center', color='white', fontsize=8)

        ax.set_xticks(range(n_metrics))
        ax.set_xticklabels(labels[:n_metrics], rotation=45, ha='right')
        ax.set_yticks([0])
        ax.set_yticklabels(['Mean across nodes'])
        ax.set_title("Weighted FMT with Bounds (Collapsed to 1 Row)")
        fig.colorbar(im, ax=ax, label='Weighted Mean Metric Value')
        plt.tight_layout()
        plt.show()

    def plot_node_score_contribution(self, metrics_keys=METRIC_KEYS):
        """
        Plot per-node score contributions as three annotated heatmaps.

        Panels:
            - Raw: ``self.node_score_contributions`` placed on the diagonal.
            - FMT: the fuzzy metric tensor (``self.weighted_fmt`` when present,
              otherwise ``compute_fuzzy_metric_tensor(normalize=True)``),
              min-max normalized, summed over metrics, diagonal zeroed.
            - Total: the sum of the two.

        All three panels share one color scale spanning the smallest and
        largest value found in any of them, so the shared colorbar is valid
        for every panel. (The scale used to be pinned to [0, 1], which
        saturated the FMT and Total panels whenever the per-metric sum
        exceeded 1.)

        Args:
            metrics_keys: currently unused; kept so existing callers that pass
                it keep working.
        """
        D = self.D_graph
        node_contributions = np.array(self.node_score_contributions)

        # --- FMT contribution ---
        if hasattr(self, 'weighted_fmt'):
            fuzzy_tensor = np.array(self.weighted_fmt)
        else:
            fuzzy_tensor = self.compute_fuzzy_metric_tensor(normalize=True)

        # Normalize across the entire tensor for plotting
        fuzzy_tensor_norm = (fuzzy_tensor - fuzzy_tensor.min()) / (fuzzy_tensor.max() - fuzzy_tensor.min() + 1e-12)
        fmt_matrix = fuzzy_tensor_norm.sum(axis=2)  # sum over metrics
        np.fill_diagonal(fmt_matrix, 0)

        # --- Raw matrix on diagonal ---
        raw_matrix = np.zeros((D, D))
        np.fill_diagonal(raw_matrix, node_contributions)

        # --- Total contribution ---
        total_matrix = raw_matrix + fmt_matrix

        matrices = [raw_matrix, fmt_matrix, total_matrix]
        titles = ["Raw Node Contribution", "Normalized FMT Contribution", "Total Contribution"]

        # --- Shared color scale across all panels ---
        global_min = min(mat.min() for mat in matrices)
        global_max = max(mat.max() for mat in matrices)

        # --- Plot ---
        fig, axes = plt.subplots(1, 3, figsize=(15, 4))

        for ax, mat, title in zip(axes, matrices, titles):
            im = ax.imshow(mat, cmap='viridis', vmin=global_min, vmax=global_max)
            for i in range(D):
                for j in range(D):
                    ax.text(j, i, f"{mat[i,j]:.2f}", ha='center', va='center', color='white', fontsize=8)
            ax.set_title(title)
            ax.set_xticks(range(D))
            ax.set_xticklabels([f"Node {i+1}" for i in range(D)])
            ax.set_yticks(range(D))
            ax.set_yticklabels([f"Node {i+1}" for i in range(D)])

        fig.colorbar(im, ax=axes, orientation='vertical', fraction=0.025, pad=0.04, label='Contribution Value')
        plt.tight_layout()
        plt.show()

    def correlate_fmt_interactions_per_node(self, fmt_bounds=None, interaction_tensor=None, verbose=True):
        """
        Correlate the FMT bounds with inter-layer interactions per node and per metric.

        For source node i and metric k, the midpoint of
        ``fmt_bounds[i, :, k, :]`` across targets is correlated (Pearson)
        with the mean interaction vector ``interaction_tensor[i, :, :]``
        averaged over its last axis.

        If either vector is (numerically) constant, the correlation is
        undefined and both ``r`` and ``p`` are reported as NaN without
        calling ``pearsonr``, matching the guard used by
        ``correlation_penalty``. Note that bounds produced by
        ``compute_fmt_with_elite_bounds`` are identical for every target j,
        so the default ``fmt_bounds`` always lands in this NaN branch.

        Args:
            fmt_bounds: array indexed as [i, j, metric, 0 or 1]. Defaults to
                ``compute_fmt_with_elite_bounds(top_k=21)``.
            interaction_tensor: array indexed as [i, j, inter_dim]. Defaults
                to ``print_interactions(return_tensor=True, verbose=False)``.
            verbose: when true, print each correlation and show a scatter plot
                for it (one figure per node/metric pair).

        Returns:
            Dict of the form ``{node_idx: {metric: {'r': ..., 'p': ...}}}``.
        """
        from scipy.stats import pearsonr

        metrics_keys = self.MK
        D = self.D_graph

        # Compute tensors if not provided
        if fmt_bounds is None:
            fmt_bounds = self.compute_fmt_with_elite_bounds(top_k=21)
        if interaction_tensor is None:
            interaction_tensor = self.print_interactions(return_tensor=True, verbose=False)

        if verbose:
            # Plotting is only needed on the verbose path.
            import matplotlib.pyplot as plt

        # Reduce interaction tensor along inter_dim
        inter_mean = interaction_tensor.mean(axis=2)  # (D, D)

        node_correlations = {}

        for i in range(D):
            node_correlations[i] = {}
            # Interaction vector for edges from node i to every j
            inter_vec = inter_mean[i, :]
            inter_varies = np.std(inter_vec) > 1e-8

            for k, key in enumerate(metrics_keys):
                # Midpoint of lower/upper for every target j of source i
                fmt_mean = fmt_bounds[i, :, k, :].mean(axis=1)  # shape (D,)

                if inter_varies and np.std(fmt_mean) > 1e-8:
                    corr, pval = pearsonr(fmt_mean, inter_vec)
                else:
                    # Pearson r is undefined for constant input.
                    corr, pval = float('nan'), float('nan')
                node_correlations[i][key] = {'r': corr, 'p': pval}

                if verbose:
                    print(f"Node {i} | {key}: r = {corr:.3f}, p = {pval:.3e}")
                    plt.figure(figsize=(4, 3))
                    plt.scatter(fmt_mean, inter_vec, alpha=0.7, edgecolor='k', color='skyblue')
                    plt.xlabel(f"FMT {key} (Node {i} -> others)")
                    plt.ylabel(f"Interaction mean (Node {i} -> others)")
                    plt.title(f"Node {i} | {key} correlation: r={corr:.3f}")
                    plt.grid(True)
                    plt.show()

        return node_correlations

    def plot_fmt_with_run_metrics(self, metrics_keys=None):
        """
        Plot a heatmap of ``self.weighted_fmt`` averaged over its second axis.

        Rows are source nodes and columns are metrics. Cell colors use a
        min-max normalized copy of the averaged values; the numbers printed
        in the cells are the un-normalized averages.

        Args:
            metrics_keys: column labels. Defaults to ``self.MK``.

        Raises:
            ValueError: if the instance has no ``weighted_fmt`` attribute.
        """
        labels = self.MK if metrics_keys is None else metrics_keys
        n_rows = self.D_graph
        n_cols = len(labels)

        if not hasattr(self, 'weighted_fmt'):
            raise ValueError("Weighted FMT not available. Run run_outer() first.")

        # Average over targets: (D, D, M) -> (D, M)
        per_source = np.array(self.weighted_fmt).mean(axis=1)

        low, high = per_source.min(), per_source.max()
        per_source_scaled = (per_source - low) / (high - low + 1e-12)

        fig, ax = plt.subplots(figsize=(1.2*n_cols + 4, 0.35*n_rows + 4))
        im = ax.imshow(per_source_scaled, cmap='viridis', aspect='auto', vmin=0, vmax=1)

        for row in range(n_rows):
            for col in range(n_cols):
                ax.text(col, row, f"{per_source[row, col]:.2f}", ha='center', va='center', color='white', fontsize=8)

        ax.set_xticks(range(n_cols))
        ax.set_xticklabels(labels[:n_cols], rotation=45, ha='right')
        ax.set_yticks(range(n_rows))
        ax.set_yticklabels([f"Node {row}" for row in range(n_rows)])
        ax.set_title("Weighted FMT Metrics (Run Output)")
        fig.colorbar(im, ax=ax, label='Weighted Metric Value')
        plt.tight_layout()
        plt.show()


    def correlation_penalty(self, fmt_bounds=None, interaction_tensor=None, top_k=21):
        """
        Compute a penalty that grows when per-node FMT metrics correlate with interactions.

        For every source node i and metric k, the midpoint of
        ``fmt_bounds[i, :, k, :]`` across targets is correlated (Pearson)
        with node i's mean interaction vector. Pairs where either vector is
        numerically constant contribute nothing. The result is the sum of
        ``|r|`` divided by the number of node/metric pairs, so it lies in
        [0, 1]. (It used to be divided by the square of that count, which
        contradicted the stated normalization.)

        Args:
            fmt_bounds: array indexed as [i, j, metric, 0 or 1]. Defaults to
                ``compute_fmt_with_elite_bounds(top_k=top_k)``.
            interaction_tensor: array indexed as [i, j, inter_dim]. Defaults
                to ``print_interactions(return_tensor=True, verbose=False)``.
            top_k: sample count used only when ``fmt_bounds`` is computed
                here. It is an explicit argument so the method no longer
                depends on a module-level ``top_k`` being defined.

        Returns:
            The penalty as a float; 0.0 when there are no node/metric pairs.
            Per the original description it is meant to be subtracted from
            the outer score by the caller.
        """
        from scipy.stats import pearsonr

        D = self.D_graph
        metrics_keys = self.MK

        num_terms = D * len(metrics_keys)
        if num_terms == 0:
            return 0.0  # nothing to correlate; also avoids dividing by zero

        if fmt_bounds is None:
            fmt_bounds = self.compute_fmt_with_elite_bounds(top_k=top_k)
        if interaction_tensor is None:
            interaction_tensor = self.print_interactions(return_tensor=True, verbose=False)

        inter_mean = interaction_tensor.mean(axis=2)
        total_penalty = 0.0

        for i in range(D):
            inter_vec = inter_mean[i, :]
            if np.std(inter_vec) <= 1e-8:
                continue  # constant interactions: correlation undefined for every metric
            for k in range(len(metrics_keys)):
                fmt_mean = fmt_bounds[i, :, k, :].mean(axis=1)
                if np.std(fmt_mean) > 1e-8:
                    corr, _ = pearsonr(fmt_mean, inter_vec)
                    total_penalty += abs(corr)  # penalize high correlation

        # normalize by number of nodes x metrics
        return float(total_penalty / num_terms)


# ---------------- USAGE ----------------
if __name__ == "__main__":
    # Build the optimizer. The configuration names passed here, and `top_k`
    # used further down, are not defined in this block; presumably they are
    # module-level values set earlier in the file.
    optimizer = Fuzzy_Hierarchical_Multiplex(
        candidate_dims, D_graph,
        inner_archive_size, inner_offspring,
        outer_archive_size, outer_offspring,
        synthetic_targets,
        inner_learning, gamma_interlayer=1,
        causal_flag=False
    )
    metrics_list = optimizer.run()
    optimizer.plot_pointwise_minmax_elite()
    optimizer.plot_nested_activations()
    # Compute FMT elite bounds from top_k+10 perturbed samples per node.
    # NOTE(review): this result is reassigned below before it is ever read;
    # the call still draws from NumPy's global random state.
    fmt_elite_bounds = optimizer.compute_fmt_with_elite_bounds(top_k=top_k+10)

    # Heatmap of the weighted FMT stored on the optimizer (needs
    # `optimizer.weighted_fmt`, presumably set during run()).
    optimizer.plot_fmt_with_run_metrics()

    # Compute the normalized fuzzy metric tensor and plot its per-metric heatmaps
    fmt_tensor = optimizer.compute_fuzzy_metric_tensor(normalize=True)
    optimizer.plot_fuzzy_metric_tensor_heatmaps(fmt_tensor)

    # Per-node score contribution panels, then the outer graph
    optimizer.plot_node_score_contribution()
    optimizer.plot_outer_fuzzy_graph()
    # Print every inter-layer activation and keep the dense tensor
    tensor = optimizer.print_interactions()

    print("Tensor shape:", tensor.shape,'\n',tensor)
    # Recompute the elite bounds (this time with top_k samples) and fetch the
    # interaction tensor silently, then plot the collapsed bounds.
    # NOTE(review): `interaction_tensor` is only consumed by the disabled
    # correlation example below.

    fmt_elite_bounds = optimizer.compute_fmt_with_elite_bounds(top_k=top_k)
    interaction_tensor = optimizer.print_interactions(return_tensor=True, verbose=False)
    optimizer.plot_fmt_with_bounds(fmt_elite_bounds)

    # Disabled example: per-node, per-metric correlations.
    # node_metric_corrs = optimizer.correlate_fmt_interactions_per_node(
    #     fmt_bounds=fmt_elite_bounds,
    #     interaction_tensor=interaction_tensor
    # )