Skip to content

Commit 3231e33

Browse files
committed
fix: end hungarian attempt
1 parent bfa64e0 commit 3231e33

6 files changed

Lines changed: 60 additions & 28 deletions

File tree

examples/advanced/DroppedNeuralNet/Data/_04_Analysis/Schemas/BlockAssignment.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ namespace DroppedNeuralNet.Data._04_Analysis.Schemas;
44

55
/// <summary>
66
/// A single Block pairing selected by the Hungarian algorithm: the globally optimal
7-
/// assignment of inp pieces to out pieces under the minimum total ProductNorm objective.
7+
/// assignment of inp pieces to out pieces under the minimum total CoherenceScore objective.
88
/// </summary>
99
[FlowthruSchema]
1010
public partial record BlockAssignment
@@ -15,6 +15,6 @@ public partial record BlockAssignment
1515
public int InpPieceIndex { get; init; }
1616
public int OutPieceIndex { get; init; }
1717

18-
/// <summary>ProductNorm score for this pairing — the cost the solver minimized.</summary>
19-
public float AssignmentScore { get; init; }
18+
/// <summary>Coherence score for this pairing — the raw (pre-Sinkhorn) value of the cost the solver minimized; see run_hungarian.py.</summary>
19+
public float CoherenceScore { get; init; }
2020
}

examples/advanced/DroppedNeuralNet/Data/_04_Analysis/Schemas/PairingScore.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,5 @@ public partial record PairingScore
1414
public int OutPieceIndex { get; init; }
1515

1616
/// <summary>||W_out @ W_inp||_F / (||W_out||_F * ||W_inp||_F). Lower = stronger residual coupling between these two layers.</summary>
17-
public float ProductNorm { get; init; }
17+
public float CoherenceScore { get; init; }
1818
}

examples/advanced/DroppedNeuralNet/Flows/Exploration/Steps/compute_pairing_scores.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,17 @@
1-
"""Compute ||W_out @ W_inp||_F for every legal (inp, out) Block pairing.
1+
"""Compute a normalized coherence score for every legal (inp, out) Block pairing.
22
3-
Lower values indicate that the out layer approximates the inverse of the inp layer —
4-
the residual coupling signal left in the weights by the training objective.
3+
The score measures structural alignment between the two weight matrices, independent of
4+
their individual magnitudes:
5+
6+
CoherenceScore(inp, out) = ||W_out @ W_inp||_F / (||W_out||_F * ||W_inp||_F)
7+
8+
This is the Frobenius norm of the product normalized by the product of the individual
9+
norms — analogous to cosine similarity but in matrix space. A low score means
10+
W_out approximately inverts W_inp (the residual coupling signal left by training).
11+
12+
Using the raw ||W_out @ W_inp||_F directly contaminates the cost matrix with weight
13+
magnitude: a piece whose weights happen to be 2× larger will score high in every row
14+
regardless of structural compatibility, drowning the signal the Hungarian solver needs.
515
"""
616
import io
717
import logging
@@ -29,7 +39,7 @@ def compute_pairing_scores(
2939
legal_pairings: Dimension-valid (InpPieceIndex, OutPieceIndex) candidates.
3040
3141
Returns:
32-
DataFrame with [InpPieceIndex, OutPieceIndex, ProductNorm].
42+
DataFrame with [InpPieceIndex, OutPieceIndex, CoherenceScore].
3343
"""
3444
blob_by_index: dict[int, bytes] = {
3545
int(r["PieceIndex"]): r["Data"] for _, r in pieces.iterrows()
@@ -71,6 +81,10 @@ def load_weight(piece_idx: int) -> torch.Tensor:
7181
inp_weights = {idx: load_weight(idx) for idx in inp_indices} # each (96, 48)
7282
out_weights = {idx: load_weight(idx) for idx in out_indices} # each (48, 96)
7383

84+
# Pre-compute per-piece Frobenius norms for the denominator
85+
inp_norms = {idx: float(torch.norm(w, p="fro")) for idx, w in inp_weights.items()}
86+
out_norms = {idx: float(torch.norm(w, p="fro")) for idx, w in out_weights.items()}
87+
7488
rows = []
7589
with torch.no_grad():
7690
for inp_idx in inp_indices:
@@ -79,13 +93,14 @@ def load_weight(piece_idx: int) -> torch.Tensor:
7993
if (inp_idx, out_idx) not in legal_set:
8094
continue
8195
W_out = out_weights[out_idx] # (48, 96)
82-
product = W_out @ W_inp # (48, 48)
83-
norm = float(torch.norm(product, p="fro"))
96+
product_norm = float(torch.norm(W_out @ W_inp, p="fro"))
97+
denom = inp_norms[inp_idx] * out_norms[out_idx]
98+
coherence = product_norm / denom if denom > 0 else product_norm
8499
rows.append({
85100
"InpPieceIndex": inp_idx,
86101
"OutPieceIndex": out_idx,
87-
"ProductNorm": norm,
102+
"CoherenceScore": coherence,
88103
})
89104

90105
logger.info(f"[compute_pairing_scores] Computed {len(rows)} scores")
91-
return pd.DataFrame(rows, columns=["InpPieceIndex", "OutPieceIndex", "ProductNorm"])
106+
return pd.DataFrame(rows, columns=["InpPieceIndex", "OutPieceIndex", "CoherenceScore"])

examples/advanced/DroppedNeuralNet/Flows/Exploration/Steps/run_hungarian.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
"""Apply the Hungarian algorithm to the pairing score matrix to find optimal Block pairings.
22
3-
The 48×48 cost matrix is built from ProductNorm scores (lower = better residual coupling).
3+
The 48×48 cost matrix is built from CoherenceScore values (lower = better structural alignment).
4+
Before solving, Sinkhorn normalization is applied to make the matrix doubly stochastic:
5+
each row and column is iteratively divided by its sum for a fixed 30 iterations (sufficient at n=48). This equalizes
6+
the per-row and per-column pressure so that a row with a very sharp minimum cannot steal
7+
an out-piece from a row whose minimum is only weakly discriminated.
8+
49
scipy.optimize.linear_sum_assignment solves the minimum-cost perfect matching in O(n³).
510
"""
611
import logging
@@ -20,10 +25,10 @@ def run_hungarian(pairing_scores: pd.DataFrame) -> pd.DataFrame:
2025
"""Solve the minimum-cost Block pairing via the Hungarian algorithm.
2126
2227
Args:
23-
pairing_scores: DataFrame with [InpPieceIndex, OutPieceIndex, ProductNorm].
28+
pairing_scores: DataFrame with [InpPieceIndex, OutPieceIndex, CoherenceScore].
2429
2530
Returns:
26-
DataFrame with [BlockIndex, InpPieceIndex, OutPieceIndex, AssignmentScore].
31+
DataFrame with [BlockIndex, InpPieceIndex, OutPieceIndex, CoherenceScore].
2732
BlockIndex is a sequential label (0–47), not the execution order.
2833
"""
2934
inp_indices = sorted(pairing_scores["InpPieceIndex"].astype(int).unique().tolist())
@@ -42,24 +47,33 @@ def run_hungarian(pairing_scores: pd.DataFrame) -> pd.DataFrame:
4247
for _, row in pairing_scores.iterrows():
4348
i = inp_pos[int(row["InpPieceIndex"])]
4449
j = out_pos[int(row["OutPieceIndex"])]
45-
cost_matrix[i, j] = float(row["ProductNorm"])
50+
cost_matrix[i, j] = float(row["CoherenceScore"])
51+
52+
# Sinkhorn normalization: iteratively divide rows then columns by their sums.
53+
# ~30 iterations is sufficient for convergence at n=48.
54+
# Replaces inf with a large finite value first so row/col sums are never zero.
55+
sinkhorn = cost_matrix.copy()
56+
sinkhorn[sinkhorn == np.inf] = sinkhorn[sinkhorn < np.inf].max() * 10
57+
for _ in range(30):
58+
sinkhorn /= sinkhorn.sum(axis=1, keepdims=True) # normalize rows
59+
sinkhorn /= sinkhorn.sum(axis=0, keepdims=True) # normalize columns
4660

47-
row_ind, col_ind = linear_sum_assignment(cost_matrix)
61+
row_ind, col_ind = linear_sum_assignment(sinkhorn)
4862

4963
rows = []
5064
for block_idx, (i, j) in enumerate(zip(row_ind, col_ind)):
5165
rows.append({
5266
"BlockIndex": block_idx,
5367
"InpPieceIndex": inp_indices[i],
5468
"OutPieceIndex": out_indices[j],
55-
"AssignmentScore": float(cost_matrix[i, j]),
69+
"CoherenceScore": float(cost_matrix[i, j]), # report original (pre-Sinkhorn) score
5670
})
5771

58-
total_cost = sum(r["AssignmentScore"] for r in rows)
72+
total_cost = sum(r["CoherenceScore"] for r in rows)
5973
logger.info(
60-
f"[run_hungarian] Assigned {len(rows)} blocks, total cost={total_cost:.4f}, "
74+
f"[run_hungarian] Assigned {len(rows)} blocks, total CoherenceScore={total_cost:.4f}, "
6175
f"mean={total_cost / len(rows):.4f}"
6276
)
6377
return pd.DataFrame(
64-
rows, columns=["BlockIndex", "InpPieceIndex", "OutPieceIndex", "AssignmentScore"]
78+
rows, columns=["BlockIndex", "InpPieceIndex", "OutPieceIndex", "CoherenceScore"]
6579
)

examples/advanced/DroppedNeuralNet/Flows/Solver/Steps/validate_permutations.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919

2020
class Block(nn.Module):
21+
"""Mirrors the original Block architecture: Linear(48 → 96) + ReLU + Linear(96 → 48) with a residual connection."""
2122
def __init__(self, in_dim: int, hidden_dim: int):
2223
super().__init__()
2324
self.inp = nn.Linear(in_dim, hidden_dim)
@@ -33,6 +34,7 @@ def forward(self, x):
3334

3435

3536
class LastLayer(nn.Module):
37+
"""Mirrors the original LastLayer architecture: Linear(48 → 1) regression head."""
3638
def __init__(self, in_dim: int, out_dim: int):
3739
super().__init__()
3840
self.layer = nn.Linear(in_dim, out_dim)
@@ -42,6 +44,7 @@ def forward(self, x):
4244

4345

4446
def _load_linear(raw_bytes: bytes) -> nn.Linear:
47+
"""Deserialize raw .pth bytes into a nn.Linear layer with weights and bias."""
4548
state_dict = torch.load(
4649
io.BytesIO(raw_bytes),
4750
weights_only=True,

examples/advanced/DroppedNeuralNet/Flows/Validation/Steps/diagnose_pairings.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
small the pairings are correct and only the beam search range needs widening.
1111
1212
Probe 2 — PairingSignal
13-
Summarises the distribution of ProductNorm scores that fed into the Hungarian solver.
13+
Summarises the distribution of CoherenceScore values that fed into the Hungarian solver.
1414
A flat distribution (near-zero std) means the cost matrix has no discriminating power
1515
and the assignment is essentially random.
1616
@@ -107,7 +107,7 @@ def diagnose_pairings(
107107
108108
Args:
109109
block_assignments: Hungarian-optimal (BlockIndex, InpPieceIndex, OutPieceIndex,
110-
AssignmentScore) — 48 rows.
110+
CoherenceScore) — 48 rows.
111111
pieces: Raw byte blobs indexed by PieceIndex.
112112
historical_data: Sensor measurements; the ``pred`` column is the validation target.
113113
candidate_permutations: Ranked int[97] candidates from rank_orderings.
@@ -167,18 +167,18 @@ def diagnose_pairings(
167167
print(f"[diagnose_pairings] FixedOrdering max_err={max_err:.6f} ({pairing_verdict})", flush=True)
168168

169169
# ------------------------------------------------------------------
170-
# Probe 2 — ProductNorm signal statistics
170+
# Probe 2 — CoherenceScore signal statistics
171171
# ------------------------------------------------------------------
172-
logger.info("[diagnose_pairings] Probe 2: ProductNorm score distribution")
172+
logger.info("[diagnose_pairings] Probe 2: CoherenceScore distribution")
173173

174-
scores = block_assignments["AssignmentScore"].astype(float).values
174+
scores = block_assignments["CoherenceScore"].astype(float).values
175175
score_mean = float(np.mean(scores))
176176
score_std = float(np.std(scores))
177177
score_range = float(np.max(scores) - np.min(scores))
178178

179-
rows.append({"Category": "PairingSignal", "Metric": "ScoreMean", "Value": score_mean, "Notes": "mean ProductNorm of Hungarian-assigned pairs"})
179+
rows.append({"Category": "PairingSignal", "Metric": "ScoreMean", "Value": score_mean, "Notes": "mean CoherenceScore of Hungarian-assigned pairs"})
180180
rows.append({"Category": "PairingSignal", "Metric": "ScoreStd", "Value": score_std, "Notes": "near-zero => cost matrix is flat => Hungarian is guessing"})
181-
rows.append({"Category": "PairingSignal", "Metric": "ScoreRange", "Value": score_range, "Notes": "max - min ProductNorm across 48 assigned pairs"})
181+
rows.append({"Category": "PairingSignal", "Metric": "ScoreRange", "Value": score_range, "Notes": "max - min CoherenceScore across 48 assigned pairs"})
182182

183183
logger.info(
184184
f"[diagnose_pairings] PairingSignal mean={score_mean:.6f} std={score_std:.6f} range={score_range:.6f}"

0 commit comments

Comments
 (0)