Skip to content

Commit 3231e33

Browse files
committed
fix: end hungarian attempt
1 parent bfa64e0 commit 3231e33

6 files changed

Lines changed: 60 additions & 28 deletions

File tree

examples/advanced/DroppedNeuralNet/Data/_04_Analysis/Schemas/BlockAssignment.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ namespace DroppedNeuralNet.Data._04_Analysis.Schemas;
44

55
/// <summary>
66
/// A single Block pairing selected by the Hungarian algorithm: the globally optimal
7-
/// assignment of inp pieces to out pieces under the minimum total ProductNorm objective.
7+
/// assignment of inp pieces to out pieces under the minimum total CoherenceScore objective.
88
/// </summary>
99
[FlowthruSchema]
1010
public partial record BlockAssignment
@@ -15,6 +15,6 @@ public partial record BlockAssignment
1515
public int InpPieceIndex { get; init; }
1616
public int OutPieceIndex { get; init; }
1717

18-
/// <summary>ProductNorm score for this pairing — the cost the solver minimized.</summary>
19-
public float AssignmentScore { get; init; }
18+
/// <summary>Coherence score for this pairing — the raw (pre-Sinkhorn) value of the cost the solver minimized; see run_hungarian.py.</summary>
19+
public float CoherenceScore { get; init; }
2020
}

examples/advanced/DroppedNeuralNet/Data/_04_Analysis/Schemas/PairingScore.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,5 @@ public partial record PairingScore
1414
public int OutPieceIndex { get; init; }
1515

1616
/// <summary>||W_out @ W_inp||_F / (||W_out||_F * ||W_inp||_F). Lower = stronger residual coupling between these two layers.</summary>
17-
public float ProductNorm { get; init; }
17+
public float CoherenceScore { get; init; }
1818
}

examples/advanced/DroppedNeuralNet/Flows/Exploration/Steps/compute_pairing_scores.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,17 @@
1-
"""Compute ||W_out @ W_inp||_F for every legal (inp, out) Block pairing.
1+
"""Compute a normalized coherence score for every legal (inp, out) Block pairing.
22
3-
Lower values indicate that the out layer approximates the inverse of the inp layer —
4-
the residual coupling signal left in the weights by the training objective.
3+
The score measures structural alignment between the two weight matrices, independent of
4+
their individual magnitudes:
5+
6+
CoherenceScore(inp, out) = ||W_out @ W_inp||_F / (||W_out||_F * ||W_inp||_F)
7+
8+
This is the Frobenius norm of the product normalized by the product of the individual
9+
norms — analogous to cosine similarity but in matrix space. A low score means
10+
W_out approximately inverts W_inp (the residual coupling signal left by training).
11+
12+
Using the raw ||W_out @ W_inp||_F directly contaminates the cost matrix with weight
13+
magnitude: a piece whose weights happen to be 2× larger will score high in every row
14+
regardless of structural compatibility, drowning the signal the Hungarian solver needs.
515
"""
616
import io
717
import logging
@@ -29,7 +39,7 @@ def compute_pairing_scores(
2939
legal_pairings: Dimension-valid (InpPieceIndex, OutPieceIndex) candidates.
3040
3141
Returns:
32-
DataFrame with [InpPieceIndex, OutPieceIndex, ProductNorm].
42+
DataFrame with [InpPieceIndex, OutPieceIndex, CoherenceScore].
3343
"""
3444
blob_by_index: dict[int, bytes] = {
3545
int(r["PieceIndex"]): r["Data"] for _, r in pieces.iterrows()
@@ -71,6 +81,10 @@ def load_weight(piece_idx: int) -> torch.Tensor:
7181
inp_weights = {idx: load_weight(idx) for idx in inp_indices} # each (96, 48)
7282
out_weights = {idx: load_weight(idx) for idx in out_indices} # each (48, 96)
7383

84+
# Pre-compute per-piece Frobenius norms for the denominator
85+
inp_norms = {idx: float(torch.norm(w, p="fro")) for idx, w in inp_weights.items()}
86+
out_norms = {idx: float(torch.norm(w, p="fro")) for idx, w in out_weights.items()}
87+
7488
rows = []
7589
with torch.no_grad():
7690
for inp_idx in inp_indices:
@@ -79,13 +93,14 @@ def load_weight(piece_idx: int) -> torch.Tensor:
7993
if (inp_idx, out_idx) not in legal_set:
8094
continue
8195
W_out = out_weights[out_idx] # (48, 96)
82-
product = W_out @ W_inp # (48, 48)
83-
norm = float(torch.norm(product, p="fro"))
96+
product_norm = float(torch.norm(W_out @ W_inp, p="fro"))
97+
denom = inp_norms[inp_idx] * out_norms[out_idx]
98+
coherence = product_norm / denom if denom > 0 else product_norm
8499
rows.append({
85100
"InpPieceIndex": inp_idx,
86101
"OutPieceIndex": out_idx,
87-
"ProductNorm": norm,
102+
"CoherenceScore": coherence,
88103
})
89104

90105
logger.info(f"[compute_pairing_scores] Computed {len(rows)} scores")
91-
return pd.DataFrame(rows, columns=["InpPieceIndex", "OutPieceIndex", "ProductNorm"])
106+
return pd.DataFrame(rows, columns=["InpPieceIndex", "OutPieceIndex", "CoherenceScore"])

examples/advanced/DroppedNeuralNet/Flows/Exploration/Steps/run_hungarian.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
"""Apply the Hungarian algorithm to the pairing score matrix to find optimal Block pairings.
22
3-
The 48×48 cost matrix is built from ProductNorm scores (lower = better residual coupling).
3+
The 48×48 cost matrix is built from CoherenceScore values (lower = better structural alignment).
4+
Before solving, Sinkhorn normalization is applied to make the matrix doubly stochastic:
5+
each row and column is iteratively divided by its sum for a fixed 30 iterations (sufficient at n=48). This equalizes
6+
the per-row and per-column pressure so that a row with a very sharp minimum cannot steal
7+
an out-piece from a row whose minimum is only weakly discriminated.
8+
49
scipy.optimize.linear_sum_assignment solves the minimum-cost perfect matching in O(n³).
510
"""
611
import logging
@@ -20,10 +25,10 @@ def run_hungarian(pairing_scores: pd.DataFrame) -> pd.DataFrame:
2025
"""Solve the minimum-cost Block pairing via the Hungarian algorithm.
2126
2227
Args:
23-
pairing_scores: DataFrame with [InpPieceIndex, OutPieceIndex, ProductNorm].
28+
pairing_scores: DataFrame with [InpPieceIndex, OutPieceIndex, CoherenceScore].
2429
2530
Returns:
26-
DataFrame with [BlockIndex, InpPieceIndex, OutPieceIndex, AssignmentScore].
31+
DataFrame with [BlockIndex, InpPieceIndex, OutPieceIndex, CoherenceScore].
2732
BlockIndex is a sequential label (0–47), not the execution order.
2833
"""
2934
inp_indices = sorted(pairing_scores["InpPieceIndex"].astype(int).unique().tolist())
@@ -42,24 +47,33 @@ def run_hungarian(pairing_scores: pd.DataFrame) -> pd.DataFrame:
4247
for _, row in pairing_scores.iterrows():
4348
i = inp_pos[int(row["InpPieceIndex"])]
4449
j = out_pos[int(row["OutPieceIndex"])]
45-
cost_matrix[i, j] = float(row["ProductNorm"])
50+
cost_matrix[i, j] = float(row["CoherenceScore"])
51+
52+
# Sinkhorn normalization: iteratively divide rows then columns by their sums.
53+
# ~30 iterations is sufficient for convergence at n=48.
54+
# Replaces inf with a large finite value first so row/col sums are never zero.
55+
sinkhorn = cost_matrix.copy()
56+
sinkhorn[sinkhorn == np.inf] = sinkhorn[sinkhorn < np.inf].max() * 10
57+
for _ in range(30):
58+
sinkhorn /= sinkhorn.sum(axis=1, keepdims=True) # normalize rows
59+
sinkhorn /= sinkhorn.sum(axis=0, keepdims=True) # normalize columns
4660

47-
row_ind, col_ind = linear_sum_assignment(cost_matrix)
61+
row_ind, col_ind = linear_sum_assignment(sinkhorn)
4862

4963
rows = []
5064
for block_idx, (i, j) in enumerate(zip(row_ind, col_ind)):
5165
rows.append({
5266
"BlockIndex": block_idx,
5367
"InpPieceIndex": inp_indices[i],
5468
"OutPieceIndex": out_indices[j],
55-
"AssignmentScore": float(cost_matrix[i, j]),
69+
"CoherenceScore": float(cost_matrix[i, j]), # report original (pre-Sinkhorn) score
5670
})
5771

58-
total_cost = sum(r["AssignmentScore"] for r in rows)
72+
total_cost = sum(r["CoherenceScore"] for r in rows)
5973
logger.info(
60-
f"[run_hungarian] Assigned {len(rows)} blocks, total cost={total_cost:.4f}, "
74+
f"[run_hungarian] Assigned {len(rows)} blocks, total CoherenceScore={total_cost:.4f}, "
6175
f"mean={total_cost / len(rows):.4f}"
6276
)
6377
return pd.DataFrame(
64-
rows, columns=["BlockIndex", "InpPieceIndex", "OutPieceIndex", "AssignmentScore"]
78+
rows, columns=["BlockIndex", "InpPieceIndex", "OutPieceIndex", "CoherenceScore"]
6579
)

examples/advanced/DroppedNeuralNet/Flows/Solver/Steps/validate_permutations.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919

2020
class Block(nn.Module):
21+
"""Mirrors the original Block architecture: Linear(48 → 96) + ReLU + Linear(96 → 48) with a residual connection."""
2122
def __init__(self, in_dim: int, hidden_dim: int):
2223
super().__init__()
2324
self.inp = nn.Linear(in_dim, hidden_dim)
@@ -33,6 +34,7 @@ def forward(self, x):
3334

3435

3536
class LastLayer(nn.Module):
37+
"""Mirrors the original LastLayer architecture: Linear(48 → 1) regression head."""
3638
def __init__(self, in_dim: int, out_dim: int):
3739
super().__init__()
3840
self.layer = nn.Linear(in_dim, out_dim)
@@ -42,6 +44,7 @@ def forward(self, x):
4244

4345

4446
def _load_linear(raw_bytes: bytes) -> nn.Linear:
47+
"""Deserialize raw .pth bytes into a nn.Linear layer with weights and bias."""
4548
state_dict = torch.load(
4649
io.BytesIO(raw_bytes),
4750
weights_only=True,

examples/advanced/DroppedNeuralNet/Flows/Validation/Steps/diagnose_pairings.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
small the pairings are correct and only the beam search range needs widening.
1111
1212
Probe 2 — PairingSignal
13-
Summarises the distribution of ProductNorm scores that fed into the Hungarian solver.
13+
Summarises the distribution of CoherenceScore values that fed into the Hungarian solver.
1414
A flat distribution (near-zero std) means the cost matrix has no discriminating power
1515
and the assignment is essentially random.
1616
@@ -107,7 +107,7 @@ def diagnose_pairings(
107107
108108
Args:
109109
block_assignments: Hungarian-optimal (BlockIndex, InpPieceIndex, OutPieceIndex,
110-
AssignmentScore) — 48 rows.
110+
CoherenceScore) — 48 rows.
111111
pieces: Raw byte blobs indexed by PieceIndex.
112112
historical_data: Sensor measurements; the ``pred`` column is the validation target.
113113
candidate_permutations: Ranked int[97] candidates from rank_orderings.
@@ -167,18 +167,18 @@ def diagnose_pairings(
167167
print(f"[diagnose_pairings] FixedOrdering max_err={max_err:.6f} ({pairing_verdict})", flush=True)
168168

169169
# ------------------------------------------------------------------
170-
# Probe 2 — ProductNorm signal statistics
170+
# Probe 2 — CoherenceScore signal statistics
171171
# ------------------------------------------------------------------
172-
logger.info("[diagnose_pairings] Probe 2: ProductNorm score distribution")
172+
logger.info("[diagnose_pairings] Probe 2: CoherenceScore distribution")
173173

174-
scores = block_assignments["AssignmentScore"].astype(float).values
174+
scores = block_assignments["CoherenceScore"].astype(float).values
175175
score_mean = float(np.mean(scores))
176176
score_std = float(np.std(scores))
177177
score_range = float(np.max(scores) - np.min(scores))
178178

179-
rows.append({"Category": "PairingSignal", "Metric": "ScoreMean", "Value": score_mean, "Notes": "mean ProductNorm of Hungarian-assigned pairs"})
179+
rows.append({"Category": "PairingSignal", "Metric": "ScoreMean", "Value": score_mean, "Notes": "mean CoherenceScore of Hungarian-assigned pairs"})
180180
rows.append({"Category": "PairingSignal", "Metric": "ScoreStd", "Value": score_std, "Notes": "near-zero => cost matrix is flat => Hungarian is guessing"})
181-
rows.append({"Category": "PairingSignal", "Metric": "ScoreRange", "Value": score_range, "Notes": "max - min ProductNorm across 48 assigned pairs"})
181+
rows.append({"Category": "PairingSignal", "Metric": "ScoreRange", "Value": score_range, "Notes": "max - min CoherenceScore across 48 assigned pairs"})
182182

183183
logger.info(
184184
f"[diagnose_pairings] PairingSignal mean={score_mean:.6f} std={score_std:.6f} range={score_range:.6f}"

0 commit comments

Comments
 (0)