
# Multi-Factor Unbinding and Factorization Methods

Topics: Factorization, multi-factor unbinding, composite structures, iterative cleanup
Time: 15 minutes
Prerequisites: 27_cleanup_strategies.py, 24_app_working_memory.py
Related: 23_app_symbolic_reasoning.py

This example demonstrates advanced techniques for factorizing composite
hypervectors - decomposing bundled representations back into their constituent
factors. Mastering factorization is essential for information retrieval from
distributed hyperdimensional representations.

Key concepts:
- Bundle factorization: Decompose bundled items (A ⊕ B ⊕ C → A, B, C)
- Binding factorization: Decompose bound structures (A ⊗ B ⊗ C → A, B, C)
- Iterative unbinding: Sequential factor extraction
- Noise accumulation: How errors compound during factorization
- Practical strategies: When and how to factorize

Factorization enables querying and retrieving information from complex
composite representations built through binding and bundling operations.


In [None]:
import numpy as np
from holovec import VSA
from holovec.utils.cleanup import BruteForceCleanup, ResonatorCleanup

# Opening banner for the whole walkthrough, followed by one blank line.
print("\n".join(["=" * 70, "Multi-Factor Unbinding and Factorization", "=" * 70, ""]))

# Shared VSA model used by every demo below: FHRR, 10,000 dimensions, fixed seed.
model = VSA.create('FHRR', dim=10000, seed=42)

# Cleanup strategy instance reused for every cleanup()/factorize() call below.
cleanup = BruteForceCleanup()

# ============================================================================
# Demo 1: Bundle Factorization - Recovering Bundled Items
# ============================================================================
# Bundles four codebook entries, asks the cleanup strategy for the six best
# matching codebook labels, and reports how many of the top four were really
# part of the bundle.
print("=" * 70)
print("Demo 1: Bundle Factorization")
print("=" * 70)

# Single source of truth for what goes into the bundle; the scenario text,
# the per-row check mark and the recall computation are all derived from it.
bundled_names = ("item_0", "item_2", "item_5", "item_7")
expected_labels = frozenset(bundled_names)
n_bundled = len(bundled_names)

print(f"\nScenario: Bundle of {n_bundled} items")

# Codebook of 10 random hypervectors keyed "item_0" .. "item_9"; reused by
# the later demos.
items = {f"item_{i}": model.random(seed=100 + i) for i in range(10)}

# Bundle the selected items into one composite hypervector.
bundled = [items[name] for name in bundled_names]
bundle = model.bundle(bundled)

print("  Bundle: " + " ⊕ ".join(bundled_names))

# Factorize to recover all items
print("\n" + "=" * 70)
print("Factorizing bundle:")
print("=" * 70)

# Ask for more factors than were bundled so that non-members show up in the
# listing as well.
labels, sims = cleanup.factorize(bundle, items, model, n_factors=6)

print("\nRecovered factors (top 6):")
for rank, (label, sim) in enumerate(zip(labels, sims), 1):
    in_bundle = "✓" if label in expected_labels else "✗"
    print(f"  {rank}. {label:10s}: {sim:.3f}  [{in_bundle}]")

# Recall over the first n_bundled returned labels.
correct_in_top4 = sum(label in expected_labels for label in labels[:n_bundled])
recall = correct_in_top4 / n_bundled

print(f"\nRecall@{n_bundled}: {recall:.2f} ({correct_in_top4}/{n_bundled} factors recovered)")

print("\nKey observation:")
print("  - Top factors are the original bundled items")
print("  - Similarity degrades but items still identifiable")
print("  - Ideal for 'what's in this bundle?' queries")

# ============================================================================
# Demo 2: Binding Chain Factorization - Sequential Unbinding
# ============================================================================
# Binds three codebook items into one chain, then peels the factors off in
# reverse order, checking each intermediate result with cleanup().
print()
print("=" * 70)
print("Demo 2: Binding Chain Factorization")
print("=" * 70)

print("\nScenario: Chain of bindings (A ⊗ B ⊗ C)")

# The three factors of the chain, taken from the shared codebook.
A, B, C = (items[name] for name in ("item_0", "item_1", "item_2"))

# chain = (A ⊗ B) ⊗ C
inner_pair = model.bind(A, B)
chain = model.bind(inner_pair, C)

print("  Chain: item_0 ⊗ item_1 ⊗ item_2")

# Method 1: Sequential unbinding (if you know the order)
print()
print("=" * 70)
print("Method 1: Sequential unbinding (knowing order)")
print("=" * 70)

# Step 1: removing C should leave something close to (A ⊗ B).
step1 = model.unbind(chain, C)
print("\n  Step 1: Unbind item_2")
# step1 is compared against every codebook item bound with item_1, so the
# returned label names the item X for which (X ⊗ item_1) matches best.
bound_with_item_1 = {
    name: model.bind(hv, items["item_1"]) for name, hv in items.items()
}
label1, sim1 = cleanup.cleanup(step1, bound_with_item_1, model)
print(f"    Result ≈ (item_0 ⊗ item_1), found: {label1} (sim={sim1:.3f})")

# Step 2: removing B from the step-1 result should leave something close to A.
step2 = model.unbind(step1, B)
print("\n  Step 2: Unbind item_1")
label2, sim2 = cleanup.cleanup(step2, items, model)
print(f"    Result ≈ item_0, found: {label2} (sim={sim2:.3f})")

for line in (
    "\nKey observation:",
    "  - Sequential unbinding requires knowing binding order",
    "  - Each unbind step recovers one factor",
    "  - Most reliable when order is known",
):
    print(line)

# ============================================================================
# Demo 3: Mixed Binding and Bundling - Structured Factorization
# ============================================================================
# Builds a two-slot role-filler record (bundle of two bound pairs) and then
# queries each slot by unbinding its role and cleaning up against the codebook.
print("\n" + "=" * 70)
print("Demo 3: Mixed Operations - Structured Factorization")
print("=" * 70)

print("\nScenario: Role-filler structure with multiple bindings")
print("  Structure: (role_A ⊗ filler_1) ⊕ (role_B ⊗ filler_2)")

# Roles are fresh random hypervectors; fillers come from the shared codebook.
role_A = model.random(seed=200)
role_B = model.random(seed=201)
filler_1 = items["item_3"]
filler_2 = items["item_4"]

# The record: (role_A ⊗ filler_1) ⊕ (role_B ⊗ filler_2)
pair_A = model.bind(role_A, filler_1)
pair_B = model.bind(role_B, filler_2)
struct = model.bundle([pair_A, pair_B])

print("\n  role_A ⊗ item_3")
print("  role_B ⊗ item_4")
print("  → bundled together")

# Run the same query for each role: unbind the role from the record, then
# look up the nearest codebook entry.
role_queries = (
    ("role_A", role_A, "item_3"),
    ("role_B", role_B, "item_4"),
)
for role_name, role_hv, expected_filler in role_queries:
    print("\n" + "=" * 70)
    print(f"Query: What is bound to {role_name}?")
    print("=" * 70)

    unbound = model.unbind(struct, role_hv)
    found_label, found_sim = cleanup.cleanup(unbound, items, model)

    print(f"\n  Unbind {role_name}: {found_label} (similarity={found_sim:.3f})")
    print(f"  Expected: {expected_filler}")

print("\nKey observation:")
print("  - Can query structure by role (dimension)")
print("  - Unbinding isolates specific role-filler pairs")
print("  - Essential pattern for structured retrieval")

# ============================================================================
# Demo 4: Noise Accumulation in Factorization
# ============================================================================
# Bundles the first `size` codebook items for several sizes and measures how
# many of them factorize() returns among its top `size` labels.
print("\n" + "=" * 70)
print("Demo 4: Noise Accumulation During Factorization")
print("=" * 70)

print("\nTesting: Bundle size vs. factorization accuracy")

# Test different bundle sizes (each must be <= len(items), since the bundle
# is built from item_0 .. item_{size-1}).
sizes = [2, 4, 6, 8, 10]

# Search against the demo items PLUS distractors that are never bundled.
# If the search codebook were only the 10 items, asking for the top `size`
# labels could miss at most (10 - size) of them, so recall would be forced to
# 1.00 at size=10 and could not drop below 0.75 at size=8 (assuming
# factorize() returns distinct labels), i.e. the metric would not measure
# interference at all.
N_DISTRACTORS = 90
search_codebook = dict(items)
for d in range(N_DISTRACTORS):
    # Seeds 1000+ are not used by any other vector in this script.
    search_codebook[f"distractor_{d}"] = model.random(seed=1000 + d)

print(f"  (search codebook: {len(items)} items + {N_DISTRACTORS} distractors)")

header = f"{'Size':>5s} | {'Recall@Top':>12s} | {'Avg Sim':>10s} | {'Correct':>10s}"
print("\n" + header)
print("-" * len(header))

for size in sizes:
    # Create bundle of 'size' items
    expected = {f"item_{i}" for i in range(size)}
    test_bundle = model.bundle([items[name] for name in sorted(expected, key=lambda n: int(n.split("_")[1]))])

    # Factorize against the enlarged codebook
    labels_test, sims_test = cleanup.factorize(
        test_bundle, search_codebook, model, n_factors=size
    )

    # Calculate metrics over the top `size` results
    correct_count = sum(label in expected for label in labels_test[:size])
    recall_at_size = correct_count / size
    avg_sim = np.mean(sims_test[:size])

    # Format "hits/size" as one right-aligned cell so it lines up with the
    # 10-character "Correct" header column.
    hits = f"{correct_count}/{size}"
    print(f"{size:>5d} | {recall_at_size:>12.2f} | {avg_sim:>10.3f} | {hits:>10s}")

print("\nKey observation:")
print("  - Accuracy decreases with more bundled items")
print("  - Similarities degrade due to interference")
print("  - Practical limit: ~5-7 factors for reliable recovery")
print("  - Mirrors human working memory capacity!")

# ============================================================================
# Demo 5: Practical Application - Query Decomposition
# ============================================================================
# Encodes a three-attribute search query as a bundle of (attribute ⊗ value)
# pairs, then decodes it again: for every known attribute, unbind it from the
# query and clean the result up against the codebook of known values.
print("\n" + "=" * 70)
print("Demo 5: Practical Application - Complex Query")
print("=" * 70)

print("\nScenario: Multi-attribute product search")
print("  Query: color=red AND category=laptop AND price=affordable")

# Define attributes
COLOR = model.random(seed=300)
CATEGORY = model.random(seed=301)
PRICE = model.random(seed=302)

# Define values
red = model.random(seed=400)
laptop = model.random(seed=401)
affordable = model.random(seed=402)

# Create query
query = model.bundle([
    model.bind(COLOR, red),
    model.bind(CATEGORY, laptop),
    model.bind(PRICE, affordable),
])

print("\n  Query HV created (color ⊗ red) ⊕ (category ⊗ laptop) ⊕ (price ⊗ affordable)")

# Create attribute codebook
attributes = {
    "COLOR": COLOR,
    "CATEGORY": CATEGORY,
    "PRICE": PRICE,
}

# Codebook of candidate values: the three used in the query plus one
# alternative per attribute.
values = {
    "red": red,
    "blue": model.random(seed=403),
    "laptop": laptop,
    "phone": model.random(seed=404),
    "affordable": affordable,
    "expensive": model.random(seed=405),
}

# Decode every attribute once: unbind it from the query, then find the
# nearest known value. The resulting similarity is used both as the evidence
# that the attribute is present and as the confidence of the decoded value.
#
# NOTE(review): this replaces a direct factorize(query, attributes, ...) call.
# The query contains bound pairs, not bare attribute vectors, and a bound
# pair is expected to be dissimilar to each of its operands, so comparing the
# query with the bare attributes should give near-zero, uninformative scores
# (and with a 3-entry codebook and n_factors=3 all three labels are listed
# regardless) — confirm against holovec's FHRR similarity behaviour.
decoded = {}
for attr_name, attr_hv in attributes.items():
    value_label, value_sim = cleanup.cleanup(model.unbind(query, attr_hv), values, model)
    decoded[attr_name] = (value_label, value_sim)

# Attributes ranked by evidence, strongest first.
ranked = sorted(decoded.items(), key=lambda entry: entry[1][1], reverse=True)
attr_labels = [attr_name for attr_name, _ in ranked]
attr_sims = [value_sim for _, (_, value_sim) in ranked]

# Decompose query to understand it
print("\n" + "=" * 70)
print("Decomposing query attributes:")
print("=" * 70)

print("\nQuery contains these attributes:")
print("  (score = best value similarity after unbinding the attribute)")
for label, sim in zip(attr_labels, attr_sims):
    print(f"  {label:10s}: {sim:.3f}")

# Extract values for each attribute
print("\n" + "=" * 70)
print("Extracting attribute values:")
print("=" * 70)

print()
for attr_name in attributes:
    value_label, value_sim = decoded[attr_name]
    print(f"  {attr_name} value: {value_label} (similarity={value_sim:.3f})")

print("\nKey observation:")
print("  - Can decompose complex queries into attributes + values")
print("  - Enables query understanding and refinement")
print("  - Practical for search engines and databases")

# ============================================================================
# Demo 6: Best Practices for Factorization
# ============================================================================
# Purely informational section: the text is kept as data and emitted one line
# per print() call. An empty string stands for a blank output line.
best_practice_lines = (
    "",
    "=" * 70,
    "Demo 6: Factorization Best Practices",
    "=" * 70,
    "",
    "✓ DO:",
    "  - Factorize bundles with ≤ 7 items for best results",
    "  - Use cleanup strategies (BruteForce or Resonator)",
    "  - Provide comprehensive codebook for cleanup",
    "  - Check similarity scores to assess confidence",
    "  - Sequential unbinding when order is known",
    "",
    "✗ DON'T:",
    "  - Bundle > 10 items if you need to factorize later",
    "  - Expect perfect recovery (always approximate)",
    "  - Unbind without cleanup (results are noisy)",
    "  - Ignore similarity scores (they indicate confidence)",
    "  - Chain too many unbind operations (noise compounds)",
    "",
    "Strategies by use case:",
    "",
    "  Bundle factorization:",
    "    - Use: factorize() method",
    "    - Returns: top-k most similar items",
    "    - Best for: 'what's in this bundle?' queries",
    "",
    "  Binding chain factorization:",
    "    - Use: sequential unbind() + cleanup()",
    "    - Requires: knowing binding order",
    "    - Best for: structured data with known schema",
    "",
    "  Mixed operations:",
    "    - Use: unbind() by dimension + cleanup()",
    "    - Pattern: role-filler binding in bundles",
    "    - Best for: attribute-value structures",
    "",
)
for text in best_practice_lines:
    print(text)

# ============================================================================
# Summary
# ============================================================================
# Closing recap: the text is kept as data and emitted one line per print()
# call. An empty string stands for a blank output line.
summary_lines = (
    "=" * 70,
    "Summary: Factorization Key Takeaways",
    "=" * 70,
    "",
    "✓ Bundle factorization: Decompose A ⊕ B ⊕ C → A, B, C",
    "✓ Binding chains: Sequential unbinding with known order",
    "✓ Mixed structures: Combine unbinding + cleanup",
    "✓ Noise accumulation: Accuracy degrades with complexity",
    "✓ Practical limit: ~5-7 factors for reliable recovery",
    "",
    "Core factorization pattern:",
    "  1. Create comprehensive codebook",
    "  2. Call cleanup.factorize(composite, codebook, model, n_factors=k)",
    "  3. Check similarity scores for confidence",
    "  4. Use top-k results as recovered factors",
    "",
    "When to factorize:",
    "  - Retrieving bundled items from working memory",
    "  - Decomposing composite queries",
    "  - Understanding structured representations",
    "  - Multi-attribute search and filtering",
    "",
    "Complexity considerations:",
    "  - Bundle of 3 items: Easy, high accuracy",
    "  - Bundle of 5-7 items: Moderate, good accuracy",
    "  - Bundle of 10+ items: Hard, degraded accuracy",
    "  - Deep binding chains: Noise compounds exponentially",
    "",
    "Next steps:",
    "  → Apply factorization in your domain",
    "  → Combine with 27_cleanup_strategies.py techniques",
    "  → Use in 24_app_working_memory.py patterns",
    "",
    "=" * 70,
)
for text in summary_lines:
    print(text)