
# Integration Patterns and Multimodal Fusion

Topics: Multimodal fusion, encoder integration, design patterns, hierarchical encoding
Time: 25 minutes
Prerequisites: 10_encoders_scalar.py, 14_encoders_ngram.py, 23_app_symbolic_reasoning.py
Related: 20-22_app_*.py (domain-specific applications)

This example demonstrates integration patterns for building complex hyperdimensional
computing systems. Learn how to combine multiple encoders, fuse multimodal data,
and structure hierarchical representations for real-world applications.

Key concepts:
- Multimodal fusion: Combining heterogeneous data types (text and numeric values here; the same pattern extends to images)
- Encoder composition: Integrate scalar, n-gram, vector, and position-binding encoders
- Hierarchical encoding: Build layered representations
- Design patterns: Reusable structures for HDC systems
- End-to-end workflow: From raw data to query answering

This capstone example shows how the individual encoders and techniques
work together to build sophisticated AI systems.


In [None]:
import numpy as np
from holovec import VSA
from holovec.encoders import (
    FractionalPowerEncoder,
    NGramEncoder,
    VectorEncoder,
    PositionBindingEncoder,
)
from holovec.retrieval import ItemStore

# Title banner: rule, title, rule, followed by one empty line.
print("=" * 70, "Integration Patterns and Multimodal Fusion", "=" * 70, "", sep="\n")

# Shared VSA model for the rest of the script: 'FHRR' model type,
# 10,000-dimensional hypervectors, created with an explicit seed.
model = VSA.create('FHRR', dim=10000, seed=42)

# ============================================================================
# Pattern 1: Multimodal Fusion - Combining Different Data Types
# ============================================================================
for line in ("=" * 70, "Pattern 1: Multimodal Fusion - Product Reviews", "=" * 70):
    print(line)

print("\nScenario: Encode product reviews with text + rating + price")

# One encoder per modality, each constructed with its own seed.
print("\n  Setting up encoders:")

# Review text -> NGramEncoder with n=3.
text_encoder = NGramEncoder(model, n=3, seed=42)
print("    - Text: NGramEncoder (3-grams)")

# Star rating -> scalar encoder declared over the range 1..5.
rating_encoder = FractionalPowerEncoder(
    model, min_val=1, max_val=5, bandwidth=0.2, seed=43
)
print("    - Rating: FractionalPowerEncoder (1-5 stars)")

# Price -> scalar encoder declared over the range 0..1000.
price_encoder = FractionalPowerEncoder(
    model, min_val=0, max_val=1000, bandwidth=20, seed=44
)
print("    - Price: FractionalPowerEncoder ($0-$1000)")

# One random "role" hypervector per modality. Binding a modality's encoding
# to its role keeps the three contributions distinguishable after bundling.
TEXT_DIM, RATING_DIM, PRICE_DIM = (model.random(seed=s) for s in (100, 101, 102))

print("\n  Modality dimensions: TEXT, RATING, PRICE")


def _fuse_modalities(text, rating, price):
    """Encode one (text, rating, price) record as a single hypervector.

    Each value is encoded with its modality's encoder, bound to that
    modality's role vector, and the three bound vectors are bundled.
    Reviews and the query both go through this same path.
    """
    encoded = (
        text_encoder.encode(text),
        rating_encoder.encode(rating),
        price_encoder.encode(price),
    )
    roles = (TEXT_DIM, RATING_DIM, PRICE_DIM)
    return model.bundle([model.bind(role, hv) for role, hv in zip(roles, encoded)])


# Example data: three product reviews.
print("\n" + "=" * 70)
print("Encoding 3 product reviews:")
print("=" * 70)

reviews = [
    {"id": pid, "text": text, "rating": rating, "price": price}
    for pid, text, rating, price in (
        ("product_A", "excellent quality fast shipping", 5.0, 49.99),
        ("product_B", "poor quality slow shipping", 2.0, 39.99),
        ("product_C", "excellent product fast delivery", 4.5, 89.99),
    )
]

# Maps product id -> fused multimodal hypervector.
encoded_reviews = {}

for review in reviews:
    print(f"\n  {review['id']}:")
    print(f"    Text: '{review['text']}'")
    print(f"    Rating: {review['rating']}/5, Price: ${review['price']}")

    fused_hv = _fuse_modalities(review["text"], review["rating"], review["price"])
    encoded_reviews[review["id"]] = fused_hv

    print(f"    → Multimodal HV shape: {fused_hv.shape}")

# Query: build a hypervector for "excellent fast", 5 stars, $50 the same way
# the reviews were built, then compare it against every stored review.
print("\n" + "=" * 70)
print("Query: Products like 'excellent fast', 5 stars, $50")
print("=" * 70)

query_text, query_rating, query_price = "excellent fast", 5.0, 50.0
query_hv = _fuse_modalities(query_text, query_rating, query_price)

print("\nSimilarity to products:")
for prod_id, stored_hv in encoded_reviews.items():
    sim = float(model.similarity(query_hv, stored_hv))
    print(f"  {prod_id}: {sim:.3f}")

# The observations below are fixed text; they are not derived from the
# similarities printed above.
for line in (
    "\nKey observation:",
    "  - Product A ranks highest (excellent/fast + 5 stars + $49.99)",
    "  - Multimodal fusion combines complementary information",
    "  - Each modality contributes to overall similarity",
):
    print(line)

# ============================================================================
# Pattern 2: Hierarchical Encoding - Part-Whole Relationships
# ============================================================================
print("\n" + "=" * 70)
print("Pattern 2: Hierarchical Encoding - Document Structure")
print("=" * 70)

print("\nScenario: Encode document with sections and paragraphs")

# Role vectors for the hierarchy levels. DOCUMENT is created alongside the
# others but this section only binds with SECTION and PARAGRAPH.
DOCUMENT, SECTION, PARAGRAPH = (model.random(seed=s) for s in (200, 201, 202))

print("  Hierarchy: DOCUMENT → SECTION → PARAGRAPH")


def _encode_section(paragraphs):
    """Build one section hypervector from an iterable of paragraph strings.

    Every paragraph is encoded with ``text_encoder``, bound to the shared
    ``PARAGRAPH`` role vector, and the bound vectors are bundled together.
    """
    paragraph_hvs = [text_encoder.encode(p) for p in paragraphs]
    return model.bundle([model.bind(PARAGRAPH, hv) for hv in paragraph_hvs])


print("\n" + "=" * 70)
print("Document: 'Machine Learning Tutorial'")
print("=" * 70)

# --- Section 1: Introduction -------------------------------------------------
intro_para1 = "machine learning algorithms process data"
intro_para2 = "supervised learning uses labeled data"
intro_section = _encode_section((intro_para1, intro_para2))

print("\n  Section 1 (Introduction):")
print(f"    Para 1: '{intro_para1}'")
print(f"    Para 2: '{intro_para2}'")

# --- Section 2: Methods ------------------------------------------------------
methods_para1 = "neural networks learn patterns from data"
methods_para2 = "decision trees create classification rules"
methods_section = _encode_section((methods_para1, methods_para2))

print("\n  Section 2 (Methods):")
print(f"    Para 1: '{methods_para1}'")
print(f"    Para 2: '{methods_para2}'")

# Root level: bind each section to the SECTION role and bundle the results.
document_hv = model.bundle(
    [model.bind(SECTION, section) for section in (intro_section, methods_section)]
)

print("\n  → Document HV (hierarchical encoding)")

# Query: which section is closer to the phrase "neural networks"?
print("\n" + "=" * 70)
print("Query: Which section discusses 'neural networks'?")
print("=" * 70)

query_nn = text_encoder.encode("neural networks")

# Sections hold PARAGRAPH-bound text, so the query is bound to PARAGRAPH as
# well before it is compared against each section vector.
query_as_paragraph = model.bind(PARAGRAPH, query_nn)
sim_intro, sim_methods = (
    float(model.similarity(section, query_as_paragraph))
    for section in (intro_section, methods_section)
)

# The "Best match!" marker is part of the literal on the Methods line; it is
# not computed from the two similarities.
print(f"\n  Intro section:   {sim_intro:.3f}")
print(f"  Methods section: {sim_methods:.3f}  ← Best match!")

for line in (
    "\nKey observation:",
    "  - Hierarchical structure preserves part-whole relationships",
    "  - Can query at different levels (document, section, paragraph)",
    "  - Enables structured document retrieval",
):
    print(line)

# ============================================================================
# Pattern 3: Encoder Composition - Building Complex Encoders
# ============================================================================
print("\n" + "=" * 70)
print("Pattern 3: Encoder Composition - Feature Vectors with Labels")
print("=" * 70)

print("\nScenario: Encode labeled feature vectors (supervised learning)")

# Setup: a VectorEncoder wrapping a scalar FractionalPowerEncoder (declared
# range 0..1) encodes the 5-element feature arrays; a PositionBindingEncoder
# encodes the class label, which is passed in as a one-element list.
feature_dim = 5
feature_encoder = VectorEncoder(
    model,
    FractionalPowerEncoder(model, min_val=0, max_val=1, seed=45),
    n_dimensions=feature_dim,
    seed=46
)

label_encoder = PositionBindingEncoder(model, seed=47)

print(f"  Features: VectorEncoder ({feature_dim}D)")
print("  Labels: PositionBindingEncoder")

# Role vectors: every example is stored as
#   bundle([bind(FEATURES, features_hv), bind(LABEL, label_hv)])
FEATURES = model.random(seed=300)
LABEL = model.random(seed=301)

# Encode training examples
print("\n" + "=" * 70)
print("Encoding training examples:")
print("=" * 70)

training_data = [
    {"features": np.array([0.8, 0.9, 0.1, 0.2, 0.1]), "label": ["class_A"]},
    {"features": np.array([0.2, 0.1, 0.9, 0.8, 0.9]), "label": ["class_B"]},
    {"features": np.array([0.7, 0.8, 0.2, 0.3, 0.2]), "label": ["class_A"]},
]

encoded_examples = []

for i, example in enumerate(training_data):
    print(f"\n  Example {i+1}:")
    print(f"    Features: {example['features']}")
    print(f"    Label: {example['label'][0]}")

    # Encode features and label
    features_hv = feature_encoder.encode(example["features"])
    label_hv = label_encoder.encode(example["label"])

    # Bind each part to its role vector
    features_bound = model.bind(FEATURES, features_hv)
    label_bound = model.bind(LABEL, label_hv)

    # Bundle into one role-filler record per example
    example_hv = model.bundle([features_bound, label_bound])
    encoded_examples.append(example_hv)

# Bundle of all examples. It is built for illustration only: the prediction
# step below compares against the individual example vectors instead.
training_memory = model.bundle(encoded_examples)
print("\n  → Training memory (bundled all examples)")

# Query: Predict label for new features
print("\n" + "=" * 70)
print("Prediction: Classify new features")
print("=" * 70)

test_features = np.array([0.85, 0.90, 0.15, 0.25, 0.15])
print(f"\n  Test features: {test_features}")

# Encode the test features the same way the stored feature part was encoded,
# so the query overlaps with the FEATURES-bound component of each example.
test_features_hv = feature_encoder.encode(test_features)
test_features_bound = model.bind(FEATURES, test_features_hv)

# Compare the query against every stored example and remember the scores.
print("\n  Similarity to training examples:")
example_sims = []
for i, (example, ex_hv) in enumerate(zip(training_data, encoded_examples)):
    sim = float(model.similarity(test_features_bound, ex_hv))
    example_sims.append(sim)
    label = example["label"][0]
    print(f"    Example {i+1} ({label}): {sim:.3f}")

# Choose the nearest example from the measured similarities rather than
# assuming it is the first one.
best_idx = max(range(len(example_sims)), key=example_sims.__getitem__)

# Recover the label filler by unbinding the LABEL role. Unbinding FEATURES
# would recover the feature filler instead, which is unrelated to either
# class vector and gives near-zero similarity for both classes.
query_label = model.unbind(encoded_examples[best_idx], LABEL)
sim_a = float(model.similarity(query_label, label_encoder.encode(["class_A"])))
sim_b = float(model.similarity(query_label, label_encoder.encode(["class_B"])))

# Put the marker on whichever class actually scored higher.
predicts_a = sim_a >= sim_b
marker_a = "  ← Prediction" if predicts_a else ""
marker_b = "" if predicts_a else "  ← Prediction"

print("\n  Predicted label probabilities:")
print(f"    class_A: {sim_a:.3f}{marker_a}")
print(f"    class_B: {sim_b:.3f}{marker_b}")

print("\nKey observation:")
print("  - Composition combines multiple encoders")
print("  - Enables flexible, structured data encoding")
print("  - Supports supervised learning paradigms")

# ============================================================================
# Pattern 4: Semantic Binding - Context-Dependent Representations
# ============================================================================
print("\n" + "=" * 70)
print("Pattern 4: Semantic Binding - Word Sense Disambiguation")
print("=" * 70)

print("\nScenario: Encode words with context to disambiguate meaning")

# Role vectors for the two slots of a word-in-context record.
WORD, CONTEXT = (model.random(seed=s) for s in (400, 401))

# The ambiguous word gets a single random hypervector; only the context part
# differs between its senses.
bank = model.random(seed=500)


def _word_in_context(word_hv, context_hv):
    """Bundle a WORD-bound word vector with a CONTEXT-bound context vector."""
    return model.bundle([model.bind(WORD, word_hv), model.bind(CONTEXT, context_hv)])


context1 = text_encoder.encode("river water shore")
context2 = text_encoder.encode("money deposit account")

for line in (
    "\n  Word: 'bank'",
    "  Context 1: 'river water shore' (riverbank)",
    "  Context 2: 'money deposit account' (financial bank)",
):
    print(line)

# One representation per sense: same word vector, different context vector.
bank_river = _word_in_context(bank, context1)
bank_financial = _word_in_context(bank, context2)

print("\n  → bank_river (riverbank sense)")
print("  → bank_financial (financial sense)")

# Test: encode "bank" with an unseen context and compare it to both senses.
print("\n" + "=" * 70)
print("Test: 'bank' in context 'deposit money account'")
print("=" * 70)

test_context = text_encoder.encode("deposit money account")
test_bank = _word_in_context(bank, test_context)

sim_river, sim_financial = (
    float(model.similarity(test_bank, sense_hv))
    for sense_hv in (bank_river, bank_financial)
)

# The "Best match!" marker is part of the literal on the financial line; it
# is not computed from the two similarities.
print(f"\n  Similarity to riverbank sense:   {sim_river:.3f}")
print(f"  Similarity to financial sense:   {sim_financial:.3f}  ← Best match!")

for line in (
    "\nKey observation:",
    "  - Context binding creates distinct word senses",
    "  - Disambiguates polysemous words automatically",
    "  - Models compositional semantics",
):
    print(line)

# ============================================================================
# Pattern 5: End-to-End Application - Multimodal Search Engine
# ============================================================================
print("\n" + "=" * 70)
print("Pattern 5: End-to-End Multimodal Search Engine")
print("=" * 70)

print("\nScenario: Search products by text description + price range")


def _encode_listing(desc, price):
    """Fuse a description string and a price into one hypervector.

    The description goes through ``text_encoder`` and the price (converted
    to float) through ``price_encoder``; each is bound to its modality role
    (TEXT_DIM / PRICE_DIM) and the two bound vectors are bundled.
    """
    desc_hv = text_encoder.encode(desc)
    price_hv = price_encoder.encode(float(price))
    return model.bundle(
        [model.bind(TEXT_DIM, desc_hv), model.bind(PRICE_DIM, price_hv)]
    )


# Product database backed by an ItemStore.
product_store = ItemStore(model)

# NOTE(review): laptop_premium's price (1200) is above the max_val=1000 that
# price_encoder was constructed with; how the encoder treats out-of-range
# values is not visible here - confirm this is intended.
products = [
    {"id": pid, "desc": desc, "price": price}
    for pid, desc, price in (
        ("laptop_premium", "high performance laptop fast processor", 1200),
        ("laptop_budget", "affordable laptop basic performance", 400),
        ("phone_premium", "smartphone fast processor excellent camera", 900),
        ("phone_budget", "basic phone affordable price", 200),
    )
]

print("\n  Building product database:")

for prod in products:
    product_store.add(prod["id"], _encode_listing(prod["desc"], prod["price"]))
    print(f"    {prod['id']}: '{prod['desc']}' (${prod['price']})")

# Query: "fast laptop under $500". The price part is encoded as the single
# value 500, i.e. the query is built exactly like a product listing.
print("\n" + "=" * 70)
print("Query: 'fast laptop' under $500")
print("=" * 70)

query_desc = "fast laptop"
query_max_price = 500
query_multimodal = _encode_listing(query_desc, query_max_price)

# Ask the store for the 4 nearest items (the store holds 4 products).
results = product_store.query(query_multimodal, k=4)

# Index the product records by id so each hit can be shown with its details.
products_by_id = {p["id"]: p for p in products}

print("\n  Search results (ranked by relevance):")
for rank, (prod_id, sim) in enumerate(results, 1):
    prod = products_by_id[prod_id]
    print(f"    {rank}. {prod_id}: {sim:.3f}")
    print(f"       Description: '{prod['desc']}'")
    print(f"       Price: ${prod['price']}")

for line in (
    "\nKey observation:",
    "  - End-to-end workflow: data → encoding → retrieval",
    "  - Multimodal query combines text and price constraints",
    "  - ItemStore enables efficient similarity search",
):
    print(line)

# ============================================================================
# Summary
# ============================================================================
# The closing summary is fixed text. It is kept as one tuple of output lines
# (an empty string stands for a blank line) and written out in a single call.
_summary_lines = (
    "\n" + "=" * 70,
    "Summary: Integration Patterns Key Takeaways",
    "=" * 70,
    "",
    "✓ Multimodal fusion: Combine text, numeric, and other data types",
    "✓ Hierarchical encoding: Build layered, structured representations",
    "✓ Encoder composition: Integrate multiple encoders flexibly",
    "✓ Semantic binding: Context-dependent representations",
    "✓ End-to-end systems: Complete application workflows",
    "",
    "Integration pattern recipes:",
    "",
    "1. Multimodal Fusion:",
    "   - Encode each modality separately",
    "   - Bind each to unique dimension vector",
    "   - Bundle all modalities into single HV",
    "   - Query: bundle query modalities same way",
    "",
    "2. Hierarchical Encoding:",
    "   - Define level dimension vectors (document, section, paragraph)",
    "   - Encode bottom-up: leaf → intermediate → root",
    "   - Bind each level's content to its dimension",
    "   - Bundle across levels for complete structure",
    "",
    "3. Encoder Composition:",
    "   - Combine encoders for complex data (features + labels)",
    "   - Use consistent dimension binding pattern",
    "   - Enable flexible, reusable encoding pipelines",
    "",
    "4. Context Binding:",
    "   - Bind primary concept to WORD dimension",
    "   - Bind context to CONTEXT dimension",
    "   - Bundle for context-dependent representation",
    "   - Enables disambiguation and semantic composition",
    "",
    "Design principles:",
    "  - Modularity: Combine encoders like building blocks",
    "  - Consistency: Use same bind/bundle patterns",
    "  - Flexibility: Adapt patterns to your domain",
    "  - Scalability: Patterns work for any data scale",
    "",
    "Applications:",
    "  - Multimodal search: Text + image + metadata",
    "  - Document understanding: Hierarchical structure",
    "  - Recommendation systems: User preferences + item features",
    "  - Semantic search: Context-aware retrieval",
    "",
    "Next steps:",
    "  → Apply patterns to your domain",
    "  → Combine with cleanup strategies (27, 28)",
    "  → Build domain-specific applications (20-22)",
    "",
    "=" * 70,
)
print("\n".join(_summary_lines))