
# Multivariate Vector Encoding

Topics: VectorEncoder, scalar encoder composition, high-dimensional data
Time: 15 minutes
Prerequisites: 00_quickstart.py, 10_encoders_scalar.py
Related: 17_encoders_image.py, 21_app_image_recognition.py

This example demonstrates the VectorEncoder, which encodes multivariate vectors
by binding each dimension with its scalar value. This creates distributed
representations that preserve similarity between vectors with similar values.

Key concepts:
- Dimension binding: bind(dim_i, scalar_encode(value_i))
- Scalar composition: works with any scalar encoder (FPE, Thermometer, Level)
- High-dimensional: scales to 64D, 784D, or higher
- Reversible: can decode to approximate original values when the scalar encoder is reversible (e.g. FPE)

The VectorEncoder is fundamental for feature vectors, embeddings, sensor data,
and flattened images (MNIST-style pixel arrays).


In [None]:
import numpy as np
from holovec import VSA
from holovec.encoders import (
    VectorEncoder,
    FractionalPowerEncoder,
    ThermometerEncoder,
    LevelEncoder,
)

# Title banner: a 70-character rule, the heading, the rule again, then a blank line.
print("=" * 70, "Multivariate Vector Encoding", "=" * 70, "", sep="\n")

# ============================================================================
# Demo 1: Basic VectorEncoder Usage
# ============================================================================
print("=" * 70, "Demo 1: Basic VectorEncoder Usage", "=" * 70, sep="\n")

# FHRR model (dim=5000) and a fractional-power scalar encoder configured for
# the value range [0, 10]. `model` is reused further down in this script.
model = VSA.create('FHRR', dim=5000, seed=42)
scalar_enc = FractionalPowerEncoder(model, min_val=0, max_val=10, seed=42)

# Vector encoder for 5-component inputs, composed with the scalar encoder above.
encoder = VectorEncoder(model, scalar_encoder=scalar_enc, n_dimensions=5, seed=42)

print(
    f"\nEncoder: {encoder}",
    f"Reversible: {encoder.is_reversible}",
    f"Input type: {encoder.input_type}",
    sep="\n",
)

# Four sample inputs: a baseline, a near-duplicate of it, its reversal, and an
# unrelated vector.
vectors = {
    "v1": np.array([1.0, 2.0, 3.0, 4.0, 5.0]),
    "v2": np.array([1.1, 2.1, 3.1, 4.1, 5.1]),   # v1 shifted by 0.1
    "v3": np.array([5.0, 4.0, 3.0, 2.0, 1.0]),   # v1 reversed
    "v4": np.array([8.0, 9.0, 7.0, 6.0, 10.0]),  # unrelated values
}

# Encode each sample and report the shape of the resulting hypervector.
print("\nEncoding vectors:")
encoded = {}
for label, sample in vectors.items():
    encoded[label] = encoder.encode(sample)
    print(f"  {label}: {sample} → HV shape: {encoded[label].shape}")

# Pairwise similarity table: one row per sample, one column per sample.
print("\nSimilarity Matrix:")
names = list(vectors)
print("     ", "  ".join(f"{n:>6}" for n in names))
for row_name in names:
    row = [
        float(model.similarity(encoded[row_name], encoded[col_name]))
        for col_name in names
    ]
    print(f"{row_name:>4}", "  ".join(f"{value:6.3f}" for value in row))

# Round-trip check: decode two of the hypervectors and report the
# element-wise absolute error against the inputs.
print("\nDecoding test:")
for label in ('v1', 'v2'):
    truth = vectors[label]
    recovered = encoder.decode(encoded[label])
    abs_err = np.abs(truth - recovered)
    print(f"\n  {label} (original): {truth}")
    print(f"  {label} (decoded):  {recovered}")
    print(f"  Error:            {abs_err}")
    print(f"  Max error:        {np.max(abs_err):.3f}")

print(
    "\nKey observations:",
    "  - Similar vectors have high similarity",
    "  - Different vectors are distinguishable",
    "  - Decoding recovers approximate values",
    sep="\n",
)

# ============================================================================
# Demo 2: Scalar Encoder Composition
# ============================================================================
print("\n" + "=" * 70, "Demo 2: Scalar Encoder Composition", "=" * 70, sep="\n")

# One 3-component input, encoded below by every encoder variant.
test_vector = np.array([2.5, 5.0, 7.5])

# Three VectorEncoders that differ only in the scalar encoder they wrap.
# Construction order is kept as-is: the Thermometer and Level encoders are not
# given a seed here, so reordering could change what they generate.
encoders = {
    "FractionalPower": VectorEncoder(
        model, FractionalPowerEncoder(model, 0, 10, seed=42), n_dimensions=3, seed=42
    ),
    "Thermometer": VectorEncoder(
        model, ThermometerEncoder(model, 0, 10, n_bins=20), n_dimensions=3, seed=42
    ),
    "Level": VectorEncoder(
        model, LevelEncoder(model, 0, 10, n_levels=11), n_dimensions=3, seed=42
    ),
}

print(f"\nTest vector: {test_vector}")
print("\nScalar Encoder      | Reversible | HV Shape    | Can Decode")
print("-" * 65)

# Encode the test vector with each variant and print one table row per encoder.
# The "Reversible" and "Can Decode" columns both reflect `is_reversible`.
encoded_comp = {}
for label, enc in encoders.items():
    hv = enc.encode(test_vector)
    encoded_comp[label] = hv
    flag = "Yes" if enc.is_reversible else "No "
    print(f"{label:18} | {flag:>10} | {hv.shape} | {flag}")

# Similarity between the hypervectors produced by the different variants.
print("\nCross-encoder similarities:")
names = list(encoders)
print("                   ", "  ".join(f"{n:>15}" for n in names))
for row_name in names:
    row = [
        float(model.similarity(encoded_comp[row_name], encoded_comp[col_name]))
        for col_name in names
    ]
    print(f"{row_name:18}", "  ".join(f"{value:15.3f}" for value in row))

# Decode only with encoders that report themselves as reversible.
print("\nDecoding (where supported):")
for label, enc in encoders.items():
    if not enc.is_reversible:
        print(f"\n  {label}: Not reversible (skipped)")
        continue
    recovered = enc.decode(encoded_comp[label])
    abs_err = np.abs(test_vector - recovered)
    print(f"\n  {label}:")
    print(f"    Original: {test_vector}")
    print(f"    Decoded:  {recovered}")
    print(f"    Max err:  {np.max(abs_err):.3f}")

print(
    "\nKey observations:",
    "  - VectorEncoder works with any scalar encoder",
    "  - Different scalar encoders give different properties",
    "  - Composition enables flexible encoding strategies",
    sep="\n",
)

# ============================================================================
# Demo 3: High-Dimensional Data (MNIST-style Images)
# ============================================================================
print("\n" + "=" * 70)
print("Demo 3: High-Dimensional Data (MNIST-style)")
print("=" * 70)

# Separate, larger model (dim=10000) for the 64-component inputs.
model_hd = VSA.create('FHRR', dim=10000, seed=42)

# Scalar encoder configured for the 0-255 grayscale range; the vector encoder
# takes 64 components, i.e. a flattened 8x8 image.
scalar_enc_hd = FractionalPowerEncoder(model_hd, min_val=0, max_val=255, seed=42)
encoder_hd = VectorEncoder(
    model_hd,
    scalar_encoder=scalar_enc_hd,
    n_dimensions=64,  # 8x8 flattened
    normalize_input=False,  # NOTE(review): presumably passes raw pixel values through - confirm
    seed=42
)

print(f"\nEncoder: {encoder_hd}")
print("Image size: 8x8 = 64 pixels")
print("Pixel range: 0-255 (grayscale)")

# Create synthetic "images" from a fixed seed.
np.random.seed(42)
images = {
    "digit_1": np.random.randint(0, 50, 64),  # Dark image
}
# The "similar" image must be a perturbation of digit_1 itself, not a second
# independent random draw. Clip so the added noise cannot push pixels below 0
# (or above 255), outside the range the scalar encoder was configured with.
images["digit_1b"] = np.clip(
    images["digit_1"] + np.random.randint(-5, 5, 64), 0, 255
)
images["digit_7"] = np.random.randint(100, 255, 64)  # Bright image
images["noise"] = np.random.randint(0, 255, 64)      # Random

# Encode every image (as floats) and report basic pixel statistics.
print("\nEncoding images:")
encoded_imgs = {}
for name, img in images.items():
    hv = encoder_hd.encode(img.astype(float))
    encoded_imgs[name] = hv
    print(f"  {name:10s}: mean={np.mean(img):6.1f}, std={np.std(img):5.1f} "
          f"→ HV shape: {hv.shape}")

# Pairwise similarity table: one row and one column per image.
print("\nSimilarity Matrix:")
names = list(images.keys())
print("           ", "  ".join(f"{n:>10}" for n in names))
for name1 in names:
    similarities = [
        float(model_hd.similarity(encoded_imgs[name1], encoded_imgs[name2]))
        for name2 in names
    ]
    print(f"{name1:10}", "  ".join(f"{s:10.3f}" for s in similarities))

# Round-trip the first image through encode/decode.
print("\nReconstruction test (first image):")
original = images["digit_1"].astype(float)
decoded = encoder_hd.decode(encoded_imgs["digit_1"])

# Reshape for display (8x8)
orig_grid = original.reshape(8, 8)
dec_grid = decoded.reshape(8, 8)

print("\nOriginal image (8x8):")
print(orig_grid.astype(int))

print("\nDecoded image (8x8):")
print(dec_grid.astype(int))

# Report reconstruction quality as RMSE and Pearson correlation over pixels.
rmse = np.sqrt(np.mean((original - decoded) ** 2))
print(f"\nReconstruction RMSE: {rmse:.2f}")
print(f"Pixel correlation: {np.corrcoef(original, decoded)[0, 1]:.3f}")

print("\nKey observations:")
print("  - VectorEncoder scales to high-dimensional data (64D, 784D, etc.)")
print("  - Similar images have higher similarity")
print("  - Approximate reconstruction preserves main features")
print("  - Ready for classification via similarity search")

# ============================================================================
# Summary
# ============================================================================
# The whole summary is emitted with one print; each tuple entry is one output line.
print("\n".join((
    "\n" + "=" * 70,
    "Summary: VectorEncoder Key Takeaways",
    "=" * 70,
    "",
    "✓ Multivariate: Encodes vectors by binding dimensions with values",
    "✓ Compositional: Works with any scalar encoder (FPE, Thermometer, Level)",
    "✓ Scalable: Handles 5D, 64D, 784D, or higher dimensions",
    "✓ Reversible: Decodes to approximate original values (with FPE)",
    "✓ Similarity-preserving: Similar vectors → similar hypervectors",
    "",
    "Use cases:",
    "  - Feature vectors: ML model outputs, embeddings",
    "  - Sensor data: Multiple sensors, time steps",
    "  - Flattened images: MNIST (784D), CIFAR (3072D)",
    "  - Scientific data: Multivariate measurements",
    "",
    "Next steps:",
    "  → 17_encoders_image.py - 2D spatial structure encoding",
    "  → 21_app_image_recognition.py - Apply to real image data",
    "  → 25_app_integration_patterns.py - Combine with other encoders",
    "",
    "=" * 70,
)))