# SGS AI Core - HllSet Wrapper Testing

This notebook tests the Python wrapper for Julia HllSets with BSS metrics.

In [1]:
import os
import sys

# Resolve the Julia source file relative to the current working directory,
# so this cell assumes the notebook is launched from the repository root.
hllsets_path = os.path.join(
    os.getcwd(),
    "core", "HllSets", "src", "HllSets.jl",
)

# Export the location through the HLLSETS_PATH environment variable.
os.environ.update({"HLLSETS_PATH": hllsets_path})

# Echo the resolved path and whether it exists on disk.
print(f"HLLSETS_PATH set to: {hllsets_path}")
print(f"File exists: {os.path.exists(hllsets_path)}")

HLLSETS_PATH set to: /home/alexmy/SGS/tao-te-ching/core/HllSets/src/HllSets.jl
File exists: True


In [2]:
# Import the HllSet wrapper
from core.hllset_wrapper import HllSet, BSSMetrics

## Test 1: Basic HllSet Creation and Operations

In [3]:
# Create two HllSets with explicit (non-default) tau/rho values.
# Both share the same P and seed; only tau and rho differ between A and B.
# No elements have been added at this point.
hll_a = HllSet(P=10, tau=0.7, rho=0.21, seed=42)
hll_b = HllSet(P=10, tau=0.8, rho=0.15, seed=42)

# Print each set's repr, which reports P, count, tau and rho.
# NOTE(review): the recorded output shows count=1 before any element is
# added — confirm this is the expected value for a set with no elements.
print(f"HllSet A: {hll_a}")
print(f"HllSet B: {hll_b}")

HllSet A: HllSet(P=10, count=1, tau=0.700, rho=0.210)
HllSet B: HllSet(P=10, count=1, tau=0.800, rho=0.150)


## Test 2: Adding Elements

In [None]:
# Two overlapping ranges of element names:
#   A -> element_0   .. element_999
#   B -> element_500 .. element_1499  (element_500..element_999 are shared)
elements_a = [f"element_{i}" for i in range(1000)]
elements_b = [f"element_{i}" for i in range(500, 1500)]

# Load each list into its set with a single batch call
hll_a.add_batch(elements_a)
hll_b.add_batch(elements_b)

# Report the resulting counts, rounded to whole numbers
print(f"\nAfter adding elements:")
print(f"HllSet A count: {hll_a.count():.0f}")
print(f"HllSet B count: {hll_b.count():.0f}")

## Test 3: BSS Metrics Calculation

In [None]:
# Directional BSS metrics from A to B; the result exposes .tau and .rho.
# Per the formula-verification cell (Test 8):
#   BSS_τ(A→B) = |A∩B| / |B|   and   BSS_ρ(A→B) = |A∖B| / |B|
metrics_ab = hll_a.calculate_bss_to(hll_b)
print(f"\nBSS Metrics (A → B):")
print(f"  BSS_τ (Coverage): {metrics_ab.tau:.3f}")
print(f"  BSS_ρ (Exclusion): {metrics_ab.rho:.3f}")

# Same calculation in the opposite direction (B to A).
# presumably normalised by |A| instead of |B| — TODO confirm against the wrapper
metrics_ba = hll_b.calculate_bss_to(hll_a)
print(f"\nBSS Metrics (B → A):")
print(f"  BSS_τ (Coverage): {metrics_ba.tau:.3f}")
print(f"  BSS_ρ (Exclusion): {metrics_ba.rho:.3f}")

## Test 4: Union Operation

In [None]:
# Union of A and B; hll_union is reused by the chained-operation cell (Test 9).
# The element ranges added earlier cover element_0..element_1499, so the
# exact cardinality would be 1500.
# presumably count() is an estimate, so expect a value near that — TODO confirm
hll_union = hll_a.union(hll_b)

print(f"\nUnion Operation:")
print(f"  A count: {hll_a.count():.0f}")
print(f"  B count: {hll_b.count():.0f}")
print(f"  Union count: {hll_union.count():.0f}")
# The next lines print the expected propagation rule (min of tau, max of rho)
# beside the union's actual tau/rho for comparison by eye; nothing is asserted.
print(f"\nUnion BSS Metrics:")
print(f"  tau = min({hll_a.tau:.3f}, {hll_b.tau:.3f}) = {hll_union.tau:.3f}")
print(f"  rho = max({hll_a.rho:.3f}, {hll_b.rho:.3f}) = {hll_union.rho:.3f}")

## Test 5: Intersection Operation

In [None]:
# Intersection of A and B; hll_intersect is reused by the formula-verification
# cell (Test 8). The element ranges added earlier share
# element_500..element_999, so the exact cardinality would be 500.
# presumably count() is an estimate, so expect a value near that — TODO confirm
hll_intersect = hll_a.intersection(hll_b)

print(f"\nIntersection Operation:")
print(f"  A count: {hll_a.count():.0f}")
print(f"  B count: {hll_b.count():.0f}")
print(f"  Intersection count: {hll_intersect.count():.0f}")
# The next lines print the expected propagation rule (min of tau, max of rho)
# beside the intersection's actual tau/rho; nothing is asserted.
print(f"\nIntersection BSS Metrics:")
print(f"  tau = min({hll_a.tau:.3f}, {hll_b.tau:.3f}) = {hll_intersect.tau:.3f}")
print(f"  rho = max({hll_a.rho:.3f}, {hll_b.rho:.3f}) = {hll_intersect.rho:.3f}")

## Test 6: Difference Operation

In [None]:
# difference() returns three sets, unpacked here as (deleted, retained, new).
# Per the labels printed below: deleted = in A but not in B, retained = in
# both, new = in B but not in A. With the element ranges added earlier, each
# of the three exact sets would hold 500 elements.
# `deleted` is reused by the formula-verification cell (Test 8).
deleted, retained, new = hll_a.difference(hll_b)

print(f"\nDifference Operation (A \\ B):")
print(f"  Deleted count: {deleted.count():.0f} (elements in A but not in B)")
print(f"  Retained count: {retained.count():.0f} (elements in both A and B)")
print(f"  New count: {new.count():.0f} (elements in B but not in A)")

# NOTE(review): only `deleted` is inspected here; the "(all same)" heading is
# not checked against `retained` or `new`.
print(f"\nDifference BSS Metrics (all same):")
print(f"  tau = min({hll_a.tau:.3f}, {hll_b.tau:.3f}) = {deleted.tau:.3f}")
print(f"  rho = max({hll_a.rho:.3f}, {hll_b.rho:.3f}) = {deleted.rho:.3f}")

## Test 7: HllSet ID and Equality

In [None]:
# Print the identifier each set reports through id().
print(f"\nHllSet IDs:")
print(f"  A ID: {hll_a.id()}")
print(f"  B ID: {hll_b.id()}")

# Build C with the same constructor arguments and the same element list as A,
# to see whether two independently built sets compare equal.
hll_c = HllSet(P=10, tau=0.7, rho=0.21, seed=42)
hll_c.add_batch(elements_a)

# presumably A == B is False and A == C is True if equality is content-based
# — TODO confirm against the wrapper's __eq__
print(f"\nEquality Tests:")
print(f"  A == B: {hll_a == hll_b}")
print(f"  A == C (same elements): {hll_a == hll_c}")

## Test 8: Verification of BSS Formulas

In [None]:
# Recompute BSS(A→B) by hand from set counts and print it beside the values
# returned by calculate_bss_to. Relies on hll_intersect (Test 5), deleted
# (Test 6) and metrics_ab (Test 3) created in earlier cells.
# NOTE(review): count_a is assigned but not used below.
count_a = hll_a.count()
count_b = hll_b.count()
count_intersect = hll_intersect.count()
count_deleted = deleted.count()

# With the element ranges added earlier, the exact ratios would both be
# 500 / 1000 = 0.5. The comparison is visual only; nothing is asserted.
print(f"\nBSS Formula Verification:")
print(f"\nBSS_τ(A→B) = |A∩B| / |B|")
print(f"  Calculated: {metrics_ab.tau:.3f}")
print(f"  Expected: {count_intersect / count_b:.3f}")

print(f"\nBSS_ρ(A→B) = |A∖B| / |B|")
print(f"  Calculated: {metrics_ab.rho:.3f}")
print(f"  Expected: {count_deleted / count_b:.3f}")

# The sum of the two metrics is printed for inspection only; no expected
# value is stated or checked.
print(f"\nVerify: BSS_τ + BSS_ρ should account for coverage and exclusion")
print(f"  BSS_τ + BSS_ρ = {metrics_ab.tau + metrics_ab.rho:.3f}")

## Test 9: Multiple Set Operations

In [None]:
# Third set D holds element_250 .. element_749, all of which lie inside the
# element_0 .. element_1499 range covered by A and B together.
elements_d = [f"element_{i}" for i in range(250, 750)]
hll_d = HllSet(P=10, tau=0.6, rho=0.3, seed=42)
hll_d.add_batch(elements_d)

print(f"HllSet D: {hll_d}")

# Intersect the union computed in Test 4 with D, i.e. (A ∪ B) ∩ D
result = hll_union.intersection(hll_d)

# Show the result next to the expected min/max propagation of tau and rho
print(f"\nChained Operation: (A ∪ B) ∩ D")
print(f"  Result: {result}")
print(f"  tau = min({hll_union.tau:.3f}, {hll_d.tau:.3f}) = {result.tau:.3f}")
print(f"  rho = max({hll_union.rho:.3f}, {hll_d.rho:.3f}) = {result.rho:.3f}")

## Summary

This notebook demonstrates:
- Creating HllSets with custom BSS metrics (tau, rho)
- Adding elements in batches with `add_batch` (single-element insertion is not exercised in this notebook)
- Calculating BSS_τ (coverage) and BSS_ρ (exclusion) metrics
- Set operations (union, intersection, difference) with metric propagation
- Verification of BSS formulas
- Chained operations maintaining BSS metrics