In [2]:
import redis
import hashlib

# Connect to Redis on localhost:6379, database 0.
# `r` is the module-level client that the Redis helper functions below use.
r = redis.Redis(host='localhost', port=6379, db=0)

In [3]:
def serialize_hllset(hllset):
    """Pack an HllSet into bytes, 8 big-endian bytes per register.

    Args:
        hllset (list[int]): Registers; each must fit in an unsigned 64-bit word.

    Returns:
        bytes: Concatenation of the encoded registers (empty for an empty list).
    """
    buffer = bytearray()
    for register in hllset:
        buffer += register.to_bytes(8, 'big')
    return bytes(buffer)

def deserialize_hllset(serialized_content):
    """Unpack bytes into an HllSet, reading one big-endian integer per 8-byte chunk.

    Args:
        serialized_content (bytes): Data in the layout written by serialize_hllset.

    Returns:
        list[int]: One integer per chunk. A trailing chunk shorter than 8 bytes
        is decoded from however many bytes remain.
    """
    registers = []
    for offset in range(0, len(serialized_content), 8):
        chunk = serialized_content[offset:offset + 8]
        registers.append(int.from_bytes(chunk, 'big'))
    return registers

def generate_hllset_id(hllset):
    """Return the content ID of an HllSet.

    The ID is the hexadecimal SHA-256 digest of the bytes produced by
    serialize_hllset, so equal register lists always get the same ID.
    """
    digest = hashlib.sha256()
    digest.update(serialize_hllset(hllset))
    return digest.hexdigest()

# Example usage: compute and print the content ID of a small ten-register list.
hllset = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]  # Example HllSet content
hllset_id = generate_hllset_id(hllset)
print(f"HllSet ID: {hllset_id}")

HllSet ID: 84141aab906fb1954aa2019023255258b2e388f467c7bc47294667aaa749cd00


In [4]:
def store_hllset_redis(hllset):
    """Write an HllSet to Redis under its own SHA-256 content ID.

    Args:
        hllset (list[int]): Registers to store.

    Returns:
        str: The key the serialized HllSet was written to.
    """
    key = generate_hllset_id(hllset)
    payload = serialize_hllset(hllset)
    r.set(key, payload)
    return key

def retrieve_hllset_redis(hllset_id):
    """Retrieve an HllSet from Redis by key.

    Args:
        hllset_id (str): Redis key, normally the SHA-256 ID returned when the
            set was stored.

    Returns:
        list[int] | None: The deserialized HllSet, or None when the key does
        not exist. A key that exists but holds an empty value yields [].
    """
    serialized_content = r.get(hllset_id)
    # Compare against None explicitly: an empty bytes value is falsy but is
    # still a stored (empty) HllSet and must not be reported as missing.
    if serialized_content is None:
        return None
    return deserialize_hllset(serialized_content)

# Example usage: store an HllSet under its content ID (needs the Redis client `r`).
hllset = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]  # Example HllSet content
hllset_id = store_hllset_redis(hllset)
print(f"Stored HllSet with ID: {hllset_id}")

# Read it back through the returned ID and print what was retrieved.
retrieved_hllset = retrieve_hllset_redis(hllset_id)
print(f"Retrieved HllSet: {retrieved_hllset}")

Stored HllSet with ID: 84141aab906fb1954aa2019023255258b2e388f467c7bc47294667aaa749cd00
Retrieved HllSet: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [5]:
def add_token_to_hllset_redis(hllset_key, token):
    """
    Add a single token to an HllSet stored in Redis as one atomic update.

    The read-modify-write runs as an optimistic transaction: the key is
    WATCHed, read, updated locally, and written back inside MULTI/EXEC. If
    another client changes the key in between, the whole step is retried, so
    concurrent writers cannot overwrite each other's tokens.

    Args:
        hllset_key (str): The Redis key for the HllSet.
        token (bytes): The token to add.

    Returns:
        str: The new SHA ID of the updated HllSet.
    """
    def _apply_update(pipe):
        # The key is already being watched here, so this GET runs immediately.
        hllset_bytes = pipe.get(hllset_key)
        if hllset_bytes is None:
            hllset = [0] * 1024  # Initialize if not exists
        else:
            hllset = deserialize_hllset(hllset_bytes)

        # Add the token and compute the ID of the resulting content
        hllset = add_token_to_hllset(hllset, token)
        new_id = generate_hllset_id(hllset)

        # Queue the write; EXEC aborts (and the callable is re-run) if the
        # watched key was modified after the GET above.
        pipe.multi()
        pipe.set(hllset_key, serialize_hllset(hllset))
        return new_id

    # value_from_callable=True returns _apply_update's result (the new ID)
    # instead of the raw EXEC reply.
    return r.transaction(_apply_update, hllset_key, value_from_callable=True)

def add_tokens_to_hllset_redis(hllset_key, tokens):
    """
    Add a vector of tokens to an HllSet stored in Redis as one atomic update.

    The read-modify-write runs as an optimistic transaction: the key is
    WATCHed, read, updated locally, and written back inside MULTI/EXEC. If
    another client changes the key in between, the whole step is retried, so
    concurrent writers cannot overwrite each other's tokens.

    Args:
        hllset_key (str): The Redis key for the HllSet.
        tokens (list): A list of tokens to add.

    Returns:
        str: The new SHA ID of the updated HllSet.
    """
    def _apply_update(pipe):
        # The key is already being watched here, so this GET runs immediately.
        hllset_bytes = pipe.get(hllset_key)
        if hllset_bytes is None:
            hllset = [0] * 1024  # Initialize if not exists
        else:
            hllset = deserialize_hllset(hllset_bytes)

        # Add the tokens and compute the ID of the resulting content
        hllset = add_tokens_to_hllset(hllset, tokens)
        new_id = generate_hllset_id(hllset)

        # Queue the write; EXEC aborts (and the callable is re-run) if the
        # watched key was modified after the GET above.
        pipe.multi()
        pipe.set(hllset_key, serialize_hllset(hllset))
        return new_id

    # value_from_callable=True returns _apply_update's result (the new ID)
    # instead of the raw EXEC reply.
    return r.transaction(_apply_update, hllset_key, value_from_callable=True)

# Helper functions
def add_token_to_hllset(hllset, token):
    """Add a single token to an HllSet, updating the list in place.

    The token is hashed with SHA-256 and the first 64 bits of the digest are
    used: the top 10 bits pick the register and the count of trailing zero
    bits picks which bit of that register is set.

    Args:
        hllset (list[int]): Registers; must be indexable up to 1023.
        token (bytes): The token to add (passed directly to hashlib.sha256).

    Returns:
        list[int]: The same list object that was passed in.
    """
    # 16 hex digits are exactly 64 bits, so no extra masking is needed.
    hash_val = int(hashlib.sha256(token).hexdigest()[:16], 16)
    index = (hash_val >> 54) & 0x3FF
    if hash_val:
        # Isolate the lowest set bit; its position is the trailing-zero count.
        trailing_zeros = (hash_val & -hash_val).bit_length() - 1
    else:
        # An all-zero hash has no set bit; the formula above would yield -1
        # and `1 << -1` raises ValueError. Use the highest bit that still
        # fits in the 8-byte serialized register.
        trailing_zeros = 63
    hllset[index] |= (1 << trailing_zeros)
    return hllset

def add_tokens_to_hllset(hllset, tokens):
    """Add a vector of tokens to an HllSet, updating the list in place.

    Each token is hashed with SHA-256 and the first 64 bits of the digest are
    used: the top 10 bits pick the register and the count of trailing zero
    bits picks which bit of that register is set.

    Args:
        hllset (list[int]): Registers; must be indexable up to 1023.
        tokens (iterable of bytes): Tokens to add; may be empty.

    Returns:
        list[int]: The same list object that was passed in.
    """
    for token in tokens:
        # 16 hex digits are exactly 64 bits, so no extra masking is needed.
        hash_val = int(hashlib.sha256(token).hexdigest()[:16], 16)
        index = (hash_val >> 54) & 0x3FF
        if hash_val:
            # Isolate the lowest set bit; its position is the trailing-zero count.
            trailing_zeros = (hash_val & -hash_val).bit_length() - 1
        else:
            # An all-zero hash would give -1 above and `1 << -1` raises
            # ValueError; use the highest bit that fits in 8 bytes instead.
            trailing_zeros = 63
        hllset[index] |= (1 << trailing_zeros)
    return hllset

def generate_hllset_id(hllset):
    """Return the hex SHA-256 digest of the serialized HllSet as its ID.

    Note: this re-declares the function of the same name from the earlier
    cell with the same behavior.
    """
    return hashlib.sha256(serialize_hllset(hllset)).hexdigest()

def serialize_hllset(hllset):
    """Encode each register as 8 big-endian bytes and concatenate the results.

    Note: this re-declares the function of the same name from the earlier
    cell with the same behavior.
    """
    encoded_words = (word.to_bytes(8, 'big') for word in hllset)
    return b''.join(encoded_words)

def deserialize_hllset(hllset_bytes):
    """Decode bytes into a list of integers, one per 8-byte big-endian chunk.

    Note: this re-declares the function of the same name from the earlier
    cell with the same behavior. A trailing chunk shorter than 8 bytes is
    decoded from the bytes that remain.
    """
    word_size = 8
    starts = range(0, len(hllset_bytes), word_size)
    return [int.from_bytes(hllset_bytes[s:s + word_size], 'big') for s in starts]

# Example usage: both calls update the same Redis key, so the second call
# builds on whatever the first one stored.
# NOTE(review): the key is never cleared in this notebook, so re-running this
# cell adds to the value already in Redis rather than starting from empty.
hllset_key = "hllset_key"
new_id = add_token_to_hllset_redis(hllset_key, b"token1")
print(f"New ID after adding single token: {new_id}")

# Add three more tokens to the same key in a single call.
new_id = add_tokens_to_hllset_redis(hllset_key, [b"token2", b"token3", b"token4"])
print(f"New ID after adding multiple tokens: {new_id}")

New ID after adding single token: a01b87572ef174a409d65c09170f410bba900ba7c7fe32f728a14f3e16fc82e2
New ID after adding multiple tokens: a01b87572ef174a409d65c09170f410bba900ba7c7fe32f728a14f3e16fc82e2


In [6]:
def create_hllset(tokens):
    """Build a new 1024-register HllSet from an iterable of tokens.

    Each token is hashed with SHA-256 and the first 64 bits of the digest are
    used: the top 10 bits pick the register and the count of trailing zero
    bits picks which bit of that register is set.

    Args:
        tokens (iterable of bytes): Tokens to add; may be empty.

    Returns:
        list[int]: A fresh list of 1024 integers.
    """
    hllset = [0] * 1024  # Initialize HllSet vector
    for token in tokens:
        # Hash token to 64-bit integer (16 hex digits are exactly 64 bits)
        hash_val = int(hashlib.sha256(token).hexdigest()[:16], 16)
        # Get index (first 10 bits)
        index = (hash_val >> 54) & 0x3FF  # 0x3FF is 1023 in decimal
        if hash_val:
            # Count trailing zeros via the lowest set bit
            trailing_zeros = (hash_val & -hash_val).bit_length() - 1
        else:
            # An all-zero hash would give -1 above and `1 << -1` raises
            # ValueError; use the highest bit that fits in 8 bytes instead.
            trailing_zeros = 63
        # Set bit in HllSet
        hllset[index] |= (1 << trailing_zeros)
    return hllset

def store_hllset_redis(hllset, set_id=None):
    """Store an HllSet in Redis and return the key it was written to.

    This definition replaces the one-argument store_hllset_redis from the
    earlier cell. `set_id` is optional so that the earlier call form
    `store_hllset_redis(hllset)` keeps working after this cell has run.

    Args:
        hllset (list[int]): Registers; each must fit in an unsigned 64-bit word.
        set_id (str, optional): Redis key to write to. When omitted, the hex
            SHA-256 digest of the serialized content is used as the key.

    Returns:
        str: The Redis key that now holds the serialized HllSet.
    """
    # Convert HllSet to bytes (8 big-endian bytes per register)
    hllset_bytes = b''.join(x.to_bytes(8, 'big') for x in hllset)
    if set_id is None:
        # Content-addressed key, computed the same way as generate_hllset_id
        set_id = hashlib.sha256(hllset_bytes).hexdigest()
    # Store in Redis
    r.set(set_id, hllset_bytes)
    return set_id

def union_hllsets(set_id_1, set_id_2, result_id):
    """Compute the union of two stored HllSets on the Redis server.

    Issues a BITOP OR over the two source keys and writes the result to
    `result_id`; nothing is returned to the caller.

    Args:
        set_id_1 (str): Redis key of the first HllSet.
        set_id_2 (str): Redis key of the second HllSet.
        result_id (str): Redis key that receives the OR-ed value.
    """
    source_keys = (set_id_1, set_id_2)
    r.bitop('OR', result_id, *source_keys)

# Example usage: two token lists that share b'token2' and b'token3'.
tokens_A = [b'token1', b'token2', b'token3']
tokens_B = [b'token2', b'token3', b'token4']

# Build each HllSet locally from its tokens.
hllset_A = create_hllset(tokens_A)
hllset_B = create_hllset(tokens_B)

# Store both under fixed, human-readable keys.
store_hllset_redis(hllset_A, 'hllset_A')
store_hllset_redis(hllset_B, 'hllset_B')

# OR the two stored values server-side; the result is written to 'hllset_union'.
union_hllsets('hllset_A', 'hllset_B', 'hllset_union')