# Testing entity.jl

This notebook experiments with a new structure called Entity, designed to encapsulate metadata using HllSets. It builds on SimpleGrad.jl by Mike Saint-Antoine, a Julia adaptation of Andrej Karpathy's foundational micrograd project. While the original frameworks by Mike Saint-Antoine and Andrej Karpathy use Numbers as the fundamental components of their neural networks, our approach replaces Numbers with HllSets.

It is often argued that Numbers in conventional neural network models are derived by transforming "real" entities through numerical embedding. This method is effective for those entities. However, we aim to shift our focus from data to metadata, thereby describing entities more abstractly. A significant advantage of using metadata is its inherent ability to categorically separate entities into semantically similar groups. These groups are not strictly distinct; the sets of entities described by different metadata often overlap.

The most crucial aspect for us is that each piece of metadata corresponds to a specific set of entities. In the realm of metadata, an HllSet serves as an embedding for a collection of entities. This embedding is represented not as a numerical value but as a fixed-size collection of bit-vectors, which can be viewed as a 2-dimensional tensor of shape (64, P). In Julia, this is expressed as a Vector{BitVector}, where each BitVector has a fixed length of 64 bits and the number of BitVectors is determined by P. The parameter P defines the precision of the HyperLogLog approximation of the collection of entities.


In [1]:
include("src/entity.jl")

using .HllGrad
using .HllSets

using Random
using Base



In [2]:
# Five empty HllSets, all created with type parameter 10
hll1, hll2, hll3, hll4, hll5 = (HllSets.HllSet{10}() for _ in 1:5)

# One Set of random 7-character strings per HllSet; each size is the number of
# draws, generated in the order s1 through s5
s1, s2, s3, s4, s5 = (
    Set(randstring(7) for _ in 1:draws) for draws in (10, 15, 100, 20, 130)
)

# Feed every dataset into its matching HllSet
for (hll, strings) in zip((hll1, hll2, hll3, hll4), (s1, s2, s3, s4))
    HllSets.add!(hll, strings)
end
# Kept as the final statement so the cell still evaluates to this call's result
HllSets.add!(hll5, s5)

In [3]:
# Wrap the first two HllSets in Entity nodes
entity1, entity2 = HllGrad.Entity{10}(hll1), HllGrad.Entity{10}(hll2)
# Exercise the comparison; the result is not stored
HllGrad.isequal(entity1, entity2)

# Read P back from the concrete type's parameter list
P_type = first(typeof(entity1).parameters)

println("The type parameter P is: ", P_type)

# Last expression of the cell, so the notebook renders entity1
entity1

The type parameter P is: 10



Entity(sha1: 8fe6a17a8c280a6716da73b194812b0ff02e1d61;
 hll_count: 11;
 grad: 0.0;
 op: nothing);



In [4]:
# Combine the two entities with HllGrad.union. In the recorded output the result's
# `op` field is an Operation that references both input entities.
c = HllGrad.union(entity1, entity2)
# Bare reference; it is not the cell's last expression, so nothing is displayed for it.
c
println(c)


Entity(sha1: 04abb4ce6da6da921a395c74dc8f585e91c9580f;
 hll_count: 27;
 grad: 0.0;
 op: Main.HllGrad.Operation{typeof(union), Tuple{Entity{10}, Entity{10}}}(union, (
Entity(sha1: 8fe6a17a8c280a6716da73b194812b0ff02e1d61;
 hll_count: 11;
 grad: 0.0;
 op: nothing);

, 
Entity(sha1: aaadde0638b68333c4384820a6d505f4081a20fe;
 hll_count: 16;
 grad: 0.0;
 op: nothing);

)));




In [5]:
# Print what HllSets.count reports for the HllSet stored in entity1.
# NOTE(review): the recorded run printed 11 although s1 was built from 10 draws —
# presumably HyperLogLog estimation error; confirm.
println("Entity 1: ", HllSets.count(entity1.hll))

Entity 1: 11


In [6]:
# Print c's operation, run HllGrad.backprop! on it, then print it again so the two
# dumps can be compared.
# NOTE(review): in the recorded output both dumps are identical and every grad is
# still 0.0 — presumably because c.grad is 0.0 when backprop! is called; confirm.
println(c.op)
HllGrad.backprop!(c, c.op)
println(c.op)
# Last expression of the cell, so the notebook displays c.
c

Main.HllGrad.Operation{typeof(union), Tuple{Entity{10}, Entity{10}}}(union, (
Entity(sha1: 8fe6a17a8c280a6716da73b194812b0ff02e1d61;
 hll_count: 11;
 grad: 0.0;
 op: nothing);

, 
Entity(sha1: aaadde0638b68333c4384820a6d505f4081a20fe;
 hll_count: 16;
 grad: 0.0;
 op: nothing);

))
Main.HllGrad.Operation{typeof(union), Tuple{Entity{10}, Entity{10}}}(union, (
Entity(sha1: 8fe6a17a8c280a6716da73b194812b0ff02e1d61;
 hll_count: 11;
 grad: 0.0;
 op: nothing);

, 
Entity(sha1: aaadde0638b68333c4384820a6d505f4081a20fe;
 hll_count: 16;
 grad: 0.0;
 op: nothing);

))



Entity(sha1: 04abb4ce6da6da921a395c74dc8f585e91c9580f;
 hll_count: 27;
 grad: 0.0;
 op: Main.HllGrad.Operation{typeof(union), Tuple{Entity{10}, Entity{10}}}(union, (
Entity(sha1: 8fe6a17a8c280a6716da73b194812b0ff02e1d61;
 hll_count: 11;
 grad: 0.0;
 op: nothing);

, 
Entity(sha1: aaadde0638b68333c4384820a6d505f4081a20fe;
 hll_count: 16;
 grad: 0.0;
 op: nothing);

)));



In [7]:
# Intersect the two entities and print what HllSets.count reports for the result.
d = HllGrad.intersect(entity1, entity2)

println(HllSets.count(d.hll))

1


In [8]:
# Print d's operation, run HllGrad.backprop! on it, then print it again so the two
# dumps can be compared.
# NOTE(review): in the recorded output both dumps are identical and every grad is
# still 0.0 — presumably because d.grad is 0.0 when backprop! is called; confirm.
println(d.op)
HllGrad.backprop!(d, d.op)
println(d.op)
# Last expression of the cell, so the notebook displays d.
d

Main.HllGrad.Operation{typeof(intersect), Tuple{Entity{10}, Entity{10}}}(intersect, (
Entity(sha1: 8fe6a17a8c280a6716da73b194812b0ff02e1d61;
 hll_count: 11;
 grad: 0.0;
 op: nothing);

, 
Entity(sha1: aaadde0638b68333c4384820a6d505f4081a20fe;
 hll_count: 16;
 grad: 0.0;
 op: nothing);

))
Main.HllGrad.Operation{typeof(intersect), Tuple{Entity{10}, Entity{10}}}(intersect, (
Entity(sha1: 8fe6a17a8c280a6716da73b194812b0ff02e1d61;
 hll_count: 11;
 grad: 0.0;
 op: nothing);

, 
Entity(sha1: aaadde0638b68333c4384820a6d505f4081a20fe;
 hll_count: 16;
 grad: 0.0;
 op: nothing);

))



Entity(sha1: 4a9b66603d1b6446899a1e7fcbbd2bd8378d4e61;
 hll_count: 1;
 grad: 0.0;
 op: Main.HllGrad.Operation{typeof(intersect), Tuple{Entity{10}, Entity{10}}}(intersect, (
Entity(sha1: 8fe6a17a8c280a6716da73b194812b0ff02e1d61;
 hll_count: 11;
 grad: 0.0;
 op: nothing);

, 
Entity(sha1: aaadde0638b68333c4384820a6d505f4081a20fe;
 hll_count: 16;
 grad: 0.0;
 op: nothing);

)));



In [9]:
# Apply HllGrad.xor to the two entities and print what HllSets.count reports for it.
# NOTE(review): in the recorded outputs this result has the same sha1 and count (27)
# as the union `c`, although the intersection `d` reported a count of 1 — confirm
# that HllGrad.xor really excludes the shared part.
e = HllGrad.xor(entity1, entity2)


println(HllSets.count(e.hll))

27


In [10]:
# Print e's operation, run HllGrad.backprop! on it, then print it again so the two
# dumps can be compared.
# NOTE(review): in the recorded output both dumps are identical and every grad is
# still 0.0 — presumably because e.grad is 0.0 when backprop! is called; confirm.
println(e.op)
HllGrad.backprop!(e, e.op)
println(e.op)
# Last expression of the cell, so the notebook displays e.
e

Main.HllGrad.Operation{typeof(xor), Tuple{Entity{10}, Entity{10}}}(xor, (
Entity(sha1: 8fe6a17a8c280a6716da73b194812b0ff02e1d61;
 hll_count: 11;
 grad: 0.0;
 op: nothing);

, 
Entity(sha1: aaadde0638b68333c4384820a6d505f4081a20fe;
 hll_count: 16;
 grad: 0.0;
 op: nothing);

))
Main.HllGrad.Operation{typeof(xor), Tuple{Entity{10}, Entity{10}}}(xor, (
Entity(sha1: 8fe6a17a8c280a6716da73b194812b0ff02e1d61;
 hll_count: 11;
 grad: 0.0;
 op: nothing);

, 
Entity(sha1: aaadde0638b68333c4384820a6d505f4081a20fe;
 hll_count: 16;
 grad: 0.0;
 op: nothing);

))



Entity(sha1: 04abb4ce6da6da921a395c74dc8f585e91c9580f;
 hll_count: 27;
 grad: 0.0;
 op: Main.HllGrad.Operation{typeof(xor), Tuple{Entity{10}, Entity{10}}}(xor, (
Entity(sha1: 8fe6a17a8c280a6716da73b194812b0ff02e1d61;
 hll_count: 11;
 grad: 0.0;
 op: nothing);

, 
Entity(sha1: aaadde0638b68333c4384820a6d505f4081a20fe;
 hll_count: 16;
 grad: 0.0;
 op: nothing);

)));



In [11]:
# Destructure the three values returned by HllGrad.diff(c, d) and print them in
# reverse order of assignment.
# NOTE(review): `rem` is also the name of a function exported by Base, so this
# binding shadows it in the notebook's namespace — consider another name.
# NOTE(review): in the recorded output the entity printed first (`del`) carries an
# `added` operation — confirm that the return order of HllGrad.diff matches these
# variable names.
n, rem, del = HllGrad.diff(c,d)
println(del)
println(rem)
println(n)



Entity(sha1: 4a9b66603d1b6446899a1e7fcbbd2bd8378d4e61;
 hll_count: 1;
 grad: 1.0;
 op: Main.HllGrad.Operation{typeof(Main.HllGrad.added), Tuple{Entity{10}, Entity{10}}}(Main.HllGrad.added, (
Entity(sha1: 04abb4ce6da6da921a395c74dc8f585e91c9580f;
 hll_count: 27;
 grad: 0.0;
 op: Main.HllGrad.Operation{typeof(union), Tuple{Entity{10}, Entity{10}}}(union, (
Entity(sha1: 8fe6a17a8c280a6716da73b194812b0ff02e1d61;
 hll_count: 11;
 grad: 0.0;
 op: nothing);

, 
Entity(sha1: aaadde0638b68333c4384820a6d505f4081a20fe;
 hll_count: 16;
 grad: 0.0;
 op: nothing);

)));

, 
Entity(sha1: 4a9b66603d1b6446899a1e7fcbbd2bd8378d4e61;
 hll_count: 1;
 grad: 0.0;
 op: Main.HllGrad.Operation{typeof(intersect), Tuple{Entity{10}, Entity{10}}}(intersect, (
Entity(sha1: 8fe6a17a8c280a6716da73b194812b0ff02e1d61;
 hll_count: 11;
 grad: 0.0;
 op: nothing);

, 
Entity(sha1: aaadde0638b68333c4384820a6d505f4081a20fe;
 hll_count: 16;
 grad: 0.0;
 op: nothing);

)));

)));



Entity(sha1: 4a9b66603d1b6446899a1e7fcbbd2bd

In [12]:
# NOTE(review): the recorded run of this cell failed with
# "UndefVarError: `adv` not defined", so `f` is never assigned here and the cells
# that use `f` cannot run — confirm the intended HllGrad function name.
f = HllGrad.adv(c,d)

println(f)

LoadError: UndefVarError: `adv` not defined

In [None]:
# This cell has no execution count in the recorded session. It needs `f`, whose
# defining cell failed in the recorded run, and HllGrad.adv.
# NOTE(review): the print inspects the third argument of f's operation rather than
# anything from the freshly computed `g` — confirm which one is intended.
g = HllGrad.adv(d,f)
println(f.op.args[3])

In [None]:
# This cell has no execution count in the recorded session. It needs `f` and `g`
# from the preceding cells as well as HllGrad.adv.
h = HllGrad.adv(f,g)

# Print the first argument recorded in h's operation.
println(h.op.args[1])

In [None]:
using Random

"""
    remove_random_bits(bitvectors::Vector{BitVector}, N::Int)

Clear exactly `N` randomly chosen `true` bits across `bitvectors`, in place, and
return the same `bitvectors` object.

Each removal picks, uniformly at random, one of the vectors that still contains a
`true` bit, then one of that vector's `true` positions uniformly at random. Vectors
without any `true` bit (including zero-length ones) are never selected, so every
iteration clears one bit.

# Arguments
- `bitvectors`: vectors to modify; the elements themselves are mutated.
- `N`: number of `true` bits to clear. Values `<= 0` leave the input untouched.

# Throws
- `ErrorException` if `N` exceeds the total number of `true` bits.
"""
function remove_random_bits(bitvectors::Vector{BitVector}, N::Int)
    # `init=0` keeps the total well-defined when `bitvectors` is empty
    total_bits = sum(count, bitvectors; init=0)
    if N > total_bits
        error("N is greater than the total number of true bits in the BitVectors")
    end

    # Shares its elements with `bitvectors`, so clearing bits here mutates the input
    candidates = filter(any, bitvectors)

    for _ in 1:N
        slot = rand(eachindex(candidates))
        bv = candidates[slot]
        bv[rand(findall(bv))] = false

        # Retire exhausted vectors so a later draw cannot be wasted on them
        any(bv) || deleteat!(candidates, slot)
    end

    return bitvectors
end

# Example usage
bitvectors = [BitVector([true, false, true]), BitVector([true, true, false]), BitVector([false, true, true]), BitVector([])]
N = 3
modified_bitvectors = remove_random_bits(bitvectors, N)
println(modified_bitvectors)