In [1]:
using Random
using FilePathsBase: extension, Path

include("src/sets32.jl")

import .HllSets as set

# Five empty HllSet{10} instances, one per dataset below
hll1, hll2, hll3, hll4, hll5 = (set.HllSet{10}() for _ in 1:5)

# Datasets of random 7-character strings; the numbers are how many strings are
# drawn for each Set (drawn in order s1, s2, ..., s5)
s1, s2, s3, s4, s5 = (Set(randstring(7) for _ in 1:n) for n in (10, 15, 100, 20, 130))

# Feed each dataset into the HllSet with the matching index
for (hll, items) in zip((hll1, hll2, hll3, hll4, hll5), (s1, s2, s3, s4, s5))
    set.add!(hll, items)
end

println(hll1)

println("Size of: ", set.sizeof(hll1))

HllSet{10}()

Size of: 1024


In [2]:
# Exact size of every dataset next to the count reported by its HllSet
for (items, hll) in zip((s1, s2, s3, s4, s5), (hll1, hll2, hll3, hll4, hll5))
    println(length(items), " : ", count(hll))
end
println()

# Union of all five: exact size vs. HllSet count
let exact = length(s1 ∪ s2 ∪ s3 ∪ s4 ∪ s5),
    estimate = count(hll1 ∪ hll2 ∪ hll3 ∪ hll4 ∪ hll5)

    println(exact, " : ", estimate)
end

# Intersection of all five: exact size vs. HllSet count
let exact = length(s1 ∩ s2 ∩ s3 ∩ s4 ∩ s5),
    estimate = count(hll1 ∩ hll2 ∩ hll3 ∩ hll4 ∩ hll5)

    println(exact, " : ", estimate)
end

10 : 9
15 : 18
100 : 99
20 : 18
130 : 132

275 : 280
0 : 1


In [3]:
# Three HllSets filled with overlapping runs of "string<i>" items
A = set.HllSet{10}()
B = set.HllSet{10}()
C = set.HllSet{10}()

items_t1 = Set("string$i" for i in 0:10)   # string0 … string10
items_t2 = Set("string$i" for i in 3:11)   # string3 … string11
items_t3 = Set("string$i" for i in 5:11)   # string5 … string11

for (hll, items) in zip((A, B, C), (items_t1, items_t2, items_t3))
    set.add!(hll, items)
end

# Local "universe" for the ratios below
U = A ∪ B ∪ C

# Take every count once, then print the ratios derived from them
let a = count(A), b = count(B), c = count(C), u = count(U),
    ab = count(A ∩ B), ac = count(A ∩ C), bc = count(B ∩ C)

    println("A: ", a)
    println("B: ", b)
    println("C: ", c)
    println("U: ", u, "\n")

    println("AB = A ∩ B: ", ab)
    println("AC = A ∩ C: ", ac)
    println("BC = B ∩ C: ", bc, "\n")

    println("P(A) = A / U: ", a / u)
    println("P(B) = B / U: ", b / u)
    println("P(C) = C / U: ", c / u, "\n")

    println("P(A | B) = AB / B: ", ab / b)
    println("P(B | A) = AB / A: ", ab / a)
    println("P(A | C) = AC / C: ", ac / c)
    println("P(C | A) = AC / A: ", ac / a, "\n")

    println("P(B | C) = BC / C: ", bc / c)
    println("P(C | B) = BC / B: ", bc / b, "\n")
end

# --- Disabled scratch code (refers to hll_1 / hll_2, which this cell does not define) ---
# hll_diff = set.set_xor(hll_1, hll_2)
# println("HLL xor: ", count(hll_diff))

# hll_int = intersect(hll_1, hll_2)

# println("hll_int: ", count(hll_int))

# println()
# println("=====================================")
# hll_comp_1 = set.set_comp(hll_1, hll_2)
# println("Comp 1: ", count(hll_comp_1))
# println("hll_1: ", count(hll_1))

# println()
# println("=====================================")
# hll_comp_2 = set.set_comp(hll_2, hll_1)
# println("Comp 2: ", count(hll_comp_2))
# println("hll_2: ", count(hll_2))

A: 11
B: 9
C: 7
U: 12

AB = A ∩ B: 8
AC = A ∩ C: 6
BC = B ∩ C: 7

P(A) = A / U: 0.9166666666666666
P(B) = B / U: 0.75
P(C) = C / U: 0.5833333333333334

P(A | B) = AB / B: 0.8888888888888888
P(B | A) = AB / A: 0.7272727272727273
P(A | C) = AC / C: 0.8571428571428571
P(C | A) = AC / A: 0.5454545454545454

P(B | C) = BC / C: 1.0
P(C | B) = BC / B: 0.7777777777777778



In [4]:
# Two random 10-bit vectors
bv1 = bitrand(10)
bv2 = bitrand(10)

# Relative complement of bv1 in bv2: the bits that are set in bv2 but not in bv1
complement = bv2 .& .!bv1

println(complement)

Bool[0, 0, 0, 1, 0, 0, 1, 0, 0, 0]


In [5]:
# Call set.restore with a fresh HllSet{10} and hll1's `counts` field, then
# compare the returned value with hll1.
# NOTE(review): hll_3 is presumably filled in place by restore — confirm in src/sets32.jl.
hll_3 = set.HllSet{10}()
hll_res = set.restore(hll_3, hll1.counts)

# Last expression of the cell, so its value is what the notebook displays
set.isequal(hll1, hll_res)

true

In [6]:
# Diff hll1 against hll2 and report the count of every field of the result
delta1 = set.diff(hll1, hll2)
println(typeof(delta1))

for field in keys(delta1)
    println(field, ": ", count(delta1[field]))
end

@NamedTuple{DEL::Main.HllSets.HllSet{10}, RET::Main.HllSets.HllSet{10}, NEW::Main.HllSets.HllSet{10}}
DEL: 9
RET: 1
NEW: 15


In [7]:
# Round trip: dump hll_3, restore the dump into a fresh HllSet{10}, then print the
# count of the source followed by the count of the restored target
_dump = set.dump(hll_3)
# println(_dump)   # uncomment to inspect the raw dump
hll_dump = set.HllSet{10}()
restored = set.restore(hll_dump, Vector(_dump))
foreach(h -> println(set.count(h)), (hll_3, hll_dump))

9
9


# Proving Fundamental Set properties
## Fundamental properties:

    Commutative property

            1. (A ∪ B) = (B ∪ A)
            2. (A ∩ B) = (B ∩ A)

    Associative property

            3. ((A ∪ B) ∪ C) = (A ∪ (B ∪ C))
            4. ((A ∩ B) ∩ C) = (A ∩ (B ∩ C))

    Distributive property:

            5. ((A ∪ B) ∩ C) = (A ∩ C) ∪ (B ∩ C)
            6.  ((A ∩ B) ∪ C) = (A ∪ C) ∩ (B ∪ C)

    Identity (Z is the empty set, U is the universal set):

            7. (A ∪ Z) = A
            8. (A ∩ U) = A
## Some additional laws:

    Idempotent laws:

            1. (A ∪ A) = A
            2. (A ∩ A) = A

In [8]:
# Check the fundamental set laws on HllSets.
#
# Every law is checked by comparing the `count` of both sides, i.e. the two
# sides are required to have the same reported cardinality.
# NOTE(review): equal counts are weaker than equal sets; consider comparing
# with set.isequal if that is the intended claim.
A = hll1
B = hll2
C = hll_3

# Local empty set
Z = set.HllSet{10}()

# Local universal set
U = A ∪ B ∪ C

# label => result, in the order they are printed.
# Check 10 used to repeat check 8 (A ∩ U); it now tests the idempotent law
# for intersection, (A ∩ A) = A, which was previously never exercised.
laws = [
    " 1. (A ∪ B) = (B ∪ A): " => (count(A ∪ B) == count(B ∪ A)),
    " 2. (A ∩ B) = (B ∩ A): " => (count(A ∩ B) == count(B ∩ A)),
    " 3. (A ∪ B) ∪ C) = (A ∪ (B ∪ C)): " => (count((A ∪ B) ∪ C) == count(A ∪ (B ∪ C))),
    " 4. (A ∩ B) ∩ C) = (A ∩ (B ∩ C)): " => (count((A ∩ B) ∩ C) == count(A ∩ (B ∩ C))),
    " 5. ((A ∪ B) ∩ C) = (A ∩ C) ∪ (B ∩ C): " =>
        (count((A ∪ B) ∩ C) == count((A ∩ C) ∪ (B ∩ C))),
    " 6. ((A ∩ B) ∪ C) = (A ∪ C) ∩ (B ∪ C): " =>
        (count((A ∩ B) ∪ C) == count((A ∪ C) ∩ (B ∪ C))),
    " 7. (A ∪ Z) = A: " => (count(A ∪ Z) == count(A)),
    " 8. (A ∩ U) = A: " => (count(A ∩ U) == count(A)),
    " 9. (A ∪ A) = A: " => (count(A ∪ A) == count(A)),
    "10. (A ∩ A) = A: " => (count(A ∩ A) == count(A)),
]

# Each entry starts on a new line; no trailing newline after the last one
for (label, holds) in laws
    print("\n", label, holds)
end


 1. (A ∪ B) = (B ∪ A): true
 2. (A ∩ B) = (B ∩ A): true
 3. (A ∪ B) ∪ C) = (A ∪ (B ∪ C)): true
 4. (A ∩ B) ∩ C) = (A ∩ (B ∩ C)): true
 5. ((A ∪ B) ∩ C) = (A ∩ C) ∪ (B ∩ C): true
 6. ((A ∩ B) ∪ C) = (A ∪ C) ∩ (B ∪ C): true
 7. (A ∪ Z) = A: true
 8. (A ∩ U) = A: true
 9. (A ∪ A) = A: true
10. (A ∩ U) = A: true