In [None]:
import itertools
import math
import os

from collections import Counter
from functools import partial
from pathlib import Path
from time import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.ticker import MaxNLocator

# Render figures inline in the notebook and set the default resolution and
# size used by every plot below.
%matplotlib inline
mpl.rcParams['figure.dpi']     = 100
mpl.rcParams['figure.figsize'] = [10, 5]

def math_comb(n, r):
    """Pure-Python stand-in for ``math.comb``: the binomial coefficient C(n, r).

    Mirrors the standard-library behaviour: returns 0 when ``r > n`` (the
    factorial formula would raise there) and raises ``ValueError`` for
    negative arguments.
    """
    if n < 0 or r < 0:
        raise ValueError('n and r must be non-negative integers')
    if r > n:
        return 0
    # C(n, r) == C(n, n - r); use the smaller one to shorten the loop.
    r = min(r, n - r)
    result = 1
    for i in range(1, r + 1):
        # After step i the value is C(n - r + i, i), so the division is exact.
        result = result * (n - r + i) // i
    return result


if not hasattr(math, 'comb'):
    # Interpreters without math.comb: install the fallback under the same name.
    math.comb = math_comb

import fpfz
import primes
from itertools_recipes import take, first_true

# Output directory for exported CSV tables, relative to the working directory.
# Created on first run; an existing directory is left untouched.
csvdir = Path('csv')
csvdir.mkdir(exist_ok=True)

In [None]:
# Shared helper objects used throughout the notebook.
# NOTE(review): the roles below are inferred from the class names in the
# project-local `primes` / `fpfz` modules - confirm against those modules.
pg     = primes.Generator()
pp     = primes.Power(pg)
calc_u = fpfz.UniverseSizeCalculator(pg)
calc_m = fpfz.MemoryCalculator(pg, calc_u)
matgen = fpfz.MatrixGenerator(calc_u, calc_m, pg, rotate=False)

# Seed the generator so the Monte-Carlo experiments below give the same
# counts on every Restart & Run All. Change SEED to draw a different sample.
SEED = 0
rng = np.random.default_rng(SEED)

In [None]:
# Directory holding the pickled cache snapshots. The location can be
# overridden with the FPFZ_DATA_DIR environment variable so the notebook runs
# on other machines; the original author's path remains the default.
dicts_dir = Path(os.environ.get(
    'FPFZ_DATA_DIR',
    '/Users/mizrahi/Library/CloudStorage/OneDrive-Technion/Data',
))
# File-name template for snapshots; the placeholder is a timestamp when
# writing and '*' when globbing.
dict_format = 'calc_m_{}.pkl'

def load_dict():
    """Merge every cache snapshot found in ``dicts_dir`` into one dict.

    Each file matching ``dict_format`` (with ``*`` as the placeholder) is read
    with ``fpfz.read_from_disk`` and folded into the result with
    ``dict.update``, so for duplicate keys the file visited last wins.
    Progress is printed after each file.

    Returns:
        dict: the union of all snapshots (empty if no file matches).
    """
    merged = {}
    pattern = dict_format.format('*')
    for snapshot_path in dicts_dir.glob(pattern):
        merged.update(fpfz.read_from_disk(snapshot_path))
        print(f'loaded {snapshot_path}, |d| = {len(merged)}')
    print('done')
    return merged

In [None]:
# Warm the in-memory cache of calc_m with every snapshot found on disk.
calc_m.rec_cache.update(load_dict())

In [None]:
# Persist the current cache as a new snapshot. The file name embeds the
# current time in hundredths of a second, so earlier snapshots are not
# overwritten.
fpfz.write_to_disk(
    dicts_dir / dict_format.format(int(time()*100)),
    calc_m.rec_cache
)

# Analysis

## Hard error rate
We can bound the number of errors in the whole message

In [None]:
def f(lffz, d, b, w):
    """Total size ``2 * l * lffz(n, d)`` for a ``b``-bit message in ``w``-bit words.

    Args:
        lffz: callable ``(n, d) -> number``; the size function being evaluated.
        d: second argument forwarded to ``lffz``.
        b: message length in bits.
        w: word width in bits.

    Returns:
        ``2 * l * lffz(2**l, d)`` where ``l = w + ceil(log2(ceil(b / w)))``.
    """
    n_words = math.ceil(b / w)
    # One entry needs the w payload bits plus enough bits to index a word.
    entry_bits = w + math.ceil(math.log2(n_words))
    universe = 2 ** entry_bits
    return 2 * entry_bits * lffz(universe, d)

# Size functions passed as the first argument of f(); each is called as (n, d).
lffz   = calc_m.recursive
egh    = calc_m.egh
# Same as lffz but with the keyword argument k fixed to 4.
lffz_k = lambda n,d: calc_m.recursive(n, d, k=4)

In [None]:
# Redundancy (total size / message size) as a function of the word size,
# for a fixed 2**16-bit message and several values of d.
b = 2**16

ws_exp = np.arange(1, 4)
ws = 2**ws_exp
for d in (3, 5, 7, 11, 13):
    redundancy = [f(lffz, d, b, w)/b for w in ws]
    plt.plot(ws, redundancy, label=d)
plt.xlabel('words size')
plt.ylabel('redundancy')
plt.loglog()
plt.legend(title='$d$')
plt.show()

In [None]:
# Redundancy as a function of the message size for 2-bit words; the table is
# also exported to csv/biff.csv. Columns are labelled by d//2, and the
# 'k4_' columns use the k=4 variant of the size function.
w = 2**1

bs = 2**np.arange(4, 1+20)
df = pd.DataFrame(index=bs)
for d in [2, 5, 7, 11]:
    df[f'{d//2}']    = [f(lffz  , d, b, w)/b for b in bs]
    df[f'k4_{d//2}'] = [f(lffz_k, d, b, w)/b for b in bs]
    # plt.plot(bs, [f(egh , d, b, w)/b for b in bs], '.-', label=f'EGH{d}')
df.plot()
df.to_csv(csvdir / 'biff.csv', index_label='b')
plt.xlabel('message size')
plt.ylabel('redundancy')
plt.loglog()
# Raw string: '\d' is not a valid escape sequence in a normal string literal.
plt.legend(title=r'$\delta$')
plt.show()

In [None]:
# Total size divided by log2(message size), for 2-bit words; curves are
# labelled by d//2.
w = 2**1

bs = 2**np.arange(4, 1+21)
for d in [2, 5, 7, 11]:
    plt.plot(bs, [f(lffz, d, b, w)/math.log2(b) for b in bs], label=f'{d//2}')
plt.xlabel('message size')
plt.ylabel('???')
plt.loglog()
# Raw string: '\d' is not a valid escape sequence in a normal string literal.
plt.legend(title=r'$\delta$')
plt.show()

In [None]:
# Total size normalised by ceil(d/2) * log2(message size), for 2-bit words.
w = 2**1

bs = 2**np.arange(4, 22)
for d in (2, 3, 5, 7, 9, 11):
    normalised = [
        f(lffz, d, b, w) / (math.ceil(d/2) * math.log2(b))
        for b in bs
    ]
    plt.plot(bs, normalised, '.-', label=f'{d}')
plt.xlabel('message size')
plt.ylabel('???')
plt.loglog()
plt.legend(title='$d$')
plt.show()

## Evaluation

In [None]:
# Rebinds lffz for the evaluation section: calc_m.recursive with its third
# positional argument fixed to None.
lffz = lambda n,d: calc_m.recursive(n,d,None)

In [None]:
# Evaluation parameters: message of b bits split into w-bit words.
b = 2**12
w = 2
# number of words in the message
t = math.ceil(b/w)
# Bits needed to index a word. Deliberately not named `f`: that would
# overwrite the function f() defined in the analysis section and break its
# cells on re-run.
index_bits = math.ceil(math.log2(t))
# bits per entry: word payload plus word index
l = w + index_bits
# universe size
n = 2**l
n

In [None]:
# e is the base parameter; d is set to twice e and passed to lffz and
# matgen.recursive below.
e = 2
d=2*e

In [None]:
# Display the value of the size function for the chosen n and d.
lffz(n=n, d=d)

In [None]:
# Same expression as the return value of f() in the analysis section:
# 2 * l * lffz(n, d).
total_size = 2*l*lffz(n, d)
total_size

In [None]:
# Redundancy: total size relative to the message size in bits.
r = total_size/b
r

In [None]:
# Build the two matrices with identical shapes, then keep only the
# is_decodable bound methods of their decoders under the names rec / iblt.
rec_matrix  = matgen.recursive(n=n, d=d)
m           = rec_matrix.shape[1]
iblt_matrix = matgen.iblt(n=n, m=m)

assert rec_matrix.shape == iblt_matrix.shape

rec  = fpfz.MatrixDecoder(rec_matrix).is_decodable
iblt = fpfz.MatrixDecoder(iblt_matrix).is_decodable

# Drop the temporary names so that, as before, only rec / iblt / m remain.
del rec_matrix, iblt_matrix

In [None]:
def try_once(iblt, e):
    """Run one random trial with ``e`` drawn bit positions and test decodability.

    Reads the notebook globals ``b`` (message bits), ``w`` (word bits) and
    ``rng``.

    Args:
        iblt: callable taking the array of entries and returning the decode
            result (here an ``is_decodable`` bound method).
        e: number of bit positions to draw (drawn with replacement, so the
            number of distinct affected words may be smaller).

    Returns:
        Whatever ``iblt`` returns for the generated entries.
    """
    # Draw erroneous bit positions and collapse them to distinct word indices.
    bit_positions = rng.choice(b, e)
    word_idxs = np.unique(bit_positions // w)
    n_bad = len(word_idxs)

    # Random original content for each affected word.
    words = rng.integers(2**w, size=n_bad)
    # XOR with a non-zero mask so the corrupted word always differs.
    flip_masks = rng.integers(2**w - 1, size=n_bad) + 1
    error = words ^ flip_masks

    # Entry = (word index << w) + word content, for both original and corrupted.
    delta = (np.vstack([words, error]) + (word_idxs << w)).ravel()

    return iblt(delta)

In [None]:
def try_once_iblt(r, b, s=1, e=1):
    """Check whether a hash-based IBLT sized by redundancy ``r`` decodes 2*e items.

    The word width is fixed at 10 bits inside this function.

    Args:
        r: redundancy (table bits per message bit).
        b: message length in bits.
        s: seed offset forwarded to ``mmh3_iblt``.
        e: parameter; ``2 * e`` items are inserted.

    Returns:
        The result of ``is_decodable`` on items ``0 .. 2*e - 1``.
    """
    n_items = 2 * e
    word_bits = 10
    n_words = math.ceil(b / word_bits)
    entry_bits = word_bits + math.ceil(math.log2(n_words))
    # Number of cells that fit in b*r bits at 2*entry_bits bits per cell.
    n_cells = int(b * r // (2 * entry_bits))
    matrix = mmh3_iblt(m=n_cells, n=n_items, s=s)
    return fpfz.MatrixDecoder(matrix).is_decodable(list(range(n_items)))

In [None]:
from mmh3 import hash as mmh3

In [None]:
def mmh3_iblt(m, n, s, k=4):
    """Build an ``n x m`` 0/1 integer matrix with one 1 per row in each of ``k`` buckets.

    The ``m`` columns are split into ``k`` contiguous buckets with boundaries
    ``round(m / k * j)``. For row ``i`` and bucket ``j`` the column inside the
    bucket is ``mmh3(repr(j), i + s)`` modulo the bucket width.

    Args:
        m: number of columns.
        n: number of rows.
        s: offset added to the row index in the second ``mmh3`` argument.
        k: number of buckets.

    Returns:
        numpy.ndarray of shape ``(n, m)`` and dtype ``int``.
    """
    bucket_size = m / k
    # Column boundaries of the k buckets: edges[j] .. edges[j + 1].
    edges = [round(bucket_size * j) for j in range(k + 1)]
    a = np.zeros((n, m), dtype=int)

    for i in range(n):
        for j, (lo, hi) in enumerate(zip(edges, edges[1:])):
            offset = int(mmh3(repr(j), i + s) % (hi - lo))
            a[i, lo + offset] = 1
    return a

In [None]:
# b20 e1
# For each redundancy r, count how many of the `reps` seeds give a decodable table.
reps = 1000000
b = 2**20
for r in np.linspace(.000881195068359375, .004, num=7):
    print(r, sum(try_once_iblt(r, b, s) for s in range(reps)))

In [None]:
# Hard-coded success counts, converted to failure rates (1 - successes / reps).
success_counts = np.array([
    958147,
    999365,
    999893,
    999970,
    999989,
    999998,
    999993,
])
for i in 1 - (success_counts / reps):
    print(f'{i:.8f}')

In [None]:
# b12 e1
# Same sweep as the previous experiment, for a 2**12-bit message.
reps = 1000000
b = 2**12
for r in np.linspace(0.0888671875, .95849609375, num=7):
    print(r, sum(try_once_iblt(r, b, s) for s in range(reps)))

In [None]:
# b20 d2 k4
# NOTE(review): the header records the intended configuration; the code uses
# whatever rec / iblt / b / w are currently defined in the kernel.
reps = 100000
for e in range(1, 8):
    print(e, '='*20)
    print('rec ', sum(try_once(rec, e) for _ in range(reps)))
    print('iblt', sum(try_once(iblt, e) for _ in range(reps)))

In [None]:
# b11 d2 k4
# NOTE(review): the header records the intended configuration; the code uses
# whatever rec / iblt / b / w are currently defined in the kernel.
reps = 100000
for e in range(1, 8):
    print(e, '='*20)
    print('rec ', sum(try_once(rec, e) for _ in range(reps)))
    print('iblt', sum(try_once(iblt, e) for _ in range(reps)))

In [None]:
# b16 d4
# NOTE(review): the header records the intended configuration; the code uses
# whatever rec / iblt / b / w are currently defined in the kernel.
reps = 100000
for e in range(1, 8):
    print(e, '='*20)
    print('rec ', sum(try_once(rec, e) for _ in range(reps)))
    print('iblt', sum(try_once(iblt, e) for _ in range(reps)))

In [None]:
# b14 d4
# NOTE(review): the header records the intended configuration; the code uses
# whatever rec / iblt / b / w are currently defined in the kernel.
reps = 100000
for e in range(1, 8):
    print(e, '='*20)
    print('rec ', sum(try_once(rec, e) for _ in range(reps)))
    print('iblt', sum(try_once(iblt, e) for _ in range(reps)))

In [None]:
#b = 2**12
#d = 4
# NOTE(review): the two lines above record the intended configuration; the
# code uses whatever rec / iblt / b / w are currently defined in the kernel.
reps = 100000
for e in range(1, 8):
    print(e, '='*20)
    print('rec ', sum(try_once(rec, e) for _ in range(reps)))
    print('iblt', sum(try_once(iblt, e) for _ in range(reps)))

In [None]:
#b20
#d=2
# NOTE(review): the two lines above record the intended configuration; the
# code uses whatever rec / iblt / b / w are currently defined in the kernel.
reps = 100000
for e in range(1, 8):
    print(e, '='*20)
    print('rec ', sum(try_once(rec, e) for _ in range(reps)))
    print('iblt', sum(try_once(iblt, e) for _ in range(reps)))

In [None]:
# Number of IBLT cells that fit in total_size bits when words are 10 bits
# wide and every cell takes 2*l bits.
w = 10
t = math.ceil(b/w)
# Bits needed to index a word. Deliberately not named `f`: that would
# overwrite the function f() defined in the analysis section and break its
# cells on re-run.
index_bits = math.ceil(math.log2(t))
l = w + index_bits
m = (total_size//(2*l))
m

In [None]:
def try_once_iblt(e):
    """Generate a random IBLT with the global ``m`` and test decoding of 2*e items.

    NOTE(review): this redefines the four-argument ``try_once_iblt`` from the
    hash-based experiment above; cells that call that version must not be
    re-run after this one.

    Args:
        e: parameter; ``2 * e`` items (``0 .. 2*e - 1``) are inserted.

    Returns:
        The result of ``is_decodable`` for those items.
    """
    n_items = 2 * e
    matrix = matgen.random_iblt(m=m, n=n_items)
    return fpfz.MatrixDecoder(matrix).is_decodable(list(range(n_items)))

In [None]:
# Count decodable random IBLTs out of 100000 trials for each e.
for e in range(2, 8):
    print(e, '='*20)
    print('iblt', sum(try_once_iblt(e) for _ in range(100000)))

# bitwise error rate
Every bit has probability $p$ of being flipped

In [None]:
def p_word_f(p_bit_f, w):
    """Probability that a ``w``-bit word contains at least one flipped bit.

    Args:
        p_bit_f: probability that a single bit is flipped.
        w: word width in bits (scalar or array; the expression broadcasts).

    Returns:
        ``1 - (1 - p_bit_f) ** w``.
    """
    p_word_intact = (1 - p_bit_f) ** w
    return 1 - p_word_intact

In [None]:
def rand_n_words_fail(p_bit_f, w, n_words):
    """Draw the number of corrupted words among ``n_words`` words of ``w`` bits.

    Samples from a binomial distribution with ``n_words`` trials and success
    probability ``p_word_f(p_bit_f, w)``, using the notebook-level ``rng``.
    """
    p_word = p_word_f(p_bit_f, w)
    return rng.binomial(n_words, p_word)

In [None]:
# Word-failure probability versus word size for bit-flip probabilities
# 1e-3 ... 1e-7.
ws_exp = np.arange(1, 6)
ws = 2**ws_exp
bit_flip_probs = 1/10**np.arange(3, 8)
for p in bit_flip_probs:
    plt.plot(ws, p_word_f(p, ws), label=p)
plt.loglog()
plt.legend();

## Let's go straight empirical
No time to spend on the interesting things :(

In [None]:
# probability for a single bit flip
p = 1/10**5

b = 2**20
w = 2
t = math.ceil(b/w)
f = math.ceil(math.log2(t))
l = w + f
n = 2**l
n

In [None]:
# Build the two matrices for the empirical experiment (d fixed to 5) with
# identical shapes, then keep only the is_decodable bound methods of their
# decoders under the names rec / iblt.
rec_matrix  = matgen.recursive(n=n, d=5)
m           = rec_matrix.shape[1]
iblt_matrix = matgen.iblt(n=n, m=m)

assert rec_matrix.shape == iblt_matrix.shape

rec  = fpfz.MatrixDecoder(rec_matrix).is_decodable
iblt = fpfz.MatrixDecoder(iblt_matrix).is_decodable

# Drop the temporary names so that, as before, only rec / iblt / m remain.
del rec_matrix, iblt_matrix

In [None]:
def try_once(tests):
    """Run one random bit-flip trial and evaluate every decoder in ``tests`` on it.

    Reads the notebook globals ``p`` (bit-flip probability), ``w`` (word
    bits), ``b`` (message bits), ``t`` (number of words) and ``rng``.

    NOTE(review): this redefines the two-argument ``try_once(iblt, e)`` from
    the hard-error section; cells that call that version must not be re-run
    after this one.

    Args:
        tests: iterable of callables, each taking the array of entries.

    Returns:
        numpy boolean array with one result per callable, in order.
    """
    # First draw how many words contain at least one flipped bit.
    n_bad = rand_n_words_fail(p, w, b//w)

    # Pick that many distinct word positions.
    word_idxs = rng.choice(t, n_bad, replace=False)

    # Random original content for each affected word.
    words = rng.integers(2**w, size=n_bad)
    # XOR with a non-zero mask so the corrupted word always differs.
    flip_masks = rng.integers(2**w-1, size=n_bad) + 1
    error = words ^ flip_masks

    # Entry = (word index << w) + word content, for both original and corrupted.
    delta = (np.vstack([words, error]) + (word_idxs << w)).ravel()

    # Feed the same entries to every decoder.
    return np.fromiter((decodable(delta) for decodable in tests), dtype=bool)

In [None]:
# Element-wise success counts for (rec, iblt) over 10000 independent trials.
sum(try_once((rec, iblt)) for _ in range(10000))