In [1]:
import numpy as np
import pandas as pd

from utils.learnedbloomfilter import LearnedModel, LearnedBloomFilter
from utils.timer import Timer

import torch

from tqdm import tqdm, trange

import time

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [2]:
# Read the labelled integer dataset and preview its first rows.
# The CSV carries a saved index column, which pandas shows as "Unnamed: 0".
dataset_path = 'data/Japan_dataset_octet_3.csv'
dataset = pd.read_csv(dataset_path)
dataset.head()

Unnamed: 0,integer,label
0,65552,1
1,65553,1
2,65554,1
3,65555,1
4,65556,1


### LEARNED BLOOM FILTER

In [3]:
## LOAD MODEL
# Layer widths of the MLP: the input width followed by four hidden widths.
model_in = 24
model_arch = [model_in, 256, 128, 64, 32]
model_out = 1

# One Linear -> ReLU pair per consecutive pair of widths, then a final Linear
# head with no activation. Module order must match the saved checkpoint's keys.
model = torch.nn.Sequential()
for fan_in, fan_out in zip(model_arch, model_arch[1:]):
    model.append(torch.nn.Linear(fan_in, fan_out))
    model.append(torch.nn.ReLU())
model.append(torch.nn.Linear(model_arch[-1], model_out))

# map_location="cpu" remaps the stored tensors to the CPU so the checkpoint
# loads even if it was saved from a GPU that is not available here.
# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
state_dict = torch.load(
    "saved_model/Japan_256_128_64_32_fp_4430.pth",
    map_location="cpu",
)
model.load_state_dict(state_dict)

<All keys matched successfully>

In [4]:
# Integers of every row labelled 1, as a NumPy array (the keys handed to the filter).
positives = dataset.loc[dataset['label'] == 1, 'integer'].to_numpy()

In [5]:
# Settings for the learned Bloom filter.
fpr = 0.01          # value passed as the filter's `fpr` argument
input_size = 24     # same value as the network's input width
device = "cpu"

# Wrap the loaded network, then build the filter over the positive keys.
lm = LearnedModel(model=model, input_size=input_size, device=device)
lbf = LearnedBloomFilter(lm=lm, fpr=fpr, positives=positives)

### TIME ANALYSIS

In [6]:
# Fixed seed so the sampled test set -- and therefore the saved results keyed
# by it -- can be regenerated exactly on a fresh kernel.
SEED = 42
rng = np.random.default_rng(SEED)

# Draw n_test distinct integers from [0, 2**24).
n_test = 100_000
test_set = rng.choice(2**24, size=n_test, replace=False)

In [7]:
# Column store for the results table: one independent, initially empty list per column.
color = {
    column: []
    for column in ('integer', 'time', 'lbf_label', 'internal_label')
}

In [8]:
# Queried integers
color['integer'] = test_set

# Filter answers for the whole test set in one call
lbf_labels = lbf.query(test_set)
color['lbf_label'] = lbf_labels

# White-box labels, one per test value:
#   'FT' -- the learned model's prediction is truthy
#   'ST' -- the model's prediction is falsy and the backup filter reports a hit
#   'SF' -- the model's prediction is falsy and the backup filter reports a miss
# The backup filter is only consulted when the model's prediction is falsy.
color['internal_label'] = []
for test_val in tqdm(test_set):
    if lbf.lm.predict(test_val)[0]:
        internal_label = 'FT'
    else:
        internal_label = 'ST' if lbf.bfilter.query(test_val) else 'SF'
    color['internal_label'].append(internal_label)

100%|████████████████████████████████| 100000/100000 [00:08<00:00, 11166.87it/s]


In [9]:
# TIME ANALYSIS

# Per-query timer built from a nanosecond clock function and the clock id.
# CLOCK_MONOTONIC_RAW is a raw hardware-based clock not subject to NTP
# adjustments (available on Linux and macOS only).
# NOTE(review): presumably Timer reads the clock as func(args) -- confirm in utils.timer.
func = time.clock_gettime_ns
args = time.CLOCK_MONOTONIC_RAW
timer = Timer(func, args)

# perf_counter() is monotonic, so the total below cannot be skewed by system
# clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()

# Timed repetitions per query, chosen by the filter's answer for that query:
# label 0 -> SF_TIMES, anything else -> FT_TIMES.
# Alternative settings left disabled in the original cell: SF_TIMES = 10, FT_TIMES = 100.
SF_TIMES = 1
FT_TIMES = 1

color['time'] = []

for i in range(n_test):
    TIMES = SF_TIMES if lbf_labels[i] == 0 else FT_TIMES

    # The timed region is intentionally just the single-value query call.
    for _rep in range(TIMES):
        timer.start()
        lbf.query(test_set[i])
        timer.stop()

    # Store this query's timer.average(), then reset the timer before the next query.
    color['time'].append(timer.average())
    timer.reset()

print(f'Elapsed Time: {(time.perf_counter() - start_time):.4f}')

Elapsed Time: 9.7220


In [11]:
# Assemble the collected columns into a table and write it out as CSV
# without the DataFrame index column.
color_df = pd.DataFrame(data=color)
color_df.to_csv(
    'results/Japan_octect_3_time_100k_final.csv',
    index=False,
)