## <u>Cuckoo Filter</u>
### Maggie Drew & Adam Gibbs

Implementation of a Cuckoo Filter for estimating set membership on data streams. The implementation follows the pseudocode from the following paper: https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf. This notebook contains the following:

1. Imports used in the notebook
2. All global variables and python data structures (lists, dictionaries, etc.) used
3. Code for Cuckoo Filter functions
4. Cuckoo filter test on simulated data stream
5. Results
6. Analysis of Results 

In [None]:
# IMPORTS
import matplotlib as plt
import numpy as np 
import gzip
import hashlib
import random


In [None]:
# VARIABLES and DATA STRUCTURES

# dataset to be used as data stream
dataset = './input.txt'

# number of buckets in hash table
# make this prime!
bucket_size = 29

# list of buckets for elements to hash into (one fingerprint per bucket);
# every bucket starts at 0, the value the filter functions treat as "empty"
buckets = [0] * bucket_size


In [None]:
# HELPER METHODS

def create_fingerprint(x):
    """Return the fingerprint stored in the filter for element ``x``.

    The fingerprint is currently the element itself, returned unchanged.
    """
    fingerprint = x
    return fingerprint

def hash_func(x):
    """Map ``x`` to a bucket index by reducing it modulo ``bucket_size``."""
    bucket_index = x % bucket_size
    return bucket_index

def relocate(f, bucket):
    """Re-home an evicted fingerprint by kicking it to its alternate bucket.

    Each fingerprint has two candidate buckets: ``hash_func(f)`` and
    ``(hash_func(f) + f) % bucket_size``. The fingerprint in hand is moved to
    whichever candidate it was *not* just evicted from. If that bucket is
    occupied, its occupant is displaced and becomes the next fingerprint to
    relocate.

    Args:
        f: Fingerprint that was displaced from ``bucket``.
        bucket: Index of the bucket ``f`` was just evicted from.

    Returns:
        True once a displaced fingerprint lands in an empty bucket. False if
        ``len(buckets)`` kicks were made without finding one; in that case
        the fingerprint still in hand is not stored anywhere.
    """
    for _ in range(len(buckets)):
        h1 = hash_func(f)
        h2 = (h1 + f) % bucket_size
        # Go to the candidate bucket that f did not just come from.
        new_bucket = h2 if h1 == bucket else h1

        if buckets[new_bucket] == 0:
            buckets[new_bucket] = f
            return True

        # Occupied: take the slot and continue with the displaced occupant.
        buckets[new_bucket], f = f, buckets[new_bucket]
        # The new victim was evicted from new_bucket, so its alternate must be
        # chosen relative to that bucket. Without this update the victim can
        # be pushed straight back into the slot it just left.
        bucket = new_bucket

    return False

    


In [None]:
# IMPLEMENTATION OF CUCKOO FILTER METHODS
def insert(x):
    """Insert element ``x`` into the filter.

    The fingerprint of ``x`` is written to the first of its two candidate
    buckets that is either empty (holds 0) or already holds the same
    fingerprint. If both are taken by other fingerprints, one of the two
    occupants is evicted at random, the new fingerprint takes its slot, and
    the evicted fingerprint is handed to ``relocate``.

    Returns:
        True if the fingerprint was placed without eviction; otherwise
        whatever ``relocate`` returns for the evicted fingerprint.
    """
    f = create_fingerprint(x)
    h1 = hash_func(x)
    h2 = (h1 + f) % bucket_size  # might have to switch from + operation

    # Try the primary bucket first, then the alternate.
    for slot in (h1, h2):
        occupant = buckets[slot]
        if occupant == 0 or occupant == f:
            buckets[slot] = f
            return True

    # Both candidates are full: evict one occupant, chosen by a coin flip.
    victim_slot = h1 if random.random() < 0.5 else h2
    evicted = buckets[victim_slot]
    buckets[victim_slot] = f
    return relocate(evicted, victim_slot)

def lookup(x):
    """Report whether the fingerprint of ``x`` is in either candidate bucket.

    Returns:
        True if the primary or the alternate bucket of ``x`` currently holds
        its fingerprint, False otherwise.
    """
    f = create_fingerprint(x)
    primary = hash_func(x)
    alternate = (primary + f) % bucket_size
    return buckets[primary] == f or buckets[alternate] == f

def delete(x):
    """Remove one stored copy of the fingerprint of ``x`` from the filter.

    The primary bucket is checked before the alternate; the first one that
    holds the fingerprint is reset to 0 (empty).

    Returns:
        True if a matching fingerprint was found and cleared, False otherwise.
    """
    f = create_fingerprint(x)
    h1 = hash_func(x)
    candidates = (h1, (h1 + f) % bucket_size)

    for idx in candidates:
        if buckets[idx] == f:
            buckets[idx] = 0
            return True

    return False


In [None]:
# READ IN DATA AND APPLY FILTER

# Stream the whitespace-separated integers from the dataset file, insert each
# one into the filter, and print the bucket table after every insertion.
with open(dataset, 'rt') as data_stream:
    tokens = (token for line in data_stream for token in line.split())
    for token in tokens:
        insert(int(token))
        print(buckets)

In [None]:
# Re-read the dataset and print the lookup result for every element in it.
with open(dataset, 'rt') as data_stream:
    for line in data_stream:
        for found in map(lookup, map(int, line.split())):
            print(found)