# Count Min Sketch Implementation

Description: <br>
Count Min Sketch (12.2.2.2, Pg 403- 405 Aggarwal)

In [57]:
# importing libraries
# https://pypi.org/project/mmh3/
import mmh3 # can be used to hash strings
import random

In [58]:
# parameters
# length of each array
length = 7
# w
noHashFunctions = 5

In [59]:
#initialize the |noHashFunctions| arrays with 0
arrays = []

for i in range(0,noHashFunctions):
    array = [0] * length
    arrays.append(array)
    
print(arrays)

[[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0]]


In [60]:
#we need to define pairwise independent hash functions that map an item from 0 to length - 1
def hashItem(item,seed):
    #returns a 32-bit unsigned int 
    #seed is used to create independence
    hashCode = mmh3.hash(str(item),seed, signed = False)
    #maxValue of 32-bit unsigned int 
    maxValue = 4294967295
    #normalize it between 0 and length-1
    normalize = hashCode/maxValue * (length-1)
    #round ensure even splits
    #round(3.6) = 4 while int(3.6) = 3
    return round(normalize)

In [61]:
#counting an item
def count(item):
    for i in range(0,noHashFunctions):
        index = hashItem(item = item, seed = i)
        arrays[i][index] += 1

In [62]:
#counting an item
item_1 = 2000
item_2 = "countmin"
count(item_1)
count(item_2)
count(item_2)
print(arrays)

[[0, 0, 0, 1, 2, 0, 0], [0, 0, 2, 1, 0, 0, 0], [0, 0, 2, 0, 1, 0, 0], [0, 3, 0, 0, 0, 0, 0], [0, 1, 0, 0, 2, 0, 0]]


In [63]:
#finding the count of an item
def getCount(item):
    #taking the min eliminates the effect of collisions
    minCount = float('inf')

    for i in range(0,noHashFunctions):
        index = hashItem(item = item, seed = i)
        count = arrays[i][index]

        if count < minCount:
            minCount = count
    
    return minCount

In [64]:
print(getCount(item_1))
print(getCount(item_2))

1
2
