-
Notifications
You must be signed in to change notification settings - Fork 0
/
hash_generator.py
80 lines (61 loc) · 1.92 KB
/
hash_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from math import sqrt
import random
from typing import List
class HashGenerator():
def __init__(self, n: int, k:int, m: int, p: int = None) -> None:
"""
Initializes Hash class.
Parameters
----------
n: integer
Input range.
(e.g. for ASCII: n == 128)
k: integer
Input length. For strings, this would be the max
input string length. Shorter strings are padded.
m: integer
Output dimension.
([n] -> [m])
p: integer, optional
Prime number such that p > n > m.
"""
self.n = n
self.k = k
self.m = m
self.p = p
if not self.p:
self.find_prime()
def find_prime(self) -> None:
"""
Starting from self.n, find the next larger prime number.
"""
curr = max(self.n, self.m)
while not self.p:
upper = int(sqrt(curr))
for i in range(2, upper + 1):
if curr % i == 0:
break
if i == upper:
self.p = curr
curr += 1
def generate_hash_function(self) -> List[int]:
"""
Generates hash function as follows:
select k integers {z1, ..., zk} by U.A.R. sampling
k times from the set {0, ..., p - 1}
"""
self.z = [random.randint(0, self.p - 1) for i in range(self.k)]
return self.z
def hash(self, s: str) -> int:
"""
Hashes input string as follows:
h(a1, ..., ak) = (SUM_overall_zs zi * ai) mod p
Parameters
----------
s: string
Input string to be hashed.
"""
s = s.ljust(self.k, " ")
emb = [ord(ch) for ch in s]
z_a = sum(map(lambda x: x[0] * x[1], zip(self.z, emb[:self.k])))
return (z_a % self.p) % self.m