# Some Tests with Hash Functions

In [None]:
from binascii import hexlify
from os import urandom
import hashlib
import numpy as np
import matplotlib.pyplot as plt

# Example 1

Let's hash an ASCII string with SHA256 and print the output as a hexadecimal string.

In [None]:
# Message to hash, given as an ASCII byte string.
x = b"test message"
# Build the SHA-256 hash object through the generic hashlib constructor.
y = hashlib.new("sha256", x)
# Last expression of the cell: the digest rendered as a hexadecimal string.
y.digest().hex()

# Example 2

Consider the hash SHAKE128 with output 16 bits. Generate a random digest and find a preimage. Print the number of attempts.

In [None]:
# Digest length in bytes: 2 bytes = 16 bits of SHAKE128 output.
DIGESTSIZE = 2
# Upper bound on the search, so the cell always terminates.
MAX_ATTEMPTS = 100000

# Random target digest whose preimage we are looking for.
y = urandom(DIGESTSIZE)

# Candidate inputs are the 8-byte big-endian encodings of a counter.
for i in range(MAX_ATTEMPTS):
    xp = i.to_bytes(8, 'big')
    yp = hashlib.shake_128(xp).digest(DIGESTSIZE)
    if yp == y:
        print("Attempt #{}".format(i))
        print("Preimage is {}".format(xp))
        break
else:
    # The bound was reached without a match; say so instead of ending
    # silently with no output at all.
    print("No preimage found in {} attempts".format(MAX_ATTEMPTS))

# Example 3

Find, empirically, the average number of attempts to find a preimage to SHAKE128 with output 8 bits

In [None]:
def simulate(DIGESTSIZE):
    """Search for a preimage of a random SHAKE128 digest using a counter.

    A target digest of ``DIGESTSIZE`` bytes is drawn once. The inputs tried
    are the 8-byte big-endian encodings of 0, 1, 2, ... in that order.

    Args:
        DIGESTSIZE: Digest length in bytes.

    Returns:
        The zero-based counter value whose digest equals the target, i.e.
        the number of failed attempts made before the first success.
    """
    size = int(DIGESTSIZE)
    # The target must stay fixed for the whole search; a preimage is only
    # meaningful with respect to one given digest.
    y = urandom(size)
    i = 0
    while True:
        xp = i.to_bytes(8, 'big')
        if hashlib.shake_128(xp).digest(size) == y:
            return i
        i += 1

In [None]:
# Repeat the preimage search 1000 times on a 1-byte (8-bit) digest and keep
# the result of every run.
simulations = [simulate(1) for _ in range(1000)]

In [None]:
# Empirical average of the simulated search results.
np.asarray(simulations).mean()

In [None]:
# Histogram of the simulated results using 32 bins.
plt.hist(simulations, bins=32);

# Example 4

Find the empirical probability of finding a second preimage with $q=100$ attempts  

In [None]:
def simulate2(message, DIGESTSIZE, attempts):
    """Try to find a second preimage of ``message`` under truncated SHAKE128.

    The candidates are the 8-byte big-endian encodings of the counter values
    0 .. attempts-1.

    Args:
        message: Bytes whose digest is the target.
        DIGESTSIZE: Digest length in bytes.
        attempts: Number of counter values to try.

    Returns:
        1 if a candidate different from ``message`` has the same digest as
        ``message``, otherwise 0.
    """
    size = int(DIGESTSIZE)
    y = hashlib.shake_128(message).digest(size)
    for i in range(attempts):
        xp = i.to_bytes(8, 'big')
        # A second preimage must differ from the original message, so the
        # message itself never counts as a hit.
        if xp != message and hashlib.shake_128(xp).digest(size) == y:
            return 1
    return 0

In [None]:
q = 100
num_simulations = 10000
# generate many random 8-byte messages
messages = [urandom(8) for _ in range(num_simulations)]

# simulate finding a second preimage (1-byte digest, q attempts per message)
simulations = [simulate2(m, 1, q) for m in messages]

# result: fraction of successful runs
p_succ = np.sum(simulations) / num_simulations
# first-order estimate q / 2^n for an n = 8 bit digest
p_theory = q / 2**8

print("Simulations: p={}".format(p_succ))
print("Theory: p={}".format(p_theory))

# Example 5

Find the empirical probability of finding a collision with $q=30$ attempts

In [None]:
def simulate3(DIGESTSIZE, attempts):
    """Look for a collision among the digests of random 8-byte inputs.

    Args:
        DIGESTSIZE: Digest length in bytes.
        attempts: Number of random inputs to hash.

    Returns:
        1 if two of the hashed inputs produced the same digest, otherwise 0.
    """
    size = int(DIGESTSIZE)
    # Digests seen so far; a set gives constant-time membership tests.
    calculated_hashes = set()
    for _ in range(attempts):
        yp = hashlib.shake_128(urandom(8)).digest(size)
        if yp in calculated_hashes:
            return 1
        calculated_hashes.add(yp)
    return 0

In [None]:
q = 30
# simulate finding a collision (1-byte digest, q random inputs per run)
simulations = [simulate3(1, q) for _ in range(num_simulations)]
# result: fraction of runs in which a collision occurred
p_succ = np.sum(simulations) / num_simulations
# birthday approximation 1 - exp(-q^2 / (2 * 2^8)) for an 8-bit digest
p_theory = 1 - np.exp(-q**2 / 2**9)

print("Simulations: p={}".format(p_succ))
print("Theory: p={}".format(p_theory))

# Example 6

Calculate the number of attempts to find a collision with the rho method 

The Rho method is an algorithm for finding collisions that requires only a little memory.

It works like this:
1. Pick some random starting value $x_1$, and set $x′_1 = x_1$.
2. Compute $x_2 = hash(x_1)$, and $x′_2 = hash(hash(x′_1))$; in the first case we apply the hash function once, while in the second case we apply it twice.
3. Repeat computing $x_{i + 1} = hash(x_i)$ and $x′_{i + 1} = hash(hash(x′_i))$, until you find $x_{i + 1} = x′_{i + 1}$.

In [None]:
# Digest length in bytes (2 bytes = 16 bits).
DIGESTLEN = 2
# Random 8-byte seed for the sequence.
start = urandom(8)

# Both sequences start from the digest of the seed.
x = hashlib.shake_128(start).digest(DIGESTLEN)
xp = x

for i in range(10000):
    # Slow sequence: one hash application per iteration.
    x_i = hashlib.shake_128(x).digest(DIGESTLEN)
    # Fast sequence: two hash applications per iteration.
    temp = hashlib.shake_128(xp).digest(DIGESTLEN)
    xp_i = hashlib.shake_128(temp).digest(DIGESTLEN)
    if x_i == xp_i:
        # The two sequences met; report the iteration index.
        print(i)
        break
    x = x_i
    xp = xp_i
else:
    # The iteration bound was reached without the sequences meeting.
    print("No match found in 10000 iterations")