# Some Tests with Hash Functions

In [None]:
from binascii import hexlify
from os import urandom
from hashlib import sha256, shake_128
import numpy as np
import matplotlib.pyplot as plt

# Example 1

Let's hash an ASCII string with SHA256 and print the output as a hexadecimal string.

In [None]:
# Message to hash, encoded as ASCII bytes
x = "test message".encode("ascii")
# SHA256 digest of the message, rendered as a hexadecimal string
y = sha256(x).hexdigest()
# Bare expression so the notebook displays the digest
y

# Example 2

Consider the hash SHAKE128 with output 16 bits. Generate a random digest and find a preimage. Print the number of attempts.

In [None]:
# Digest length in bytes (2 bytes = 16 bits)
DIGESTSIZE = 2
# Upper bound on the number of candidates tried before giving up
MAX_ATTEMPTS = 100000


def h(x):
    """Return the first DIGESTSIZE bytes of the SHAKE128 output for x.

    The argument itself is hashed; DIGESTSIZE is read from the module
    scope at call time.
    """
    return shake_128(x).digest(DIGESTSIZE)


# we use a counter as input
i = 0

# random target digest whose preimage we are looking for
y = urandom(DIGESTSIZE)

# we break if this takes too long
while i < MAX_ATTEMPTS:
    # candidate input: the counter encoded as 8 big-endian bytes
    xp = i.to_bytes(8, 'big')
    yp = h(xp)
    if yp == y:
        print("Attempt #{}".format(i))
        print("Preimage is {}".format(hexlify(xp)))
        break
    i += 1
else:
    # the loop ran out of candidates without hitting the target
    print("No preimage found in {} attempts".format(MAX_ATTEMPTS))

# Example 3

Find, empirically, the average number of attempts to find a preimage to SHAKE128 with output 8 bits

In [None]:
def simulate(DIGESTSIZE):
    """Search for a preimage of one random SHAKE128 digest.

    A target digest of DIGESTSIZE bytes is drawn once with urandom. The
    counter values 0, 1, 2, ... (each encoded as 8 big-endian bytes) are
    then hashed in turn until one of them produces the target.

    Parameters:
        DIGESTSIZE: length in bytes of the truncated SHAKE128 output.

    Returns:
        The counter value (starting at 0) whose hash equals the target,
        i.e. the number of failed candidates tried before the hit.
    """
    # The target must stay fixed for the whole search: a preimage is
    # defined with respect to one given digest.
    y = urandom(DIGESTSIZE)
    i = 0
    while True:
        xp = i.to_bytes(8, 'big')
        if shake_128(xp).digest(DIGESTSIZE) == y:
            return i
        i += 1

In [None]:
# Digest length in bytes (1 byte = 8 bits)
DIGESTSIZE = 1
# Repeat the preimage search 1000 times and collect each returned count
simulations = [simulate(DIGESTSIZE) for _ in range(1000)]

# Empirical average over all runs (displayed as the cell output)
np.mean(simulations)

In [None]:
# Histogram of the collected results using 32 bins; the trailing ';'
# keeps the notebook from echoing the call's return value
plt.hist(simulations, bins=32);

# Example 4

Find the empirical probability of finding a second preimage with $q=100$ attempts  

In [None]:
def simulate2(message, DIGESTSIZE, attempts):
    """Try to find a second preimage of ``message`` under truncated SHAKE128.

    The digest of ``message`` is the target. The counter values
    0 .. attempts-1 (each encoded as 8 big-endian bytes) are hashed in
    turn and compared with it.

    Parameters:
        message: bytes whose digest is the target.
        DIGESTSIZE: length in bytes of the truncated SHAKE128 output.
        attempts: number of candidate inputs to try.

    Returns:
        1 if some candidate different from ``message`` has the same
        digest, 0 otherwise.
    """
    target = shake_128(message).digest(DIGESTSIZE)
    for i in range(attempts):
        xp = i.to_bytes(8, 'big')
        # A second preimage must be a *different* input, so a candidate
        # that happens to equal the message itself does not count.
        if xp != message and shake_128(xp).digest(DIGESTSIZE) == target:
            return 1
    return 0

In [None]:
# number of candidate inputs tried per message
q = 100
num_simulations = 10000
# generate many messages
messages = [urandom(8) for _ in range(num_simulations)]

# simulate finding a second preimage (digest size: 1 byte = 8 bits)
simulations = [simulate2(m, 1, q) for m in messages]

# result
p_succ = np.sum(simulations) / num_simulations
# Each attempt misses an 8-bit target with probability 1 - 2**-8, so at
# least one of q attempts hits with probability 1 - (1 - 2**-8)**q.
# The simpler q / 2**8 is only an upper bound; it is accurate when q is
# much smaller than 2**8, which is not the case for q = 100.
p_theory = 1 - (1 - 2**-8)**q

print("Simulations: p={}".format(p_succ))
print("Theory: p={}".format(p_theory))

# Example 5

Find the empirical probability of finding a collision with $q=30$ attempts

In [None]:
def simulate3(DIGESTSIZE,attempts):
    """Look for a collision among the digests of random inputs.

    Hashes ``attempts`` inputs of 8 random bytes each with SHAKE128
    truncated to DIGESTSIZE bytes.

    Returns:
        1 as soon as a digest repeats one produced earlier in the same
        call, 0 if all ``attempts`` digests are distinct.
    """
    seen = set()
    for _ in range(attempts):
        digest = shake_128(urandom(8)).digest(DIGESTSIZE)
        size_before = len(seen)
        seen.add(digest)
        # The set does not grow when the digest was already present
        if len(seen) == size_before:
            return 1
    return 0

In [None]:
# number of random inputs hashed per collision search
q = 30
# run one collision search per simulation (digest size: 1 byte = 8 bits)
simulations = [simulate3(1, q) for _ in range(num_simulations)]
# result
p_succ = np.sum(simulations) / num_simulations
# birthday approximation: 1 - exp(-q^2 / (2 * 2^8))
p_theory = 1 - np.exp(-q**2 / 2**9)

print("Simulations: p={}".format(p_succ))
print("Theory: p={}".format(p_theory))

# Lab Work
Plot the histogram of the number of attempts to find a collision to SHAKE128 with output 16 bits