# Some Tests with Hash Functions

In [None]:
from binascii import hexlify
from os import urandom
from hashlib import sha256, shake_128
import numpy as np
import matplotlib.pyplot as plt

# Example 1

Let's hash an ASCII string with SHA-256 and print the output as a hexadecimal string.

In [None]:
# Message to hash, given as an ASCII byte string
x = b"test message"
# SHA-256 digest of the message, rendered as a lowercase hexadecimal string
y = sha256(x).digest().hex()
# Bare trailing expression so that the notebook displays the digest
y

# Example 2

Consider the hash function SHAKE128 with a 16-bit output. Generate a random digest and find a preimage. Print the number of attempts.

In [None]:
# Digest size in bytes (2 bytes = 16 bits of output)
DIGESTSIZE = 2


def h(x):
    """Return the SHAKE128 digest of ``x`` truncated to DIGESTSIZE bytes."""
    return shake_128(x).digest(DIGESTSIZE)


# y is the target digest we want to invert
y = urandom(DIGESTSIZE)

# Number of candidates hashed so far. It starts at 0 so that, when the loop
# ends, it equals the number of attempts actually made (a hit on the very
# first candidate is reported as attempt #1).
i = 0
yp = None
while yp != y:
    # Generate a random 64 bit input
    xp = urandom(8)
    yp = h(xp)
    i = i + 1

print("Attempt #{}".format(i) )
print("Preimage is {}".format(hexlify(xp)) )

# Example 3

Find, empirically, the average number of attempts needed to find a preimage for SHAKE128 with an 8-bit output.

In [None]:
# Digest size in bytes (1 byte = 8 bits of output)
DIGESTSIZE = 1


def h(x):
    """Return the SHAKE128 digest of ``x`` truncated to DIGESTSIZE bytes."""
    return shake_128(x).digest(DIGESTSIZE)


def simulate():
    """Run one random preimage search and return the number of attempts.

    A random target digest of DIGESTSIZE bytes is drawn, then random 64-bit
    inputs are hashed with ``h`` until one of them maps to the target.

    Returns:
        The number of inputs that were hashed, including the successful
        one (always >= 1).
    """
    y = urandom(DIGESTSIZE)
    # Start at 0 so the returned value is exactly the number of hashes
    # computed; starting at 1 would shift every sample (and the mean) by one.
    i = 0
    yp = None
    while y != yp:
        # Generate a random 64 bit input
        xp = urandom(8)
        yp = h(xp)
        i += 1
    return i

# Repeat the experiment 1000 times, keeping the attempt count of each run
simulations = [simulate() for _ in range(1000)]

# Empirical average number of attempts (displayed as the cell output)
np.mean(simulations)

In [None]:
# Histogram of the attempt counts, grouped into 32 bins
plt.hist(simulations, bins=32)
plt.xlabel("Number of attempts")
plt.ylabel("Occurrences")

# Example 4

Find the empirical probability of finding a second preimage with $q=100$ attempts. Use a 16-bit hash function.

In [None]:
# Digest size in bytes (2 bytes = 16 bits of output)
DIGESTSIZE = 2
# attempts allowed in each simulation
q = 100
# number of simulations
num_simulations = 10000


def h(x):
    """Return the SHAKE128 digest of ``x`` truncated to DIGESTSIZE bytes."""
    return shake_128(x).digest(DIGESTSIZE)


def simulate(message, attempts):
    """Try to hit the digest of ``message`` using random inputs.

    Args:
        message: Byte string whose digest is the target.
        attempts: Number of random 64-bit candidates to hash.

    Returns:
        True if at least one candidate hashes to the same digest as
        ``message``, False otherwise.
    """
    target = h(message)
    candidates = [urandom(8) for _ in range(attempts)]
    digests = {h(candidate) for candidate in candidates}
    return target in digests

# Draw one random 64-bit message per simulation
messages = [urandom(8) for _ in range(num_simulations)]

# Attack every message with q random candidates; True marks a success
simulations = [simulate(message, q) for message in messages]

# Empirical success rate versus the estimate q / 2^n (n = digest size in bits)
p_succ = sum(simulations) / num_simulations
p_theory = q / 2 ** (DIGESTSIZE * 8)

print("Simulations: p={}".format(p_succ))
print("Theory: p={}".format(p_theory))

# Example 5

Find the empirical probability of finding a collision with $q=30$ attempts.
Use a 16-bit hash function.

In [None]:
# Digest size in bytes (2 bytes = 16 bits of output)
DIGESTSIZE = 2
# attempts allowed in each simulation
q = 30
# number of simulations
num_simulations = 10000


def h(x):
    """Return the SHAKE128 digest of ``x`` truncated to DIGESTSIZE bytes."""
    return shake_128(x).digest(DIGESTSIZE)


def simulate(attempts):
    """Hash ``attempts`` random inputs and report whether any two collide.

    Args:
        attempts: Number of random 64-bit inputs to hash.

    Returns:
        True if at least two of the computed digests are equal,
        False otherwise.
    """
    digests = [h(urandom(8)) for _ in range(attempts)]
    # A set keeps a single copy of each digest, so it is shorter than the
    # list exactly when some digest occurred more than once.
    return len(set(digests)) < len(digests)

# Run the collision experiment num_simulations times; True marks a collision
simulations = [simulate(q) for _ in range(num_simulations)]

# Empirical collision rate versus the approximation
# 1 - exp(-q^2 / 2^(n+1)), where n is the digest size in bits
p_succ = sum(simulations) / num_simulations
p_theory = 1 - np.exp(-(q ** 2) / 2 ** (DIGESTSIZE * 8 + 1))

print("Simulations: p={}".format(p_succ))
print("Theory: p={}".format(p_theory))

# Lab Work

Work in pairs. One is Alice, one is Bob.

Use SHAKE128 with 8 bits output.

Alice generates a random input and sends it to Bob. Bob finds a second preimage and sends it to Alice.
Alice verifies that the second preimage is correct.

How many attempts did Bob make?

Now swap your roles and use SHAKE128 with 16 bits of output.

# Challenge

For this challenge, use the hash function SHAKE128 with 1 byte of output. Your goal is to find a second preimage for the following input. Measure the time it takes, then increase the output size by one byte and repeat. Plot your measurements in a graph.

In [2]:
# Input for which a second preimage has to be found in the challenge
input_string = b"The frog jumped into the pond"