# Best attack parameters for given hardware

We assume the attack parameters are $n$ and $l$. Also, denote $g$ to be the number of ignored bits. 

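Each query hits one of the stored digests with probability $p = \frac{2^l}{2^n}$. The number of queries until the first hit is a geometric random variable with success probability $p$, thus we expect a collision after $\#queries = \frac{1}{p} = \frac{2^n}{2^l}$ queries.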

Assume we only accept digests that have a certain number of zero bits, denoted $d$ (the difficulty). Thus, we can pretend that we are working with shorter digests:
$$
\begin{align}
&\#queries = \frac{2^n}{2^l} \\
&\#queries_{sec} \cdot t_{sec} = \frac{2^n}{2^l}
\end{align}
$$


We have three points of view on $\#queries$:
- Senders: How many hashes they generate? 
    - Their speed is affected by the difficulty, but from their perspective the overall attack time doesn't change if the difficulty changes (TODO: add explanation).

    - $\#snd\_queries_{sec} = \frac{\#senders \cdot \#gen\_hashes_{sec}} {2^{d}}$
    - so, $\#queries_{sec} \cdot t_{sec} = \frac{2^n}{2^{l}}$

- Receivers: How many hashes can they look up in the dictionary?
    - In their world, the higher the difficulty the better chance of hitting collision (since digests are technically shorter).
    - $\#rcv\_queries_{sec} = \#receivers \cdot \#dict\_queries_{sec} $
    - but $\#queries_{sec} \cdot t_{sec} = \frac{2^n}{2^{l+d}}$

- Bandwidth: This is how many hashes the network can carry in a second.
    - From their perspective, difficulty reduces the rate of transmitted messages. 
    - $\#bdwth\_queries_{sec}$


Thus,

$$\#queries_{sec} := \min\left(\#snd\_queries_{sec}, \#rcv\_queries_{sec}, \#bdwth\_queries_{sec}\right)$$


In [1]:
# Numbers from Gros cluster, nancy, grid5000.fr
# find_dist overhead = 7%-9%
# hash_16x_avx (nancy: gros) ≈ 2^24.87hash/sec
# dict_add≈2^23.41 elm/sec
# dict_lookup ≈ 2^24.9337 elm/sec
# mpi_recv overhead = 10.71% (for regenerating message)


# Hardware and timing assumptions shared by the cells below.
# NOTE(review): `^` is used as exponentiation in these literals (the recorded
# output server_memory=176000000000 confirms it), which presumably means the
# notebook runs on a SageMath kernel; under plain Python `^` would be XOR.
nservers = 32
server_memory = (196 - 20)*10^9 # = 176e9; presumably bytes per server (196 GB minus 20 GB held back) -- TODO confirm
ncores_per_server = 32
hashes_sec_core = 2^25.015134275003597
dict_queries_sec = 2^21.863350
t_sec =  2 * 24 * 3600 # time budget in seconds (2 days)
# Rate (hashes/sec) used when estimating the phase i duration.
# NOTE(review): the original note mentioned a "compressed file"; presumably this
# is the rate at which hashes are produced for it -- TODO confirm.
hashes_sec_phase_i = 2^25 # 2^24.72
dict_add_sec = 2^23.41

In [2]:
# 1 core hashing power
# thd2 sha_avx512_16way  elapsed 1.78sec i.e. 898392.92 hashes/sec = 2^19.777 hashes, 57.4971 M


# Querying 100000000, took 2.22 sec i.e. 44977939.99 elm/sec = 2^25.4227 elm/sec 


def seconds_2_time(t):
    """Format a duration given in seconds as days, hours, minutes, seconds.

    The value is converted to float first; every component of the result is
    rounded down to a whole number.
    """
    from math import floor

    remaining = float(t)
    counts = []
    # Peel off the largest unit first: days, then hours, then minutes.
    for unit_seconds in (3600*24, 3600, 60):
        whole_units = floor(remaining/unit_seconds)
        remaining = remaining - whole_units*unit_seconds
        counts.append(whole_units)
    days, hours, minutes = counts

    return f"{days} days, {hours} hours, {minutes} mins, {floor(remaining)} sec"

# Sanity check: echo the configured per-server memory value.
print(f"server_memory={server_memory}")

server_memory=176000000000


In [13]:
def regen_msg_time(nsenders,
                   nreceivers,
                   hashes_sec_core,
                   dict_add_sec,
                   difficulty,
                   nhashes_stored):
    """Return the number of seconds needed to regenerate the long message.

    The sender side is charged for hashing all ``nhashes_stored`` values at
    ``nsenders * hashes_sec_core`` per second. The receiver side is charged
    for ``nhashes_stored / 2**difficulty`` dictionary additions at
    ``nreceivers * dict_add_sec`` per second. The slower side is returned.
    """
    sender_rate = nsenders*hashes_sec_core
    receiver_rate = nreceivers*dict_add_sec

    sender_secs = nhashes_stored / sender_rate
    # Only a 1/2**difficulty fraction of the hashes is charged to the receivers.
    receiver_load = nhashes_stored/(2**difficulty)
    receiver_secs = receiver_load / receiver_rate

    return max(receiver_secs, sender_secs)


def regen_msg_l(difficulty,
                nhashes_stored):
    """Return the largest l that can be built at the given difficulty.

    This is log2 of ``nhashes_stored / 2**difficulty``.
    """
    from math import log2

    surviving_hashes = nhashes_stored/(2**difficulty)
    return log2(surviving_hashes)
    
def nqueries_sender(nsenders, hashes_sec_core, difficulty):
    """Return how many queries the senders can generate per second.

    The combined hashing rate of all senders is divided by 2**difficulty.
    """
    combined_hash_rate = nsenders*hashes_sec_core
    return combined_hash_rate/(2**difficulty)


def nqueries_receiver(nreceivers, dict_queries_sec):
    """Return how many dictionary queries all receivers can make per second."""
    combined_lookup_rate = nreceivers * dict_queries_sec
    return combined_lookup_rate

def phase_i_time(l, difficulty, hashes_sec_phase_i):
    """
    Return how many seconds it takes to complete phase_i.

    Phase i is charged ``2**l * 2**difficulty`` hashes, computed at
    ``hashes_sec_phase_i`` hashes per second.

    l: log2 of the number of hashes to keep.
    difficulty: log2 of the extra work factor per kept hash.
    hashes_sec_phase_i: hashing rate in hashes per second.
    """
    # Use `**`, like the other functions in this file. `^` is exponentiation
    # only under the Sage preparser; in plain Python it is XOR and binds looser
    # than `*` and `/`, so the old expression failed or gave a wrong value.
    return 2**l * 2**difficulty / hashes_sec_phase_i


def nqueries_sender(nsenders, hashes_sec_core, difficulty):
    """Return the per-second query rate of the sender side.

    NOTE: this repeats the definition of ``nqueries_sender`` given earlier in
    the same cell; being later, this is the one that stays bound.
    """
    raw_hashes_per_sec = nsenders*hashes_sec_core
    return raw_hashes_per_sec/(2**difficulty)


def nqueries_receiver(nreceivers, dict_queries_sec):
    """Return the per-second dictionary query rate of the receiver side.

    NOTE: this repeats the definition of ``nqueries_receiver`` given earlier
    in the same cell; being later, this is the one that stays bound.
    """
    lookups_per_sec = nreceivers * dict_queries_sec
    return lookups_per_sec


def nqueries(n,
             nsenders,
             nreceivers,
             nhashes_stored,
             hashes_sec_core,
             dict_queries_sec,
             difficulty):
    """Return ``2**n / (rate * nhashes_stored)``.

    ``rate`` is the smaller of the sender and receiver query rates per
    second. ``n`` is given as a number of bits. The search cell of this
    notebook uses the result as the phase ii duration in seconds.
    """
    sender_rate = nqueries_sender(nsenders, hashes_sec_core, difficulty)
    receiver_rate = nqueries_receiver(nreceivers, dict_queries_sec)
    # The slower side limits the overall query throughput.
    bottleneck_rate = min(sender_rate, receiver_rate)

    return 2**n / (bottleneck_rate*nhashes_stored)

In [4]:
# regen_msg_time(1,
#                1,
#                hashes_sec_core,
#                dict_add_sec,
#                6,
#                2^29)
# seconds_2_time(phase_i_time(40, 0, hashes_sec_phase_i))

In [5]:
# phase_i time?
# phase_ii time?

In [23]:
# Scan sender/receiver splits and difficulties for a fixed n and a fixed number
# of stored hashes, printing the timing of every combination and remembering
# the fastest one (read by the next cell through the best_* globals).
print("Regenerating the long message will take ... ")
from math import log2
# These rebind the cluster shape set in the first cell.
nservers = 16
ncores_per_server = 80
nhashes_stored = 2^40
n = 64
best_total_time = float('inf')
best_nsenders = 0
best_nreceivers = 0
best_diff = 0

# nreceivers grows in steps of nservers; all remaining cores are senders.
for nreceivers in range(nservers, nservers*ncores_per_server - nservers, nservers):
    nsenders = (ncores_per_server)*nservers - nreceivers
    #nreceivers = 2*nservers
    for diff in range(10):

        time_rgen_msg = regen_msg_time(nsenders,
                                      nreceivers,
                                      hashes_sec_core,
                                      dict_add_sec,
                                      diff, # difficulty
                                      nhashes_stored,
                                     ) # NOTE(review): legacy note, meaning unclear: "25 our experimented interval, 4 because sha2-16way, log2(nsenders) that how we truncate the #states"

        time_phase_i  = phase_i_time(log2(nhashes_stored), 0, hashes_sec_phase_i) # no need to account for difficulty
        time_phase_ii =  nqueries(n, nsenders, nreceivers, nhashes_stored,
                                  hashes_sec_core, dict_queries_sec, diff)
        total_time = time_phase_i+time_rgen_msg+time_phase_ii
        
        
        # Difficulties 8 and 9 are still printed below but can never become the best choice.
        if total_time < best_total_time and diff < 8:
            best_total_time = total_time
            best_nsenders = nsenders
            best_nreceivers = nreceivers
            best_diff = diff
            
        print("-------------------------------------------------------------------------------------")
        print(f"difficulty={diff} => {round(time_rgen_msg)} sec, nreceivers={nreceivers}, nsenders={nsenders}")
        print(f"phase_i={round(time_phase_i)} sec, phase_ii={round(time_phase_ii)} sec")
        print(f"total time = {seconds_2_time(total_time)}")
        
    print("=============================================")

Regenerating the long message will take ... 
-------------------------------------------------------------------------------------
difficulty=0 => 6165 sec, nreceivers=16, nsenders=1264
phase_i=32768.0 sec, phase_ii=0 sec
total time = 0 days, 10 hours, 48 mins, 53 sec
-------------------------------------------------------------------------------------
difficulty=1 => 3083 sec, nreceivers=16, nsenders=1264
phase_i=32768.0 sec, phase_ii=0 sec
total time = 0 days, 9 hours, 57 mins, 30 sec
-------------------------------------------------------------------------------------
difficulty=2 => 1541 sec, nreceivers=16, nsenders=1264
phase_i=32768.0 sec, phase_ii=0 sec
total time = 0 days, 9 hours, 31 mins, 49 sec
-------------------------------------------------------------------------------------
difficulty=3 => 771 sec, nreceivers=16, nsenders=1264
phase_i=32768.0 sec, phase_ii=0 sec
total time = 0 days, 9 hours, 18 mins, 58 sec
---------------------------------------------------------------

In [24]:

# Report the fastest combination recorded in the best_* globals by the search cell above.
print("-------------------------------------------------------------------------------------")
print(f"difficulty={best_diff} => nreceivers={best_nreceivers}, nsenders={best_nsenders}")
print(f"total time = {seconds_2_time(best_total_time)}")

-------------------------------------------------------------------------------------
difficulty=7 => nreceivers=32, nsenders=1248
total time = 0 days, 9 hours, 6 mins, 34 sec


In [7]:
def regen_msg_time(nsenders,
                   nreceivers,
                   hashes_sec_core,
                   dict_add_sec,
                   difficulty,
                   nhashes_stored):
    """Return the number of seconds needed to regenerate the long message.

    Senders are charged for hashing every one of the ``nhashes_stored``
    values; receivers are charged for adding ``nhashes_stored / 2**difficulty``
    of them to the dictionary. The result is the larger of the two durations.

    NOTE: this redefines ``regen_msg_time`` from an earlier cell with the
    same computation.
    """
    time_hashing = nhashes_stored / (nsenders*hashes_sec_core)

    additions = nhashes_stored/(2**difficulty)
    time_adding = additions / (nreceivers*dict_add_sec)

    return max(time_adding, time_hashing)


def regen_msg_l(difficulty,
                nhashes_stored):
    """Return the max l that can be constructed with the given difficulty.

    Computed as log2 of ``nhashes_stored / 2**difficulty``.

    NOTE: this redefines ``regen_msg_l`` from an earlier cell with the same
    computation.
    """
    from math import log2

    kept_hashes = nhashes_stored/(2**difficulty)
    return log2(kept_hashes)
    

def largest_n(l,
              nsenders,
              nreceivers,
              dict_queries_sec,
              hashes_sec_core,
              difficulty,
              t_sec):
    """
    Given attack parameters, return the largest n that can be attacked in t_sec.

    The result is ``log2(rate * t_sec) + l + difficulty``, where ``rate`` is
    the smaller of the sender and receiver query rates per second.
    """
    from math import log2

    sender_rate = nqueries_sender(nsenders, hashes_sec_core, difficulty)
    receiver_rate = nqueries_receiver(nreceivers, dict_queries_sec)
    # Total queries achievable in the time budget, limited by the slower side.
    total_queries = min(sender_rate, receiver_rate)*t_sec

    return log2(total_queries) + l + difficulty



def find_best_parameters(nservers,
                         server_memory,
                         ncores_per_server,
                         nhashes_stored,
                         dict_queries_sec,
                         dict_add_sec,
                         hashes_sec_core,
                         hashes_sec_phase_i,
                         t_sec,
                         phase_i_timeout=365*24*60*60):
    """
    Find the attack parameters that can attack the largest possible n in t_sec.

    Every split of the ``nservers * ncores_per_server`` cores into senders and
    receivers (at least ``nservers`` cores always stay receivers) is combined
    with every difficulty in ``range(40)``. For each combination the time to
    regenerate the long message is subtracted from ``t_sec``; combinations
    that leave no time, or whose phase i would exceed ``phase_i_timeout``,
    are rejected.

    nservers, server_memory, ncores_per_server: cluster description; the
        total memory bounds l through a dictionary of 4-byte entries filled
        to 93%.
    nhashes_stored: number of hashes available for the long message.
    dict_queries_sec, dict_add_sec, hashes_sec_core, hashes_sec_phase_i:
        measured rates per second.
    t_sec: time budget in seconds for regeneration plus querying.
    phase_i_timeout: upper bound in seconds on phase i; 365 days by default,
        since phase i can be done offline.

    Return a dictionary with the keys "n", "l", "difficulty", "nsenders" and
    "nreceivers" describing the best combination. If no combination is
    feasible, every value is 0.
    """

    from math import log2
    from itertools import product

    memory = nservers * server_memory
    val_size_bytes = 4  # one entry size in the dictionary
    filling_rate = 0.93  # how many slots of the dictionary are used
    l_max = log2(filling_rate * memory / val_size_bytes)

    ncores = nservers * ncores_per_server
    largest_difficulty = 40

    best_n = 0  # optimize: find largest n
    best_l = 0
    best_difficulty = 0
    best_nsenders = 0
    best_nreceivers = 0

    for nsenders, difficulty in product(range(1, ncores - nservers + 1),
                                        range(0, largest_difficulty)):
        nreceivers = ncores - nsenders
        l = min(l_max, regen_msg_l(difficulty, nhashes_stored))

        t_rgen_msg = regen_msg_time(nsenders,
                                    nreceivers,
                                    hashes_sec_core,
                                    dict_add_sec,
                                    difficulty,
                                    nhashes_stored)

        t_sec_after_regen_msg = t_sec - t_rgen_msg
        if t_sec_after_regen_msg <= 0:
            # All the time has been spent on regenerating the long message.
            continue

        n = largest_n(l,
                      nsenders,
                      nreceivers,
                      dict_queries_sec,
                      hashes_sec_core,
                      difficulty,
                      t_sec_after_regen_msg)

        t_phase_i = phase_i_time(l, difficulty, hashes_sec_phase_i)

        if n > best_n and t_phase_i <= phase_i_timeout:
            best_n = n
            best_l = l
            best_difficulty = difficulty
            best_nsenders = nsenders
            # Remember the receiver count of the winning split. Deriving it
            # after the loop from the loop variable would report the split of
            # the last iteration instead of the best one.
            best_nreceivers = nreceivers

    return {"n": best_n, "l": best_l,
            "difficulty": best_difficulty,
            "nsenders": best_nsenders,
            "nreceivers": best_nreceivers}

In [8]:
%%time
# Search for the best parameters using the notebook-level configuration.
best_parms = find_best_parameters(nservers,
                     server_memory,
                     ncores_per_server,
                     nhashes_stored,
                     dict_queries_sec,
                     dict_add_sec,
                     hashes_sec_core,
                     hashes_sec_phase_i,
                     t_sec)

print(best_parms)

# Unpack the result into globals; the following cells read them.
nsenders = best_parms["nsenders"]
nreceivers = best_parms["nreceivers"]
difficulty = best_parms["difficulty"]
l = best_parms["l"]

{'n': 76.41361356496108, 'l': 32.0, 'difficulty': 0, 'nsenders': 4, 'nreceivers': 1}
CPU times: user 25 ms, sys: 60 µs, total: 25.1 ms
Wall time: 24.8 ms


In [9]:
# How long will phase_i take with the best parameters (l, difficulty) unpacked above?
seconds_2_time(phase_i_time(l, difficulty, hashes_sec_phase_i))

'0 days, 0 hours, 2 mins, 8 sec'

In [10]:
# How long will regenerating the long message take with the best parameters unpacked above?
seconds_2_time(regen_msg_time(nsenders,
                   nreceivers,
                   hashes_sec_core,
                   dict_add_sec,
                   difficulty,
                   nhashes_stored))

'0 days, 0 hours, 6 mins, 25 sec'

In [11]:
from math import log2
# Rebinds dict_queries_sec (2^21.863350 in the first cell) to a higher rate.
dict_queries_sec = 2^22.963350
# log2 of a rate of 33767312.397134 per second.
# NOTE(review): which measurement this number comes from is not shown here -- TODO confirm.
log2(33767312.397134)

25.00912402135891