# Best attack parameters given a hardware

We assume the attack parameters are $n$ and $l$. Also, denote $g$ to be the number of ignored bits. 

$p = \frac{2^l}{2^n}$ , this is a geometric random variable, thus we expect a collision after   $\#queries = \frac{2^n}{2^l} $

Assume, we only accept digests that have certain number of zeros, denoted as $d$. Thus, we can pretend as we are working on small digests
$$
\begin{align}
&\#queries = \frac{2^n}{2^l} \\
&\#queries_{sec} \cdot t_{sec} = \frac{2^n}{2^l} \\
\Rightarrow &n = log2\left(\#queries_{sec} \cdot t_{sec} \cdot 2^{n-l-d}  \right)
\end{align}
$$


We have three point of views of $\#queries$
- Senders: How many hashes they generate? 
    - Their speed will be affected by difficulty, but from their perspective the overall attack time doesn't change if the difficulty change (add explanation, later)
    - $\#snd\_queries_{sec} = \frac{\#senders \cdot \#gen\_hashes_{sec}} {2^{d}}$

- Receivers: How many hashes they can query the dicitonary. 
    - In their world, the higher the difficulty the better chance of hitting collision (since digests are technically shorter).
    - $\#rcv\_queries_{sec} = \#receivers \cdot \#dict\_queries_{sec} $

- Bandwith: This is how many hashes the network can carry in a second. 
    - From their perspective, difficulty reduces the rate of transmitted messages. 
    - $bdwth_queries_{sec} $


Thus,

$$\#queries_{sec} := min\left(snd\_queries_{sec}, rcv\_queries_{sec}, bdwth\_queries_{sec}\right)$$


In [1]:
# Numbers from Gros cluster, nancy, grid5000.fr
# find_dist overhead = 7%-9%
# hash_16x_avx (nancy: gros) ≈ 2^24.87hash/sec
# dict_add≈2^23.41 elm/sec
# dict_lookup ≈ 2^24.9337 elm/sec
# mpi_recv overhead = 10.71% (for regenerating message)


nservers = 124
server_memory = (96 - 20)*10^9 # 96 GB
ncores_per_server = 18
hashes_sec_core = 2^24.87
dict_queries_sec = 2^21.963350
t_sec =  2 * 24 * 3600
# how many hashes can oure compressed file 
nhashes_stored = 2^60
hashes_sec_phase_i = 2^24.72
dict_add_sec = 2^23.41

In [2]:
# 1 core hashing power
# thd2 sha_avx512_16way  elapsed 1.78sec i.e. 898392.92 hashes/sec = 2^19.777 hashes, 57.4971 M


# Querying 100000000, took 2.22 sec i.e. 44977939.99 elm/sec = 2^25.4227 elm/sec 


def seconds_2_time(t):
    from math import floor

    t = float(t)
    days  = floor(t/(3600*24))
    t = t - days*24*3600

    hours = floor(t/3600)
    t = t - hours*3600
    minutes = floor(t/60)
    t = t - minutes*60

    return f"{days} days, {hours} hours, {minutes} mins, {floor(t)} sec"

print(f"server_memory={server_memory}")

server_memory=76000000000


In [3]:
def regen_msg_time(nsenders,
                   nreceivers,
                   hashes_sec_core,
                   dict_add_sec,
                   difficulty,
                   nhashes_stored):
    """ return number of seconds needed to regenerate the long message"""
    nsecs_sender = nhashes_stored / (nsenders*hashes_sec_core)
    nsecs_receiver = (nhashes_stored/(2**difficulty)) 
    nsecs_receiver = nsecs_receiver / (nreceivers*dict_add_sec)
    
    return max(nsecs_receiver, nsecs_sender)


def regen_msg_l(difficulty,
                nhashes_stored):
    """ return the max l can be constructed with difficulty """
    from math import log2
    
    return log2( (nhashes_stored/(2**difficulty)) )
    
def nqueries_sender(nsenders, hashes_sec_core, difficulty):
    """ Return how many queries senders can generate per second """

    return nsenders*hashes_sec_core/(2**difficulty)


def nqueries_receiver(nreceivers, dict_queries_sec):
    """
    Return how many queries receivers can make in a second
    """
    return nreceivers * dict_queries_sec

def phase_i_time(l, difficulty, hashes_sec_phase_i):
    """
    Return how many seconds it takes to complete phase_i
    """
    return 2^l * 2^difficulty / (hashes_sec_phase_i)

In [4]:
regen_msg_time(1,
               1,
               hashes_sec_core,
               dict_add_sec,
               6,
               2^29)

17.5086992201718

In [10]:
print("Regenerating the long message will take ... ")
from math import log2
nservers = 8
ncores_per_server = 18
for nreceivers in range(nservers, nservers*ncores_per_server - nservers, nservers):
    nsenders = (ncores_per_server)*nservers - nreceivers
    #nreceivers = 2*nservers
    for diff in range(10):

        time_needed = regen_msg_time(nsenders,
                                     nreceivers, # nsenders
                                     hashes_sec_core,
                                     dict_add_sec,
                                     diff, # difficulty
                                     2^(30+4+log2(nsenders)) # nhashes stored
                                    ) # 25 our experimented interval, 4 because sha2-16way, log2(nsenders) that how  we truncate the #states 

        print(f"difficulty={diff} => {time_needed} sec, nreceivers={nreceivers}, nsenders={nsenders}")
    print("=============================================")

Regenerating the long message will take ... 
difficulty=0 => 26203.3353789319 sec, nreceivers=8, nsenders=136
difficulty=1 => 13101.6676894660 sec, nreceivers=8, nsenders=136
difficulty=2 => 6550.83384473298 sec, nreceivers=8, nsenders=136
difficulty=3 => 3275.41692236649 sec, nreceivers=8, nsenders=136
difficulty=4 => 1637.70846118324 sec, nreceivers=8, nsenders=136
difficulty=5 => 818.854230591622 sec, nreceivers=8, nsenders=136
difficulty=6 => 560.278375045500 sec, nreceivers=8, nsenders=136
difficulty=7 => 560.278375045500 sec, nreceivers=8, nsenders=136
difficulty=8 => 560.278375045500 sec, nreceivers=8, nsenders=136
difficulty=9 => 560.278375045500 sec, nreceivers=8, nsenders=136
difficulty=0 => 12330.9813547915 sec, nreceivers=16, nsenders=128
difficulty=1 => 6165.49067739573 sec, nreceivers=16, nsenders=128
difficulty=2 => 3082.74533869787 sec, nreceivers=16, nsenders=128
difficulty=3 => 1541.37266934893 sec, nreceivers=16, nsenders=128
difficulty=4 => 770.686334674466 sec, nre

In [6]:
def regen_msg_time(nsenders,
                   nreceivers,
                   hashes_sec_core,
                   dict_add_sec,
                   difficulty,
                   nhashes_stored):
    """ return number of seconds needed to regenerate the long message"""
    nsecs_sender = nhashes_stored / (nsenders*hashes_sec_core)
    nsecs_receiver = (nhashes_stored/(2**difficulty)) 
    nsecs_receiver = nsecs_receiver / (nreceivers*dict_add_sec)
    
    return max(nsecs_receiver, nsecs_sender)


def regen_msg_l(difficulty,
                nhashes_stored):
    """ return the max l can be constructed with difficulty """
    from math import log2
    
    return log2( (nhashes_stored/(2**difficulty)) )
    
def nqueries_sender(nsenders, hashes_sec_core, difficulty):
    """ Return how many queries senders can generate per second """

    return nsenders*hashes_sec_core/(2**difficulty)


def nqueries_receiver(nreceivers, dict_queries_sec):
    """
    Return how many queries receivers can make in a second
    """
    return nreceivers * dict_queries_sec

def phase_i_time(l, difficulty, hashes_sec_phase_i):
    """
    Return how many seconds it takes to complete phase_i
    """
    return 2^l * 2^difficulty / (hashes_sec_phase_i)


def largest_n(l,
              nsenders,
              nreceivers,
              dict_queries_sec,
              hashes_sec_core,
              difficulty,
              t_sec):

    """
    Given an attack parameter what is the largest n can be attacked in t_sec
    """
    from math import log2

    nqueries_sec = min(nqueries_sender(nsenders, hashes_sec_core, difficulty),
                   nqueries_receiver(nreceivers, dict_queries_sec))

    return log2(nqueries_sec*t_sec) + l + difficulty



def find_best_parameters(nservers,
              server_memory,
              ncores_per_server,
              nhashes_stored,
              dict_queries_sec,
              dict_add_sec,
              hashes_sec_core,
              hashes_sec_phase_i,
              t_sec,
              phase_i_timeout=365*24*60*60):
    """
    Find the attack parameters that can attack the largest possible n in t_sec
    return dictionary contains attack parameters.
    phase_i_timeout by default 365 days, since it can be done offline
    phase_ii_reconstruct_timeout 
    """

    from math import log2
    from itertools import product

    memory = nservers * server_memory
    val_size_bytes = 4 # one entry size in the dictionary
    filling_rate = 0.93 # how many slots of the dictionary are used
    l_max = log2(filling_rate * memory / val_size_bytes)

    ncores = nservers * ncores_per_server

    best_difficulty = 0
    best_n = 0 # optimize: find largest n
    best_nsenders = 0
    best_time_phase_i = float('inf')
    largest_difficulty = 40
    best_l = 0
    
    for nsenders, difficulty in product(range(1, ncores-nservers + 1), range(0, largest_difficulty)):
        nreceivers = ncores - nsenders
        l = min(l_max, regen_msg_l(difficulty, nhashes_stored))

        #print(f"l={l}, nsenders={nsenders}, difficulty={difficulty}, l_regen = {regen_msg_l( difficulty, nhashes_stored)}")
        t_rgen_msg = regen_msg_time(nsenders,
                                    nreceivers,
                                    hashes_sec_core,
                                    dict_add_sec,
                                    difficulty,
                                    nhashes_stored)
        
        t_sec_after_regen_msg = t_sec - t_rgen_msg
        if ( t_sec_after_regen_msg <= 0):
            continue # skip this iteration since all time have been spent on regenrating the long message
        
        
        n = largest_n(l,
                      nsenders,
                      nreceivers,
                      dict_queries_sec,
                      hashes_sec_core,
                      difficulty,
                      t_sec_after_regen_msg)


        # better n, always update
        t_phase_i = phase_i_time(l, difficulty, hashes_sec_phase_i)
        
        if (n > best_n  and t_phase_i <= phase_i_timeout):
            best_n = n
            best_l = l
            best_difficulty = difficulty
            best_nsenders = nsenders
            #print(f"better_n = {best_n}, better_l={best_l}, better_difficulty={best_difficulty}, better_nsenders={best_nsenders}, t_phase_i={seconds_2_time(t_phase_i)}")
            


    return {"n": best_n, "l": best_l,
            "difficulty": best_difficulty,
            "nsenders": best_nsenders,
            "nreceivers": ncores - nsenders}

In [7]:
%%time
# best parameters
best_parms = find_best_parameters(nservers,
                     server_memory,
                     ncores_per_server,
                     nhashes_stored,
                     dict_queries_sec,
                     dict_add_sec,
                     hashes_sec_core,
                     hashes_sec_phase_i,
                     t_sec)

print(best_parms)

nsenders = best_parms["nsenders"]
nreceivers = best_parms["nreceivers"]
difficulty = best_parms["difficulty"]
l = best_parms["l"]

{'n': 0, 'l': 0, 'difficulty': 0, 'nsenders': 0, 'nreceivers': 8}
CPU times: user 34.8 ms, sys: 0 ns, total: 34.8 ms
Wall time: 34.1 ms


In [8]:
# how long phase_i will take using the best parameters?
seconds_2_time(phase_i_time(l, difficulty, hashes_sec_phase_i))

'0 days, 0 hours, 0 mins, 0 sec'

In [9]:
# how long regenerating the long message again will take?
seconds_2_time(regen_msg_time(nsenders,
                   nreceivers,
                   hashes_sec_core,
                   dict_add_sec,
                   difficulty,
                   nhashes_stored))

OverflowError: cannot convert float infinity to integer

In [None]:
from math import log2
dict_queries_sec = 2^22.963350
log2(33767312.397134)