# System parameters

In [1]:
n_nodes = 25000
repl_factor = 20 # from rfm17
#pr_storing_area = 0 # how close is close enough???
first_gala_bucket_id = 8 # roughly, take graph from Dennis for more precision
# gala_distribution_per_bucket = {0: 0, 1: 0.001, ...}
concurrency = 10
keysize = 256

## Remarks

b255 and b256 are expected to have the same cardinality.

# Lookup process

When looking up for a random `key`, the bucket id of the closest peers will be determined as follow:
`log2(rand)` with 0 < rand < 2**256

The closest peers from this bucket are going to be the `concurrency` closest among `repl_factor`

We are interested in the closeness between the `key` and the requested `peerid` (the first one being the one of the requester.), and how close we get at each step. We expect to observe a giant step - baby step process.

taking a random bitstring is equivalent to generating a random `peerid` and a random `key` and taking the distance between them.

For the first hop: we need to compute the bucket first, then get `repl_factor` random `peerid`s fitting this bucket, and selecting `concurrency` out of them.

## Compute every `key` and `peerid` in distance to target instead of bitstring.

In [2]:
import random, math
from binary_trie import bitstring_to_int, int_to_bitstring

In [3]:
# deprecated
def random_key(n=0):
    return "".join(str(random.randint(0, 1)) for i in range(keysize-n))

In [4]:
# deprecated
def bucket_for_key(bitstring):
    return keysize - math.ceil(math.log2(bitstring_to_int(bitstring)))

# Full theory

The following section is totally generic for key search. It assumes that every single key is a peerid, and that the exact target key must be found. This theory doesn't really apply to the IPFS network that has a limited number of peers, and the provider record gets stored on multiple peers

In [23]:
def random_dist(b=keysize):
    # b is the bucket from which we want the distance to be (e.g dist < 2**b)
    return random.randint(0, 2**b - 1)

In [24]:
def bucket_for_dist(d):
    if d == 0:
        return 0
    return math.floor(math.log2(d))+1

In [25]:
# returns the concurrency closest peers from the RT
def selection(dist):
    b = bucket_for_dist(dist)
    peerids = [random_dist(b) for _ in range(repl_factor)]
    #return sorted(peerids)[:concurrency]
    return sorted(peerids)

In [26]:
# returns the number of hops for a random lookup process
def lookup_process(target=None):
    if target is None:
        target = random_dist()

    n_hops = 0

    peers = selection(target)
    while True:
        n_hops += 1
        new_peers = []
        for i in range(concurrency):
            new_peers += selection(peers[i])
        peers += new_peers
        peers = sorted(peers)

        if 0 in peers:
            break

    return n_hops

In [27]:
# Example: set keysize = 16, concurrency = 2
def lookup_process_example(target=None):
    if target is None:
        target = random_dist()

    n_hops = 0

    peers = selection(target)
    while True:
        n_hops += 1
        print("Hop", n_hops)
        print("Peers =", peers)
        new_peers = []
        for i in range(concurrency):
            print("   ", peers[i])
            new_peers += selection(peers[i])
        peers += new_peers
        peers = sorted(peers)

        if 0 in peers:
            break

    print("Total hops:", n_hops)

In [10]:
lookup_process_example()

Hop 1
Peers = [3231165605114097315375346527038639285006137624564563967875683873210899667138, 9094233855787925715666046573286882292146444383554178453287980435503345368601, 10182110557591481421102507940671688231004485099675852711144867874742262617507, 14256733854169128335270309461969709332969066678501767159545092299134186428133, 17514924290765568893377281992042461765587922688452923058376483782480493577106, 28746474142264480102628724686177599260704991676632831152562042772300076975229, 28763032051505800444673556823981383863367202624834564864926356449316489505491, 38577015285292877522744037619557689510701611315347823976240927324210250239815, 60027328823745665232859464806184592278733611128931378717824062437279167318170, 62823150856630916320540504723453087674312533407595335003852282249027991327513, 75068842310104246840166796457014159414810537970774510138510853773366132770333, 86857093351823257301700079803431885946786152491850073058056757470039040410512, 879909648066809863542270548957304340435

# IPFS evaluation

In [22]:
n_nodes = 2**14
repl_factor = 20 # from rfm17
#pr_storing_area = 0 # how close is close enough???
first_gala_bucket_id = 8 # roughly, take graph from Dennis for more precision
# gala_distribution_per_bucket = {0: 0, 1: 0.001, ...}
concurrency = 10
keysize = 256

We want to compute the average. Not corner cases for now. Assume distribution is uniform.

In [14]:
distance_between_two_nodes = 2**keysize / n_nodes
pr_zone = distance_between_two_nodes * repl_factor # distance covered by a provider record

In [20]:
# returns the number of hops for a random lookup process
def ipfs_lookup_process(target=None, zone=pr_zone):
    if target is None:
        target = random_dist()

    n_hops = 0

    peers = selection(target)
    found = False
    while not found:
        n_hops += 1
        for p in peers:
            if p < zone:
                found = True

        new_peers = []
        for i in range(concurrency):
            new_peers += selection(peers[i])
        peers += new_peers
        peers_set = set(peers)
        peers = sorted(list(peers_set))

    return n_hops

In [21]:
ipfs_lookup_process()

2

In [302]:
for i in range(15, 77):
    zone = 10**i
    res = []
    for j in range(100):
        res.append(ipfs_lookup_process(zone=zone))
    print(i, sum(res)/len(res))

15 34.47
16 33.84
17 33.38
18 32.72
19 32.27
20 31.54
21 30.8
22 30.61
23 30.04
24 29.6
25 28.9
26 28.47
27 27.84
28 27.27
29 26.79
30 26.28
31 25.71
32 25.11
33 24.69
34 24.05
35 23.35
36 22.93
37 22.31
38 21.93
39 21.42
40 20.66
41 20.3
42 19.74
43 19.16
44 18.58
45 17.95
46 17.53
47 16.85
48 16.4
49 15.87
50 15.35
51 14.83
52 14.11
53 13.65
54 13.06
55 12.45
56 12.0
57 11.28
58 10.87
59 10.37
60 9.85
61 9.24
62 8.76
63 8.32
64 7.68
65 7.21
66 6.44
67 6.01
68 5.5
69 4.94
70 4.35
71 3.88
72 3.28
73 2.75
74 2.18
75 1.66
76 1.07


# Tests

This section contains test to verify the accuracy of the code above

In [135]:
# bucket for distance test

occurences = {}
keys = {}
for i in range(100000):
    k = random_dist()
    b = bucket_for_dist(k)
    if k not in keys:
        keys[k] = 0
    keys[k] += 1
    if b not in occurences:
        occurences[b] = 0
    occurences[b] += 1

for i in range(keysize+1, 0, -1):
    if i in occurences:
        # the buckets are printed as 1-b, common representation
        print(keysize - i, occurences[i])

print(keys)
print(occurences)

0 50177
1 24909
2 12440
3 6377
{0: 6097, 14: 6279, 13: 6254, 1: 6377, 3: 6243, 2: 6197, 11: 6219, 6: 6234, 8: 6311, 7: 6292, 12: 6308, 9: 6192, 15: 6362, 10: 6252, 4: 6168, 5: 6215}
{0: 6097, 4: 50177, 1: 6377, 2: 12440, 3: 24909}


# Distance distribution

In [29]:
keys = [random_dist() for _ in range(n_nodes)]
keys = sorted(keys)

In [33]:
# check for duplicate keys
for k in keys:
    count = 0
    for i in range(len(keys)):
        if keys[i] == k:
            count += 1
    if count == 2:
        print(k)

In [34]:
distances = [keys[i+1]-keys[i] for i in range(len(keys) - 1)]

In [40]:
avg_distance = sum(distances)/len(distances)
perf_distance = 2**keysize / n_nodes

In [42]:
print("Simulated", avg_distance)
print("Perfectly spaced", perf_distance)
print("Difference", float(avg_distance/perf_distance - 1), "%")

Simulated 7.0673393453532095e+72
Perfectly spaced 7.067388259113537e+72
Difference -6.921051813546519e-06 %
