# Watchtower

The Hydras are finally about to be defeated. This watchtower helps monitor the impact of the Hydras' deaths on the rest of the world.

## Features of this script

1. Query a Nebula Crawler database
2. Save useful information from the database to disk
3. Load database information from disk
4. Generate plots for selected crawls

## Nebula DB queries

In [65]:
import psycopg2, math
from sshtunnel import SSHTunnelForwarder

In [29]:
def update_peerids():
    """Append newly seen nebulaID -> peerID mappings to the peer IDs file.

    Reads every (id, multi_hash) row of the `peers` table through the global
    cursor `cur`, then appends one `nebulaID,peerID` line to
    `peerids_filename` for each nebula ID the file does not list yet.
    Lines already present in the file are never rewritten.
    """
    cur.execute("select id,multi_hash from peers;")
    peerids = {row[0]: row[1] for row in cur.fetchall()}  # nebulaID -> peerID

    # Nebula IDs already on disk, kept in a set so each membership test is O(1)
    try:
        with open(peerids_filename, 'r') as f:
            known_ids = {int(line.split(',')[0]) for line in f if line.strip()}
    except FileNotFoundError:
        # first run: nothing has been saved yet, so every peer is new
        known_ids = set()

    to_add = [
        str(nebula_id) + ',' + peer_id + '\n'
        for nebula_id, peer_id in peerids.items()
        if nebula_id not in known_ids
    ]

    with open(peerids_filename, 'a') as f:
        f.writelines(to_add)

In [30]:
def get_crawl_ids():
    """Return the `id` column of every row of the `crawls` table, as a list.

    The query runs on the global cursor `cur`.
    """
    cur.execute("select id from crawls;")
    rows = cur.fetchall()
    return [row[0] for row in rows]

In [31]:
def store_crawl(crawl_id, filename):
    """Dump the neighbor relations of one crawl to a text file.

    Fetches every (peer_id, neighbor_ids) row of the `neighbors` table for
    `crawl_id` through the global cursor `cur` and writes one line per row to
    `filename`, overwriting it: the peer's nebula ID followed by its neighbors'
    nebula IDs, comma-separated. A peer without neighbors yields a line
    holding only its own ID.
    """
    # Let the driver bind crawl_id instead of splicing it into the SQL text
    cur.execute(
        "select peer_id,neighbor_ids from neighbors where crawl_id=%s;",
        (crawl_id,),
    )
    neighbors_rel = cur.fetchall()

    lines = [
        ','.join(str(x) for x in (nebula_id, *neighbors)) + '\n'
        for (nebula_id, neighbors) in neighbors_rel
    ]

    # the context manager closes the file even if a write fails
    with open(filename, 'w') as f:
        f.writelines(lines)

In [41]:
def crawl_filename(db_identifier, crawl_id):
    """Return the path of the file storing a crawl.

    The result is `data_folder` + `<db_identifier>-crawl-<crawl_id>`, where the
    crawl ID is left-padded with zeros to at least 3 digits
    (e.g. 7 -> "007", 42 -> "042", 1234 -> "1234").

    Raises:
        ValueError: if `crawl_id` is negative.
    """
    if crawl_id < 0:
        raise ValueError("crawl_id must be non-negative")
    crawl = str(crawl_id).zfill(3)
    return data_folder + str(db_identifier) + "-crawl-" + crawl

In [42]:
# Folder (relative path, with trailing slash) that holds every file this
# notebook saves or loads
data_folder = "data/"
# File holding the nebulaID -> peerID mapping, one `nebulaID,peerID` per line
peerids_filename = f"{data_folder}peerids"

In [77]:
# Address of each host, keyed by a "<region>_offset_<n>" label
hosts = {
    "ap_southeast_1_offset_4": "13.212.87.17",
    "ap_southeast_1_offset_9": "13.212.199.68",
    "eu_central_1_offset_0": "3.69.43.200",
    "eu_central_1_offset_5": "18.192.183.185",
    "sa_east_1_offset_3": "18.231.132.66",
    "sa_east_1_offset_8": "15.229.22.158",
    "us_east_1_offset_2": "44.200.54.223",
    "us_east_1_offset_7": "34.205.54.199",
    "us_west_1_offset_1": "54.219.175.30",
    "us_west_1_offset_6": "54.153.60.176",
}

# SSH login and private key used when opening the tunnel
host_username = "ubuntu"
ssh_key_location = ".ssh/id_rsa"

In [79]:
# SSH tunnel towards one of the hosts listed in `hosts`.
# The address is looked up once so that switching host means changing a single key.
# NOTE(review): remote_bind_address is the destination as reached from the SSH
# server; if PostgreSQL only listens on the loopback interface there, this
# probably needs to be ("127.0.0.1", 5432) -- confirm.
# NOTE(review): the tunnel is created here but no start() call is visible in
# this notebook section -- confirm it is started before connecting.
tunnel_host = hosts["ap_southeast_1_offset_4"]
ssh_tunnel = SSHTunnelForwarder(
    tunnel_host,
    ssh_username=host_username,
    ssh_pkey=ssh_key_location,
    ssh_private_key_password="",
    remote_bind_address=(tunnel_host, 5432)
)

2022-11-29 14:52:14,451| ERROR   | Password is required for key /home/guissou/.ssh/id_ed25519


In [43]:
# Connection string handed to psycopg2.connect()
db_info = (
    "host=127.0.0.1 "
    "dbname=nebula "
    "user=nebula "
    "password=password"
)
# Prefix identifying this database in the names of the saved crawl files
db_identifier = "db0"

In [44]:
# Open the database connection described by `db_info` and create the cursor
# that the query helpers (update_peerids, get_crawl_ids, store_crawl) use
# through the global name `cur`.
conn = psycopg2.connect(db_info)
cur = conn.cursor()

In [45]:
# List the crawl IDs available in the database (shown as the cell output)
get_crawl_ids()

[1, 2, 3]

In [46]:
# IDs of the crawls to export to disk with store_crawl()
crawl_ids_to_save = [1, 3]

In [48]:
# Refresh the nebulaID -> peerID file first, then dump each selected crawl
# into its own file
update_peerids()
for crawl_id in crawl_ids_to_save:
    target = crawl_filename(db_identifier, crawl_id)
    store_crawl(crawl_id, target)

## Load crawler data

In [49]:
# Saved crawls to load. Despite the variable name, these are file names
# (looked up inside data_folder), not numeric crawl IDs.
crawl_ids_to_load = ['db0-crawl-001', 'db0-crawl-003']

In [50]:
# load nebulaID -> peerID map (both sides are kept as strings, as read from the file)
nebulaid_peerid = {}
with open(peerids_filename, 'r') as file:
    for line in file:
        if not line.strip():
            continue  # ignore blank lines
        # Strip only the line terminator: a final line without '\n' then keeps
        # the last character of its peer ID.
        fields = line.rstrip('\n').split(',')
        nebulaid_peerid[fields[0]] = fields[1]

In [53]:
# One dict per loaded crawl: nebulaID -> list of neighbor nebulaIDs (all strings)
nebula_neighbors = []
# The loop variable is deliberately not called `crawl_filename`, which would
# overwrite the crawl_filename() helper defined earlier in the notebook.
for crawl_file in crawl_ids_to_load:
    # load neighbors for each crawl
    crawl_neighbors = {}
    with open(data_folder + crawl_file, 'r') as file:
        for line in file:
            if not line.strip():
                continue  # ignore blank lines
            # Strip the newline from the line itself before splitting, so that
            # every neighbor is kept and a peer without neighbors gets a clean
            # key and an empty list.
            peer, *neighbors = line.rstrip('\n').split(',')
            crawl_neighbors[peer] = neighbors
    nebula_neighbors.append(crawl_neighbors)

## Helpers

In [54]:
import multihash as mh
import hashlib as hl
from binary_trie import Trie, bytes_to_bitstring, int_to_bitstring

In [55]:
# Map a peer_id (e.g 12D3KooWEZXjE41uU4EL2gpkAQeDXYok6wghN7wwNVPF5bwkaNfS) to
# the identifier used in Kademlia: the SHA-256 digest (256 bits) of the
# base58-decoded multihash.
def multihash_to_kad_id(peer_id: str) -> bytes:
    decoded = mh.from_b58_string(peer_id)
    hasher = hl.sha256(decoded)
    return hasher.digest()

# XOR two bitstrings of equal size; the size doesn't need to be a multiple of 8.
# Returns the empty string when the two sizes differ.
def xor_bitstring(bs0: str, bs1: str) -> str:
    if len(bs0) != len(bs1):
        return ""
    # a position yields '0' when both characters match, '1' otherwise
    return "".join('0' if a == b else '1' for a, b in zip(bs0, bs1))

# Returns the XOR distance (as bytes) between the two provided byte arrays.
# When the lengths differ, the shorter one is right-aligned, i.e. treated as
# if it were left-padded with zero bytes up to the longer length.
def xor_distance(bytes0: bytes, bytes1: bytes):
    width = max(len(bytes0), len(bytes1))
    left = bytes0.rjust(width, b'\x00')
    right = bytes1.rjust(width, b'\x00')
    return bytes(a ^ b for a, b in zip(left, right))

# Get the corresponding k-bucket for the given XOR distance in bytes.
# The result is 256 minus the bit length of the distance read as a big-endian
# integer; for a 32-byte distance this is its number of leading 0 bits
# (256 when the distance is all zeros).
def bucket_number_for_distance(d: bytes) -> int:
    return 256 - int.from_bytes(d, "big").bit_length()

In [56]:
class NebulaPeer:
    """A peer of one crawl: its identifiers, Kademlia key and bucketed neighbors."""

    def __init__(self, nebula_id, peer_id, neighbors_ids):
        # identifiers exactly as handed in by the caller
        self.nebula_id = nebula_id
        self.peer_id = peer_id
        self.neighbors_ids = neighbors_ids

        # Kademlia identifier derived from the peer ID
        self.key = multihash_to_kad_id(peer_id)

        # a peer counts as alive when it was given at least one neighbor ID
        self.alive = len(neighbors_ids) != 0

        # 257 buckets, indexed by bucket_number_for_distance(); filled by addNeighbor()
        self.buckets = [[] for _ in range(257)]
        # key bitstring -> NebulaPeer, used to ignore duplicate neighbors
        self.neighbors = {}

    def distance(self, p):
        """Return the XOR distance between this peer's key and `p`'s key."""
        return xor_distance(self.key, p.key)

    def addNeighbor(self, peer):
        """Register `peer` as a neighbor and file it in its bucket (once only)."""
        bitstring = bytes_to_bitstring(peer.key)
        if bitstring in self.neighbors:
            return
        self.neighbors[bitstring] = peer
        bucket = bucket_number_for_distance(self.distance(peer))
        self.buckets[bucket].append(peer)

    def __str__(self):
        return f"nebula_id: {self.nebula_id}, peer_id: {self.peer_id}, neighbors: {self.neighbors_ids}"

## Building the data structures

In [61]:
import time

In [60]:
# Per-crawl results; the three lists are meant to be indexed in the same order
# as nebula_neighbors.
all_crawls_peers = [] # contains all dialable peers for each crawl
# NOTE(review): nothing is appended to all_crawls_all_peers in the lines
# visible here -- confirm it is filled further down.
all_crawls_all_peers = [] # contains all peers for each crawl (including undialable ones)
all_crawls_peers_maps = [] # for each crawl, the map nebulaID -> NebulaPeer

i = 0
for nebula in nebula_neighbors:
    
    # build all peers that have their own entry in this crawl
    peers = [NebulaPeer(n, nebulaid_peerid[n], nebula[n]) for n in nebula]
    # make a map for easy access nebulaID -> NebulaPeer
    peers_map = {p.nebula_id:p for p in peers}
    
    # peers that only show up as somebody's neighbor (no entry of their own in
    # this crawl); keyed by nebulaID so each one is created a single time
    new_peers = {}
    for p in peers:
        for n in p.neighbors_ids:
            if n not in peers_map and n not in new_peers:
                # first time this neighbor is seen: create it with an empty
                # neighbor list, which leaves its `alive` flag False
                np = NebulaPeer(n, nebulaid_peerid[n], [])
                new_peers[n] = np
                p.addNeighbor(np)
            elif n in new_peers:
                # neighbor-only peer already created for an earlier peer
                p.addNeighbor(new_peers[n])
            else:
                # neighbor has its own entry in this crawl
                p.addNeighbor(peers_map[n])
        
    all_crawls_peers.append(peers)
    all_crawls_peers_maps.append(peers_map)