"""
Build a compact binary "asmap" from IP prefix -> AS number mappings.

TAKEN FROM https://github.com/sipa/asmap/tree/nextgen

Intake a map of IP prefixes -> AS numbers and output instructions that will
allow a decoder to match an IP address to an ASN by following a sequence
of instructions.

The instructions describe a prefix tree that can be navigated using the bits of
an IP address (i.e. 0 for left child, 1 for right child, leaf nodes
corresponding to a given ASN). The types of instructions are denoted by the
*Type() functions defined below. Once an IP address specifies a bit for which
there is no path in the tree (i.e. the part of its address more specific than
any known network prefix), the tree returns a "default" ASN value that has been
set based on the last valid location in the tree.

See `testmap.py:Interpret` in the upstream repository for an illustration of
how the decoding process works.

Before the prefix tree is encoded into instructions using bits, it is compacted
(e.g. duplicate subtrees removed) and annotated with which default ASN values
should be set for particular regions of the tree.
"""
import ipaddress
import sys
from collections import Counter, namedtuple
from typing import Iterable, List, Optional


Entry = namedtuple('Entry', (
    # The length of the network prefix in bits. E.g. '26' for 255.255.0.0/26.
    'prefix_len',

    # An int containing the bits of the network address.
    'net_addr',

    # An int for the autonomous system (AS) number.
    'asn',
))


def Parse(entries: list, source: Optional[Iterable[str]] = None) -> None:
    """
    Read lines of the format

        1.0.0.0/24 AS13335 # ipv4.dump:4856343
        1.0.4.0/22 AS56203 # ipv4.dump:2759291
        ...

    ignoring anything following '#' as well as blank lines. Appends an Entry
    for each remaining line. IPv4 networks are mapped into IPv6 space
    (::ffff:0:0/96), so every Entry describes a 128-bit address.

    Args:
        entries: modified in place with the new Entries.
        source: iterable of text lines to parse. Defaults to `sys.stdin`,
            which keeps the historical command-line behaviour while letting
            library callers pass a list or file object directly.

    Raises:
        ValueError: if a line is not "<prefix> AS<number>" or the prefix is
            not a valid network.
    """
    lines = sys.stdin if source is None else source
    for lineno, raw in enumerate(lines, 1):
        # str.split() without arguments also discards the trailing newline
        # and tolerates repeated whitespace between the two fields.
        fields = raw.split('#', 1)[0].split()
        if not fields:
            continue
        if len(fields) != 2:
            raise ValueError(
                "line %i: expected '<prefix> AS<number>', got %r" % (lineno, raw))
        prefix, asn = fields
        if len(asn) <= 2 or asn[:2] != "AS":
            raise ValueError("line %i: malformed AS number %r" % (lineno, asn))
        network = ipaddress.ip_network(prefix)

        prefix_len = network.prefixlen
        net_addr = int.from_bytes(network.network_address.packed, 'big')

        # Map an IPv4 prefix into IPv6 space.
        if isinstance(network, ipaddress.IPv4Network):
            prefix_len += 96
            net_addr += 0xffff00000000

        entries.append(Entry(prefix_len, net_addr, int(asn[2:])))


def UpdateTree(gtree, addrlen: int, entries: Iterable[Entry]):
    """
    Insert every entry into a prefix tree such that following a path down
    through the tree based on the bits of a network prefix (in order of most
    significant bit) leads to an ASN.

    A node is either None (unassigned), an int (ASN leaf) or a two-element
    list `[child_for_bit_0, child_for_bit_1]`.

    Args:
        gtree: tree structure to encode the mappings into. Modified in-place.
        addrlen: The maximum number of bits in a network address.
            This is 128 for IPv6 (16 bytes).
        entries: The network prefix -> ASN mappings to encode. Entries with a
            prefix length of 0 select no bits and therefore change nothing.

    Returns:
        `gtree`, the same object that was passed in.
    """
    # Sorting orders entries by prefix length first, so less specific
    # networks are inserted before the more specific ones that refine them.
    for prefix, val, asn in sorted(entries):
        tree = gtree

        # Iterate through each bit in the network prefix, starting with the
        # most significant bit.
        for i in range(prefix):
            bit = (val >> (addrlen - 1 - i)) & 1

            # On the last bit of the prefix the ASN is stored; before that
            # an inner node is required to keep descending.
            needs_inner = i < prefix - 1
            child = tree[bit]

            if child is None:
                if needs_inner:
                    tree[bit] = [None, None]
                    tree = tree[bit]
                    continue
                tree[bit] = asn
                break

            if isinstance(child, list):
                # A list at the final bit would mean a more specific prefix
                # was inserted earlier, which the sort order rules out.
                assert needs_inner
                tree = child
                continue

            assert isinstance(child, int)
            if child == asn:
                # Already covered by a less specific prefix with this ASN.
                break
            if not needs_inner:
                tree[bit] = asn
                break

            # Split the existing leaf: both halves inherit its ASN and the
            # remaining bits of this prefix carve out the exception.
            tree[bit] = [child, child]
            tree = tree[bit]
    return gtree


def CompactTree(tree, approx=True):
    """
    Remove redundancy from a tree.

    E.g. if all nodes in a subtree point to the same ASN, compact the subtree
    into a single int.

    Args:
        tree: None, an int leaf, or a `[left, right]` list (modified in-place).
        approx: if True, unassigned ranges may get reassigned to arbitrary
            ASNs.

    Returns:
        (the compacted tree, a set of all ASNs in the tree)
    """
    if tree is None:
        return (tree, set())
    if isinstance(tree, int):
        return (tree, {tree})
    tree[0], leftas = CompactTree(tree[0], approx)
    tree[1], rightas = CompactTree(tree[1], approx)
    allas = leftas | rightas
    if not allas:
        return (None, allas)
    if approx and len(allas) == 1:
        # Only one ASN below this node: unassigned siblings are absorbed.
        return (next(iter(allas)), allas)
    if isinstance(tree[0], int) and isinstance(tree[1], int) and tree[0] == tree[1]:
        return (tree[0], {tree[0]})
    return (tree, allas)


def PropTree(tree, approx=True):
    """
    Annotate internal nodes in the tree with the most common leaf below them.
    The binary serialization later uses this as the "default" ASN.

    This changes the shape of annotated nodes from
    `[left_child, right_child]` to `[lc, rc, most_common_ASN]`.

    Args:
        tree: None, an int leaf, or a list node (modified in-place).
        approx: if False, a node is only annotated when no leaf below it is
            unassigned.

    Returns:
        (tree, Counter of ASNs in tree, whether any leaf below is unassigned)
    """
    if tree is None:
        return (tree, Counter(), True)
    if isinstance(tree, int):
        return (tree, Counter({tree: 1}), False)
    tree[0], leftcnt, leftnone = PropTree(tree[0], approx)
    tree[1], rightcnt, rightnone = PropTree(tree[1], approx)
    allcnt = leftcnt + rightcnt
    allnone = leftnone | rightnone
    if not allcnt:
        # No ASN anywhere below (e.g. an uncompacted `[None, None]`):
        # there is nothing to annotate with.
        return (tree, allcnt, allnone)
    maxasn, maxcount = allcnt.most_common(1)[0]
    if maxcount >= 2 and (approx or not allnone):
        return ([tree[0], tree[1], maxasn], Counter({maxasn: 1}), allnone)
    return (tree, allcnt, allnone)


def EncodeBits(val, minval, bit_sizes) -> List[int]:
    """
    Perform a variable-length encoding of a value to bits.

    The value is first normalized by subtracting `minval` (the decoder adds
    it back). Then, for each size in `bit_sizes`: if the value does not fit
    in that many bits, a 1 bit is emitted, the size's capacity is subtracted
    and the next size is tried; otherwise a 0 bit is emitted (omitted for the
    last size, where it carries no information) followed by the value in
    exactly that many bits, most significant bit first.

    Returns:
        a list of bits representing the value to encode.

    Raises:
        ValueError: if `val` is below `minval` or too large for `bit_sizes`.
    """
    if val < minval:
        raise ValueError("cannot encode %r: below minimum %r" % (val, minval))
    val -= minval
    ret = []
    last = len(bit_sizes) - 1
    for pos, bit_size in enumerate(bit_sizes):
        # If the value will not fit in `bit_size` bits, absorb the largest
        # value for this bitsize and continue to the next size.
        if val >= (1 << bit_size):
            val -= (1 << bit_size)
            ret.append(1)
            continue

        # The terminating 0 is redundant for the final size.
        if pos < last:
            ret.append(0)

        # Use remaining bits to encode the rest of val.
        ret.extend((val >> shift) & 1 for shift in range(bit_size - 1, -1, -1))
        return ret

    # Couldn't fit val into any of the bit_sizes.
    raise ValueError(
        "cannot encode %r: too large for bit sizes %r" % (val + minval, bit_sizes))


def MatchType() -> List[int]:
    """
    The match instruction descends into the tree based on a bit path. If at any
    point the match fails to hit a valid path through the tree, it will fail
    and return the current default ASN (which changes as we move through the
    tree).
    """
    return EncodeType(2)


def JumpType() -> List[int]:
    """
    The jump instruction allows us to quickly seek to one side of the tree
    or the other. By encoding the length of the left child, we can skip over
    it to the right child if need be.
    """
    return EncodeType(1)


def LeafType() -> List[int]:
    """The leaf instruction encodes an ASN at the end of a bit path."""
    return EncodeType(0)


def SetNewDefaultType() -> List[int]:
    """
    This instruction establishes a new default ASN to return should we fail
    while traversing this path.
    """
    return EncodeType(3)


def EncodeType(v) -> List[int]:
    """Encode an instruction type (0-3) as 1 to 3 bits."""
    return EncodeBits(v, 0, [0, 0, 1])


def EncodeASN(v) -> List[int]:
    """Encode an AS number (>= 1)."""
    # It's reasonable to ask why "15" (indicating 16 bits) is the minimum size
    # we might try to pack an ASN into, given there are many ASNs below 2**16.
    #
    # The reason that we start at 15 here is because we want the first bitsize
    # we specify to contain ~50% of the values we are trying to encode - this
    # is because each separate bitsize we try will add a digit to our encoded
    # values, so we simultaneously want to minimize the number of bitsizes we
    # allow while also minimizing the bit length of the encoded data, which
    # is a trade-off.
    return EncodeBits(v, 1, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24])


def EncodeMatch(v) -> List[int]:
    """Encode a match pattern (a 1 sentinel bit followed by 1-8 path bits)."""
    return EncodeBits(v, 2, [1, 2, 3, 4, 5, 6, 7, 8])


def EncodeJump(v) -> List[int]:
    """Encode the bit length of a serialized left subtree (>= 17)."""
    return EncodeBits(v, 17, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30])


def EncodeBytes(bits) -> List[int]:
    """
    Encode a sequence of bits as a sequence of byte values.

    Bits are packed least significant bit first within each byte; a final
    partial byte is padded with zero bits.
    """
    val = 0
    nbits = 0
    packed = []
    for bit in bits:
        val += (bit << nbits)
        nbits += 1
        if nbits == 8:
            packed.append(val)
            val = 0
            nbits = 0
    if nbits:
        packed.append(val)
    return packed


def TreeSer(tree, default) -> List[int]:
    """
    Serialize a (compacted, annotated) tree into a list of instruction bits.

    Args:
        tree: an int leaf or a list node; must not be None and must not be a
            leaf equal to `default`.
        default: the ASN the decoder currently falls back to, or None.
    """
    match = 1
    assert tree is not None
    assert not (isinstance(tree, int) and tree == default)

    # If one side of the tree is empty (i.e. represents a path without
    # choices), encode a match instruction up to 8 bits. `match` starts as a
    # lone sentinel 1 bit and collects the forced path bits below it.
    while isinstance(tree, list) and match <= 0xFF:
        if tree[0] is None or tree[0] == default:
            match = (match << 1) + 1
            tree = tree[1]
        elif tree[1] is None or tree[1] == default:
            match = (match << 1) + 0
            tree = tree[0]
        else:
            break
    if match >= 2:
        return MatchType() + EncodeMatch(match) + TreeSer(tree, default)

    # Leaf node: return the ASN.
    if isinstance(tree, int):
        return LeafType() + EncodeASN(tree)

    # Return the tree along with a new "default" ASN value should we fail to
    # match while along this path.
    if len(tree) > 2 and tree[2] != default:
        return SetNewDefaultType() + EncodeASN(tree[2]) + TreeSer(tree, tree[2])

    left = TreeSer(tree[0], default)
    right = TreeSer(tree[1], default)

    # Emit a jump carrying the length of the left child so the decoder can
    # skip straight to the right child.
    return JumpType() + EncodeJump(len(left)) + left + right


def BuildTree(entries, approx=True):
    """
    Build a fresh 128-bit prefix tree from `entries`.

    `approx` is accepted for interface compatibility but is not used here;
    approximation is applied by CompactTree and PropTree.
    """
    tree = [None, None]
    tree = UpdateTree(tree, 128, entries)
    return tree


def main() -> None:
    """Read prefixes from stdin and write the binary asmap to stdout."""
    entries: List[Entry] = []
    print("[INFO] Loading", file=sys.stderr)
    Parse(entries)
    print("[INFO] Read %i prefixes" % len(entries), file=sys.stderr)
    print("[INFO] Constructing trie", file=sys.stderr)
    tree = BuildTree(entries)
    print("[INFO] Compacting tree", file=sys.stderr)
    tree, _ = CompactTree(tree, True)
    print("[INFO] Computing inner prefixes", file=sys.stderr)
    tree, _, _ = PropTree(tree, True)

    ser = TreeSer(tree, None)
    print("[INFO] Total bits: %i" % (len(ser)), file=sys.stderr)
    sys.stdout.buffer.write(bytes(EncodeBytes(ser)))


if __name__ == "__main__":
    main()
# Highest 16-bit AS number below the block reserved for documentation use
# (64496-64511).
_MAX_AS_NUMBER = 64495
# AS_TRANS is reserved and must never be handed out as a real AS number.
_RESERVED_AS_NUMBERS = frozenset({23456})


def generate_as(warnet):
    """
    Pick a random AS number that no other tank in `warnet` uses yet.

    The number is drawn from 1.._MAX_AS_NUMBER (inclusive), skipping reserved
    values, and is recorded in `warnet.a_systems` before being returned.

    Args:
        warnet: object exposing a mutable set `a_systems` of AS numbers that
            are already taken.

    Returns:
        The newly allocated AS number.

    Raises:
        RuntimeError: if every usable AS number is already taken.
    """
    taken = warnet.a_systems
    usable = _MAX_AS_NUMBER - len(_RESERVED_AS_NUMBERS)

    if len(taken) < usable // 2:
        # Sparse pool: at least half of the draws succeed, so rejection
        # sampling terminates quickly.
        while True:
            as_number = random.randint(1, _MAX_AS_NUMBER)
            if as_number not in taken and as_number not in _RESERVED_AS_NUMBERS:
                break
    else:
        # Crowded pool: enumerate what is left so that we neither spin for a
        # long time nor loop forever once the pool is exhausted.
        free = [
            n for n in range(1, _MAX_AS_NUMBER + 1)
            if n not in taken and n not in _RESERVED_AS_NUMBERS
        ]
        if not free:
            raise RuntimeError("no unused AS numbers left to allocate")
        as_number = random.choice(free)

    taken.add(as_number)
    return as_number
def generate_as_map(self):
    """
    Write the textual and binary AS maps for all tanks of this warnet.

    One "<ipv4>/32 AS<number>" line per tank is written to
    `self.as_map_txt_path`. The same text is then run through the buildmap
    pipeline (parse, build trie, compact, annotate, serialize) and the
    resulting bytes are written to `self.as_map_dat_path`.
    """
    as_map_txt = "".join(
        f"{tank.ipv4}/32 AS{tank.autonomous_system}\n" for tank in self.tanks
    )

    # Write AS mappings to file
    with open(self.as_map_txt_path, "w") as f:
        f.write(as_map_txt)

    entries = []
    logger.info("AS map: Loading")
    # buildmap.Parse reads from sys.stdin, so feed it the text through a
    # temporary replacement. The previous stream is always restored, even if
    # parsing raises, so the process is never left with a hijacked stdin.
    previous_stdin = sys.stdin
    sys.stdin = io.StringIO(as_map_txt)
    try:
        buildmap.Parse(entries)
    finally:
        sys.stdin = previous_stdin
    logger.info(f"AS map: Read {len(entries)} prefixes")
    logger.info("AS map: Constructing trie")
    tree = buildmap.BuildTree(entries)
    logger.info("AS map: Compacting tree")
    tree, _ = buildmap.CompactTree(tree, True)
    logger.info("AS map: Computing inner prefixes")
    tree, _, _ = buildmap.PropTree(tree, True)

    ser = buildmap.TreeSer(tree, None)
    logger.info(f"AS map: Total bits: {len(ser)}")
    with open(self.as_map_dat_path, "wb") as f:
        f.write(bytes(buildmap.EncodeBytes(ser)))
def copy_asmap(self):
    """
    Copy the warnet-wide binary AS map into this tank's config directory.

    The source is `self.warnet.as_map_dat_path`; the destination is
    `self.as_map_dat_path`.
    """
    # Guard the path this method actually writes to, mirroring how
    # copy_dockerfile guards its own destination.
    assert self.as_map_dat_path
    shutil.copyfile(self.warnet.as_map_dat_path, self.as_map_dat_path)