# Experiment 1

Hypothesis: Zeph will be able to see almost the same as complete discovery
(= full IPv4 Diamond-Miner snapshot) but with a much-reduced probing budget.

* Exhaustive cycles (will be used as ground truth)
* Zeph “exhaustive” cycles at 200 kpps (compression of full snapshot + epsilon% random)
* Zeph “constrained” cycles at 20 kpps (not enough budget to do every /24 in a day)

In [3]:
import logging
from pathlib import Path

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# logging.getLogger() returns the same object every time this cell runs, so
# handlers added by a previous run are still attached. Without this cleanup
# every message is emitted once per run of the cell (the recorded output of
# this notebook shows each line three times).
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# Common format for console and file output
script_formatter = logging.Formatter(
    "%(asctime)s :: SCRIPT :: %(levelname)s :: %(message)s"
)

# Console output
stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.INFO)
stream_handler.setFormatter(script_formatter)
logger.addHandler(stream_handler)

# Directory for the experiment
pilot_dir = Path("./resources/data/measurements/exp1/")
pilot_dir.mkdir(parents=True, exist_ok=True)

# File output; the directory must exist before the FileHandler opens log.txt
file_handler = logging.FileHandler(pilot_dir / "log.txt")
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(script_formatter)
logger.addHandler(file_handler)

## Configuration

In this section:

* we get the configuration of the Iris API and database
* we get the configuration of the experiment itself

In [4]:
# Get API/database credentials from config.py
from config.config import *

In [6]:
# Experiment parameters
# tool, protocol, min_ttl and max_ttl are passed as-is to iris_driver by
# adaptive_instance().
tool = "yarrp"
# Not referenced by any cell of this notebook.
# NOTE(review): presumably the "epsilon% random" share mentioned in the
# introduction -- confirm where it is consumed.
epsilon = 0.1
protocol = "icmp"
min_ttl = 8
max_ttl = 32

# These can be overridden by the pilot configuration (see below)
# n_cycles: number of cycles each instance runs.
n_cycles = 10
# global_budget: base value of the per-instance budget callables; also used
# as the size threshold when a pilot prefix subset is created.
global_budget = 11_881_416
# Pickle file the BGP prefixes are loaded from.
bgp_prefixes_path = Path("./resources/data/bgp_prefixes.pickle")

# Pilot definition (optional)

If you don't want to run the experiment on the entire universe of BGP prefixes, you can define a pilot.

In [19]:
# Enable/disable pilot experiment
# When True, the pilot cells below override n_cycles / global_budget and take
# care of loading or creating the BGP prefix subset; when False, the full
# prefix file is loaded instead.
enable_pilot = False

In [20]:
# If pilot is disabled, we just import the bgp prefixes
# See (https://github.com/dioptra-io/zeph) for more information about the creation of this file

import pickle

if not enable_pilot:
    # Full (non-pilot) run: read the complete set of BGP prefixes.
    # pickle can execute arbitrary code while loading, so only point this at
    # a file you generated yourself.
    with bgp_prefixes_path.open("rb") as prefixes_file:
        bgp_prefixes = pickle.load(prefixes_file)

    n_loaded = len(bgp_prefixes)
    logger.info(f"Number of BGP prefixes {n_loaded}")

2021-12-30 18:24:59,715 :: SCRIPT :: INFO :: Number of BGP prefixes 922882
2021-12-30 18:24:59,715 :: SCRIPT :: INFO :: Number of BGP prefixes 922882
2021-12-30 18:24:59,715 :: SCRIPT :: INFO :: Number of BGP prefixes 922882


In [10]:
import random

def pilot_bgp_prefixes(bgp_prefixes, n_prefixes):
    """Draw a random subset of BGP prefixes for a pilot run.

    Entries are visited in random order and accumulated until the running
    total of ``len(entry)`` exceeds ``n_prefixes``. The entry that crosses
    the threshold is kept, so the total can overshoot ``n_prefixes``. The
    final total is logged as the number of /24 prefixes.

    Args:
        bgp_prefixes: iterable of sized entries (each must support ``len()``).
            It is not modified.
        n_prefixes: threshold on the accumulated size.

    Returns:
        A new list with the selected entries, in the order they were drawn.
    """
    # Shuffle a private copy: random.shuffle() works in place and would
    # otherwise reorder the caller's list (and fail on non-list sequences).
    shuffled_prefixes = list(bgp_prefixes)
    random.shuffle(shuffled_prefixes)

    current_n_prefixes = 0
    subset_bgp_prefixes = []

    for bgp_prefix in shuffled_prefixes:
        if current_n_prefixes > n_prefixes:
            break

        subset_bgp_prefixes.append(bgp_prefix)
        current_n_prefixes += len(bgp_prefix)

    logger.info(f"Number of /24 prefixes: {current_n_prefixes}")
    return subset_bgp_prefixes

In [11]:
# Optionally override experiment parameters
if enable_pilot:
    # Pilot runs use fewer cycles and a much smaller budget than the defaults
    # set in the experiment parameters.
    n_cycles = 5
    global_budget = 10_000


### BGP prefixes subset creation

Here you can define the subset of BGP prefixes you want to run the pilot experiment on.

In [12]:
if enable_pilot:
    # Enable/disable bgp prefix subset creation
    # True: a random subset is drawn and dumped to bgp_prefixes_path.
    # False: the prefixes are loaded from the existing pickle file.
    create_bgp_prefixes_subset = False

In [15]:
import pickle

if enable_pilot:
    # Override prefix path
    # NOTE(review): this is the same path as the default defined with the
    # experiment parameters, so nothing is actually overridden -- confirm
    # whether a pilot-specific file was intended.
    bgp_prefixes_path = Path("./resources/data/bgp_prefixes.pickle")

    if not create_bgp_prefixes_subset:
        # Reuse a previously dumped prefix file instead of drawing a subset.
        # pickle can execute arbitrary code while loading: trusted files only.
        with bgp_prefixes_path.open("rb") as prefixes_file:
            bgp_prefixes = pickle.load(prefixes_file)

        n_loaded = len(bgp_prefixes)
        logger.info(f"Number of BGP prefixes {n_loaded}")

In [13]:
if enable_pilot:
    # Override prefix path and dump a subset here
    # NOTE(review): this path is identical to the default prefix file, so the
    # dump below replaces the full prefix file with the subset -- confirm
    # that this is intended.
    bgp_prefixes_path = Path("./resources/data/bgp_prefixes.pickle")

    if create_bgp_prefixes_subset:
        # Draw the random subset, using the global budget as size threshold
        bgp_prefixes = pilot_bgp_prefixes(bgp_prefixes, global_budget)
        n_selected = len(bgp_prefixes)
        logger.info(f"Number of BGP prefixes {n_selected}")

        # Persist the subset so later runs can load it as-is
        with bgp_prefixes_path.open("wb") as subset_file:
            pickle.dump(bgp_prefixes, subset_file)

## Instance definition

In this section we define the instance(s) of the experiment.
An instance is one workflow run with a set of parameters.

In [21]:
from zeph.main import create_selector
from zeph.drivers import iris_driver

def adaptive_instance(
    name,
    n_cycles,
    compute_budget,
    bgp_prefixes=None,
    bgp_awareness=True,
    exploitation_only=False,
    previous_measurement_uuid=None,
    dry_run=False,
):
    """Run one experiment instance as a chain of ``n_cycles`` cycles.

    This is a generator: nothing is executed until it is iterated. On each
    iteration it

    1. builds a selector from the UUID of the previous cycle
       (``previous_measurement_uuid`` for the first one),
    2. hands the selector and the module-level experiment parameters to
       ``iris_driver``,
    3. logs how many prefixes each agent got,
    4. pickles the two per-agent results returned by the driver under
       ``pilot_dir`` as ``exploitation_<uuid>.pickle`` and
       ``prefixes_<uuid>.pickle``,
    5. yields the UUID returned by the driver.

    ``name``, ``compute_budget``, ``exploitation_only`` and ``dry_run`` are
    forwarded to ``iris_driver`` unchanged; ``bgp_prefixes`` and
    ``bgp_awareness`` are forwarded to ``create_selector``.
    """
    measurement_uuid = previous_measurement_uuid

    for _cycle in range(n_cycles):
        # The selector is seeded with the measurement of the previous cycle
        selector = create_selector(
            measurement_uuid,
            bgp_prefixes=bgp_prefixes,
            bgp_awareness=bgp_awareness,
        )

        driver_result = iris_driver(
            api_url,
            api_username,
            api_password,
            name,
            tool,
            protocol,
            min_ttl,
            max_ttl,
            selector,
            compute_budget,
            logger,
            exploitation_only=exploitation_only,
            dry_run=dry_run,
        )
        measurement_uuid, exploitation_per_agent, prefixes_per_agent = driver_result

        # Per-agent prefix counts, for the log only
        prefix_counts = {
            agent: len(prefixes) for agent, prefixes in prefixes_per_agent.items()
        }
        logger.info(f"{name} - {measurement_uuid}: {prefix_counts}")

        # Keep both per-agent results of this cycle next to the experiment log
        for file_prefix, payload in (
            ("exploitation_", exploitation_per_agent),
            ("prefixes_", prefixes_per_agent),
        ):
            dump_path = pilot_dir / (file_prefix + measurement_uuid + ".pickle")
            with dump_path.open("wb") as dump_file:
                pickle.dump(payload, dump_file)

        yield measurement_uuid

## Experiment definition

In this section we define the experiment.
Here we have:


* zeph cycles (10% of the budget)
* zeph cycles (25% of the budget)
* zeph cycles (50% of the budget)
* zeph cycles (75% of the budget)
* zeph cycles (100% of the budget)


In [22]:
from math import floor

# One list of measurement UUIDs per budget level, filled during execution
zeph_uuids_10 = []
zeph_uuids_25 = []
zeph_uuids_50 = []
zeph_uuids_75 = []
zeph_uuids_100 = []

# adaptive_instance() is a generator function, so the calls below only create
# the generators; no measurement is started here.
# Each budget callable ignores its argument and returns a pair whose first
# item is the share of global_budget (read when the callable is invoked).
# NOTE(review): the meaning of the second item (6) is defined by the driver --
# confirm.

# Zeph (10%)
zeph_10 = adaptive_instance(
    name="edgenet-1",
    n_cycles=n_cycles,
    compute_budget=lambda x: (floor(0.10 * global_budget), 6),
    bgp_prefixes=bgp_prefixes,
    bgp_awareness=False,
    exploitation_only=False,
)

# Zeph (25%)
zeph_25 = adaptive_instance(
    name="edgenet-2",
    n_cycles=n_cycles,
    compute_budget=lambda x: (floor(0.25 * global_budget), 6),
    bgp_prefixes=bgp_prefixes,
    bgp_awareness=False,
    exploitation_only=False,
)

# Zeph (50%)
zeph_50 = adaptive_instance(
    name="edgenet-3",
    n_cycles=n_cycles,
    compute_budget=lambda x: (floor(0.50 * global_budget), 6),
    bgp_prefixes=bgp_prefixes,
    bgp_awareness=False,
    exploitation_only=False,
)

# Zeph (75%)
zeph_75 = adaptive_instance(
    name="edgenet-4",
    n_cycles=n_cycles,
    compute_budget=lambda x: (floor(0.75 * global_budget), 6),
    bgp_prefixes=bgp_prefixes,
    bgp_awareness=False,
    exploitation_only=False,
)

# Zeph (100%): the agent(s) probe with the full budget
zeph_100 = adaptive_instance(
    name="edgenet-5",
    n_cycles=n_cycles,
    compute_budget=lambda x: (global_budget, 6),
    bgp_prefixes=bgp_prefixes,
    bgp_awareness=False,
    exploitation_only=False,
)

## Experiment execution

We execute the experiment by running the workflow on the instance(s).

In [23]:
# Dry run, skip the execution
# When True, the execution cell raises immediately, before any instance
# generator is iterated.
dry_run = True

In [24]:
import requests
from zeph.drivers import create_auth_header

def check_measurement_finished(url, username, password, measurement_uuid):
    """Tell whether the API reports the given measurement as finished.

    Sends a GET request to ``<url>/measurements/<measurement_uuid>`` with the
    headers built by ``create_auth_header`` and returns True when the
    ``state`` field of the JSON response equals ``"finished"``.
    """
    auth_headers = create_auth_header(url, username, password)
    endpoint = url + f"/measurements/{measurement_uuid}"
    response = requests.get(endpoint, headers=auth_headers)
    measurement = response.json()
    return measurement["state"] == "finished"

In [25]:
import time

if dry_run:
    raise Exception("Dry run, exiting")

# The five instances advance in lockstep: each zip() step starts one cycle of
# every instance, then we wait for all five measurements before moving on.
for (
    zeph_uuid_10,
    zeph_uuid_25,
    zeph_uuid_50,
    zeph_uuid_75,
    zeph_uuid_100,
) in zip(
    zeph_10,
    zeph_25,
    zeph_50,
    zeph_75,
    zeph_100,
):

    zeph_uuids_10.append(zeph_uuid_10)
    zeph_uuids_25.append(zeph_uuid_25)
    zeph_uuids_50.append(zeph_uuid_50)
    zeph_uuids_75.append(zeph_uuid_75)
    zeph_uuids_100.append(zeph_uuid_100)

    # Every measurement of this cycle, including the 100% one (the original
    # code polled the 75% measurement twice and never the 100% one).
    cycle_uuids = (
        zeph_uuid_10,
        zeph_uuid_25,
        zeph_uuid_50,
        zeph_uuid_75,
        zeph_uuid_100,
    )

    # Poll every 10 seconds until all measurements of the cycle are finished
    while not all(
        check_measurement_finished(api_url, api_username, api_password, cycle_uuid)
        for cycle_uuid in cycle_uuids
    ):
        time.sleep(10)


# Save the measurement UUIDs, one file per budget level and one UUID per line.
# zeph_100.txt gets the 100% UUIDs (the original code wrote the 75% list).
for uuids_filename, uuids in (
    ("zeph_10.txt", zeph_uuids_10),
    ("zeph_25.txt", zeph_uuids_25),
    ("zeph_50.txt", zeph_uuids_50),
    ("zeph_75.txt", zeph_uuids_75),
    ("zeph_100.txt", zeph_uuids_100),
):
    with (pilot_dir / uuids_filename).open("w") as fd:
        for uuid in uuids:
            fd.write(uuid + "\n")

Exception: Dry run, exiting