# Experiment 1

*Hypothesis*: Zeph will discover almost as much as a complete discovery
(i.e., probing all routable IPv4 prefixes), but with a much smaller probing budget.


In [91]:
import logging
from pathlib import Path

# Notebook-wide logger: every message goes both to the cell output and to
# a log file stored in the experiment directory.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Re-running this cell would otherwise stack a second stream/file handler on
# the same logger, duplicating every log line and leaking the previously
# opened log file. Detach and close whatever a previous run attached.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

script_formatter = logging.Formatter(
    "%(asctime)s :: SCRIPT :: %(levelname)s :: %(message)s"
)
stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.INFO)
stream_handler.setFormatter(script_formatter)
logger.addHandler(stream_handler)

# Directory of the experiment
exp_dir = Path("../resources/data/measurements/exp1-pilot/")
exp_dir.mkdir(parents=True, exist_ok=True)

# Directory of the total prefixes and exploitation prefixes pickle files
prefixes_dir = exp_dir / "prefixes"
prefixes_dir.mkdir(parents=True, exist_ok=True)
exploitation_dir = exp_dir / "exploitation"
exploitation_dir.mkdir(parents=True, exist_ok=True)

# The file handler can only be created once the experiment directory exists.
file_handler = logging.FileHandler(exp_dir / "log.txt")
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(script_formatter)
logger.addHandler(file_handler)

## Configuration

In this section:

* we get the configuration of the Iris API and database
* we get the configuration of the experiment itself

In [1]:
# Get Iris API / database credentials
import json
from zeph.drivers import create_auth_header, get_database_url

# Read the credentials file inside a context manager so that the file handle
# is closed as soon as the JSON document has been parsed.
with open("../config.json") as config_file:
    config = json.load(config_file)

# Authentication header for the Iris API, then the database URL obtained with it.
headers = create_auth_header(config["iris_url"], config["iris_username"], config["iris_password"])
database_url = get_database_url(config["iris_url"], headers)

In [93]:
# Experiment parameters
# tool, protocol, min_ttl and max_ttl are forwarded as-is to iris_driver;
# epsilon is forwarded to create_selector (see adaptive_instance).
tool = "yarrp"
epsilon = 0.1
protocol = "icmp"
min_ttl = 8
max_ttl = 32

# Tags forwarded to iris_driver for every measurement of this experiment
measurement_tags = ["!public", "zeph-evaluation", "exp1"]

# These can be overridden by the pilot configuration (see below)
# n_cycles: number of cycles run by each instance.
# global_budget: reference budget; each instance uses a fraction of it, and it
# is also the target size passed to pilot_bgp_prefixes when a pilot subset is created.
n_cycles = 10
global_budget = 11_881_416

# You can generate this file by following these instructions: https://github.com/dioptra-io/zeph#-generate-the-bgp-prefix-file
bgp_prefixes_path = Path("../resources/data/bgp_prefixes.pickle")

# Pilot definition (optional)

If you don't want to run the experiment on the entire universe of BGP prefixes, you can define a pilot.

In [94]:
# Enable/disable pilot experiment
# When True, n_cycles and global_budget are overridden and the BGP prefixes
# come from the pilot subset file instead of the full BGP prefix file.
enable_pilot = False

In [95]:
import pickle

# Full (non-pilot) run: read the complete set of BGP prefixes.
if not enable_pilot:
    with bgp_prefixes_path.open("rb") as full_prefixes_file:
        bgp_prefixes = pickle.load(full_prefixes_file)
    logger.info(f"Number of BGP prefixes {len(bgp_prefixes)}")

In [96]:
import random

def pilot_bgp_prefixes(bgp_prefixes, n_prefixes):
    """Draw a random subset of BGP prefixes for a pilot experiment.

    BGP prefixes are picked in random order until the cumulated size of the
    picked elements reaches ``n_prefixes``. The size of an element is
    ``len(element)``, which is logged as a number of /24 prefixes.

    Because whole elements are picked, the cumulated size can exceed
    ``n_prefixes`` by up to the size of the last picked element minus one.

    :param bgp_prefixes: sequence of BGP prefixes; each element must support
        ``len()``. The sequence itself is left untouched.
    :param n_prefixes: target cumulated size of the subset.
    :return: list of the picked elements, in the order they were drawn.
    """
    # random.shuffle works in place: shuffle a copy so the caller's list
    # keeps its original order.
    shuffled_prefixes = list(bgp_prefixes)
    random.shuffle(shuffled_prefixes)

    current_n_prefixes = 0
    subset_bgp_prefixes = []

    for bgp_prefix in shuffled_prefixes:
        # Stop as soon as the target is reached (>=): a strict comparison
        # would add one extra BGP prefix when the target is hit exactly.
        if current_n_prefixes >= n_prefixes:
            break

        subset_bgp_prefixes.append(bgp_prefix)
        current_n_prefixes += len(bgp_prefix)

    logger.info(f"Number of /24 prefixes: {current_n_prefixes}")
    return subset_bgp_prefixes

In [97]:
# Optionally override experiment parameters
# A pilot run uses fewer cycles and a much smaller budget than the full experiment.
if enable_pilot:
    n_cycles = 3
    global_budget = 10_000

### BGP prefixes subset creation

Here you can define the subset of BGP prefixes you want to run the pilot experiment on.

In [98]:
if enable_pilot:
    # Enable/disable BGP prefix subset creation
    # NOTE(review): this flag is only defined when the pilot is enabled; the
    # cells below read it only after checking `enable_pilot` first.
    create_bgp_prefixes_subset = False

In [None]:
import pickle

# Pilot run re-using an already generated subset file.
if enable_pilot and not create_bgp_prefixes_subset:
    # Override prefix path
    bgp_prefixes_path = Path("../resources/data/bgp_prefixes_pilot.pickle")

    # The subset is never re-created in this branch, so it is always read from disk.
    with bgp_prefixes_path.open("rb") as pilot_prefixes_file:
        bgp_prefixes = pickle.load(pilot_prefixes_file)
    logger.info(f"Number of BGP prefixes {len(bgp_prefixes)}")

In [100]:
# Pilot run with a freshly drawn subset of the BGP prefixes.
if enable_pilot and create_bgp_prefixes_subset:
    # Read the prefixes currently pointed to by `bgp_prefixes_path`...
    with bgp_prefixes_path.open("rb") as source_file:
        bgp_prefixes = pickle.load(source_file)

    # ...and keep a random subset sized by the (pilot) global budget.
    bgp_prefixes = pilot_bgp_prefixes(bgp_prefixes, global_budget)
    logger.info(f"Number of BGP prefixes {len(bgp_prefixes)}")

    # Override prefix path
    bgp_prefixes_path = Path("../resources/data/bgp_prefixes_pilot.pickle")
    with bgp_prefixes_path.open("wb") as pilot_file:
        pickle.dump(bgp_prefixes, pilot_file)

## Instance definition

In this section we define the instance(s) of the experiment.
An instance is one workflow run with a set of parameters.

In [2]:
from zeph.main import create_selector
from zeph.drivers import iris_driver, get_previous_measurement_agents


def adaptive_instance(
    name,
    n_cycles,
    compute_budget,
    bgp_prefixes=None,
    bgp_awareness=True,
    exploitation_only=False,
    previous_measurement_uuid=None,
    dry_run=False,
):
    """Generator running one instance of the experiment, one cycle at a time.

    Each cycle builds a selector with ``create_selector``, hands it to
    ``iris_driver``, pickles the per-agent exploitation and prefixes results
    into ``exploitation_dir`` and ``prefixes_dir``, then yields the UUID of
    the measurement. The UUID of a cycle becomes the "previous measurement"
    of the next one.

    Being a generator, a cycle only runs when the consumer asks for the next
    value.

    :param name: forwarded to ``iris_driver`` and used in the log recap.
    :param n_cycles: number of cycles to run.
    :param compute_budget: forwarded as-is to ``iris_driver``.
    :param bgp_prefixes: forwarded to ``create_selector``.
    :param bgp_awareness: forwarded to ``create_selector``.
    :param exploitation_only: forwarded to ``iris_driver``.
    :param previous_measurement_uuid: measurement the first cycle builds on, if any.
    :param dry_run: forwarded to ``iris_driver``.
    :return: yields the measurement UUID of each cycle.
    """
    agents_uuid = None

    for _ in range(n_cycles):

        # When there is a previous measurement, look up its agents so they
        # can be handed to the selector.
        if previous_measurement_uuid:
            logger.debug("Get previous measurement agents")
            auth_headers = create_auth_header(
                config["iris_url"], config["iris_username"], config["iris_password"]
            )
            agents_uuid = get_previous_measurement_agents(
                config["iris_url"], previous_measurement_uuid, auth_headers
            )

        selector = create_selector(
            database_url,
            epsilon,
            bgp_prefixes,
            previous_measurement_uuid=previous_measurement_uuid,
            previous_agents_uuid=agents_uuid,
            bgp_awareness=bgp_awareness,
        )

        driver_result = iris_driver(
            config["iris_url"],
            config["iris_username"],
            config["iris_password"],
            name,
            tool,
            protocol,
            min_ttl,
            max_ttl,
            selector,
            compute_budget,
            logger,
            measurement_tags=measurement_tags,
            exploitation_only=exploitation_only,
            dry_run=dry_run,
        )
        measurement_uuid, exploitation_per_agent, prefixes_per_agent = driver_result

        # The next cycle builds on the measurement that has just been created.
        previous_measurement_uuid = measurement_uuid

        # Number of prefixes per agent, for the log.
        recap = {agent: len(prefixes) for agent, prefixes in prefixes_per_agent.items()}
        logger.info(f"{name} - {measurement_uuid}: {recap}")

        # Persist both per-agent results, one pickle file per measurement.
        for target_dir, file_prefix, payload in (
            (exploitation_dir, "exploitation_", exploitation_per_agent),
            (prefixes_dir, "prefixes_", prefixes_per_agent),
        ):
            pickle_path = target_dir / (file_prefix + measurement_uuid + ".pickle")
            with pickle_path.open("wb") as fd:
                pickle.dump(payload, fd)

        yield measurement_uuid

## Experiment definition

In this section we define the experiment.
Here we have:


* zeph cycles (10% of the budget)
* zeph cycles (25% of the budget)
* zeph cycles (50% of the budget)
* zeph cycles (75% of the budget)
* zeph cycles (100% of the budget)


In [102]:
# Dry run, skip the execution
# This flag is forwarded to iris_driver through adaptive_instance.
dry_run = False

In [103]:
from math import floor

def _zeph_instance(name, compute_budget):
    """Create one instance with the settings shared by every instance of this experiment."""
    return adaptive_instance(
        name,
        n_cycles,
        compute_budget,
        bgp_prefixes=bgp_prefixes,
        bgp_awareness=False,
        exploitation_only=False,
        dry_run=dry_run,
    )


def _budget_share(fraction):
    """Return a budget callable giving `fraction` of the global budget, rounded down."""
    return lambda _: floor(fraction * global_budget)


# Each instance comes with the list that will collect its measurement UUIDs.

# Zeph (10%)
zeph_uuids_10 = []
zeph_10 = _zeph_instance("edgenet-1", _budget_share(0.10))

# Zeph (25%)
zeph_uuids_25 = []
zeph_25 = _zeph_instance("edgenet-2", _budget_share(0.25))

# Zeph (50%)
zeph_uuids_50 = []
zeph_50 = _zeph_instance("edgenet-3", _budget_share(0.50))

# Zeph (75%)
zeph_uuids_75 = []
zeph_75 = _zeph_instance("edgenet-4", _budget_share(0.75))

# Zeph (100%): the full global budget is used
zeph_uuids_100 = []
zeph_100 = _zeph_instance("edgenet-5", lambda _: global_budget)

## Experiment execution

We execute the experiment by running the workflow on the instance(s).

In [104]:
import requests
from zeph.drivers import create_auth_header

def check_measurement_finished(url, username, password, measurement_uuid):
    """Tell whether a measurement is reported as finished by the API.

    :param url: base URL of the API.
    :param username: user name passed to ``create_auth_header``.
    :param password: password passed to ``create_auth_header``.
    :param measurement_uuid: UUID of the measurement to look up.
    :return: True if the "state" field of the JSON response equals "finished".
    """
    auth_headers = create_auth_header(url, username, password)
    endpoint = url + f"/measurements/{measurement_uuid}"
    response = requests.get(endpoint, headers=auth_headers)
    measurement = response.json()
    return measurement["state"] == "finished"

In [None]:
import time


# Each step of the zip advances every instance generator by one cycle, i.e.
# it runs one cycle of each instance. The next cycle starts only once the
# measurements of the current cycle are all finished.
for cycle_uuids in zip(
    zeph_10,
    zeph_25,
    zeph_50,
    zeph_75,
    zeph_100,
):
    (
        zeph_uuid_10,
        zeph_uuid_25,
        zeph_uuid_50,
        zeph_uuid_75,
        zeph_uuid_100,
    ) = cycle_uuids

    zeph_uuids_10.append(zeph_uuid_10)
    zeph_uuids_25.append(zeph_uuid_25)
    zeph_uuids_50.append(zeph_uuid_50)
    zeph_uuids_75.append(zeph_uuid_75)
    zeph_uuids_100.append(zeph_uuid_100)

    # Poll every measurement of this cycle, including the 100% one (it used
    # to be skipped: the 75% UUID was checked twice instead), and wait 10
    # seconds between two polling rounds.
    while not all(
        check_measurement_finished(
            config["iris_url"],
            config["iris_username"],
            config["iris_password"],
            measurement_uuid,
        )
        for measurement_uuid in cycle_uuids
    ):
        time.sleep(10)


# Save the measurement UUIDs of each instance, one UUID per line.
# zeph_100.txt now receives the UUIDs of the 100% instance (it used to be
# filled with the UUIDs of the 75% instance).
for uuids_file_name, instance_uuids in (
    ("zeph_10.txt", zeph_uuids_10),
    ("zeph_25.txt", zeph_uuids_25),
    ("zeph_50.txt", zeph_uuids_50),
    ("zeph_75.txt", zeph_uuids_75),
    ("zeph_100.txt", zeph_uuids_100),
):
    with (exp_dir / uuids_file_name).open("w") as fd:
        for uuid in instance_uuids:
            fd.write(uuid + "\n")