## get all anchors

In [1]:
import requests

def get_from_atlas(url):
    response = requests.get(url).json()
    while True:
        for anchor in response["results"]:
            yield anchor

        if response["next"]:
            response = requests.get(response["next"]).json()
        else:
            break

In [None]:
import pickle
import pandas as pd

anchor_file = "../datasets/anchors.pickle"

anchors = {}
for index, anchor in enumerate(get_from_atlas("https://atlas.ripe.net/api/v2/probes/")):

    # select all anchors
    if (
        anchor["status"]["name"] != "Connected"
        or anchor.get("geometry") is None
        or anchor.get("address_v4") is None
        or anchor.get("country_code") is None
        or not anchor["is_anchor"]
    ):
        continue


    anchors[anchor['address_v4']] = {
        "id" : anchor["id"],
        "is_anchor" : anchor["is_anchor"],
        "country_code" : anchor["country_code"],
        "latitude" : anchor["geometry"]["coordinates"][1],
        "longitude" : anchor["geometry"]["coordinates"][0],
    }


print(f"Number of Atlas probes kept: {len(anchors)}/{index}")

# save results
with open(anchor_file, "wb") as f:
    pickle.dump(anchors,f)

## TODO

1. get target ips (3) for all anchors
2. make ping from all anchors to all targets
3. select a set of sufficiently diversified probes (Metis) or simple

4. (probe validation) why not taking all achons and issuing pings towards the rest of probes? then cbg -> depending on results -> check

In [4]:
import pickle
from pathlib import Path

in_file = Path(".") / "../datasets/parsed_hitlist.pickle"
with open(in_file, "rb") as f:
    ip_hitlist = pickle.load(f)

anchor_file = "../datasets/anchors.pickle"
with open(anchor_file, "rb") as f:
    anchors = pickle.load(f)


In [5]:
from random import randint
from ipaddress import IPv4Address, IPv4Network
from geoloc_imc_2023.atlas_probing import RIPEAtlas
from collections import defaultdict
from datetime import datetime

RIPE_ACCOUNT = "timur.friedman@sorbonne-universite.fr"
RIPE_KEY = "b3d3d4fc-724e-4505-befe-1ad16a70dc87"
MAX_NUMBER_OF_VPS = 1_000
NB_PACKETS = 3

def get_target_hitlist(anchor_ip: str, hitlist_size: int = 3) -> list[str]:
    """from ip, return a list of target ips"""
    prefix_anchor = anchor_ip.split(".")[:-1]
    prefix_anchor.append("0")
    prefix_anchor = ".".join(prefix_anchor)
    ip_list = []
    try:
        ip_list = ip_hitlist[prefix_anchor]
    except KeyError:
        pass

    if len(ip_list) < hitlist_size:
        ip = anchor_ip.split(".")[:2]
        ip.extend(['0','0'])
        ip = ".".join(ip)
        prefix_target = IPv4Network(ip + '/' + str(24))

        ip_list.extend([str(prefix_target[randint(1,254)]) for _ in range(0,hitlist_size - len(ip_list))])
    return ip_list


driver = RIPEAtlas(RIPE_ACCOUNT, RIPE_KEY)
measurements = defaultdict(dict)
dry_run = False
for i , ip_target in enumerate(anchors):

    if i > 2_00: break

    # get ip_target_list
    random_target_list = get_target_hitlist(ip_target, 3)
    # add the real ip_target to verify that geoloc prefixes is valid assumption
    random_target_list.append(ip_target)

    # get vp ids
    vp_ids = [anchors[ip_vp]['id'] for ip_vp in anchors if ip_vp != ip_target]
    assert len(vp_ids) == len(anchors) - 1
    # select a number of vps (testing)
    vp_ids = vp_ids[:MAX_NUMBER_OF_VPS]

    print(f"starting measurement for {ip_target=} with {[ip for ip in random_target_list]} with {len(vp_ids)} anchors")    

    ip_target_measurement_ids = []
    for ip in random_target_list:
        if dry_run:
            print(f"measurement for {ip}")
            continue
        else:
            # TODO: parralelize post requests otherwise it takes too much time
            # probe and retreive measurements
            measurement_id = driver.probe(str(ip), vp_ids, NB_PACKETS)
            try:
                measurements[ip_target]["id"].append(measurement_id)
            except KeyError:
                measurements[ip_target]["id"] = [measurement_id]
    print()
        

starting measurement for ip_target='213.225.160.239' with ['213.225.160.14', '213.225.160.140', '213.225.160.159', '213.225.160.239'] with 10 anchors

starting measurement for ip_target='145.220.0.55' with ['145.220.0.1', '145.220.0.28', '145.220.0.29', '145.220.0.55'] with 10 anchors

starting measurement for ip_target='5.28.0.17' with ['5.28.0.6', '5.28.0.13', '5.28.0.14', '5.28.0.17'] with 10 anchors

starting measurement for ip_target='193.171.255.2' with ['193.171.255.1', '193.171.255.2', '193.171.255.9', '193.171.255.2'] with 10 anchors



In [6]:
for target_addr in measurements:
    print(f"target ip : {target_addr} : {[id for id in measurements[target_addr]['id']]}")

target ip : 213.225.160.239 : [52568925, 52568926, 52568927, 52568928]
target ip : 145.220.0.55 : [52568930, 52568931, 52568932, 52568933]
target ip : 5.28.0.17 : [52568934, 52568935, 52568936, 52568937]
target ip : 193.171.255.2 : [52568938, 52568939, 52568941, 52568943]


In [7]:
import pickle

# save results
date = datetime.now()
out_file = f"../results/measurement_ids.pickle"
print(out_file)
with open(out_file, "wb") as f:
    pickle.dump(measurements,f)

../results/measurement_ids.pickle


In [13]:
import requests
import time
from typing import List
import pickle
from collections import defaultdict

# save results
in_file = f"../results/measurement_ids.pickle"
with open(in_file, "rb") as f:
    measurements = pickle.load(f)

anchor_file = "../datasets/anchors.pickle"
with open(anchor_file, "rb") as f:
    anchors = pickle.load(f)

print(f"retreive measurements for {len(measurements)} ip addresses")

key = "b3d3d4fc-724e-4505-befe-1ad16a70dc87"
for i, target_addr in enumerate(measurements):
    if i > 15: break

    measurements[target_addr]["target_results"] = []
    measurements[target_addr]["prefix_results"] = defaultdict(list)

    for measurement_id in measurements[target_addr]["id"]:
        url = f"https://atlas.ripe.net/api/v2/measurements/{measurement_id}/results/?key={key}"

        timeout = 60
        for _ in range(timeout):

            response = requests.get(url, timeout=20).json()
            if response:
                break
            time.sleep(2)

        for result in response:
            # parse results and calculate geoloc
            if result.get('result') is not None:
                
                dst_addr = result['dst_addr']
                vp_ip = result['from']

                if type(result['result']) == list:
                    rtt_list = [list(rtt.values())[0] for rtt in result['result']]
                else:
                    rtt_list = [result['result']["rtt"]]

                # remove unresponsive results
                if not rtt_list: 
                    continue
                if '*' in rtt_list: 
                    rtt_list.remove('*')
                min_rtt = min(rtt_list)

                if min_rtt == "*": continue

                # both vp and target coordinates
                vp_lat = anchors[vp_ip]['latitude']
                vp_lon = anchors[vp_ip]['longitude']

                if dst_addr == target_addr:
                    measurements[target_addr]["target_results"].append({
                        "node": vp_ip,
                        "min_rtt": min_rtt,
                        "rtt_list": rtt_list,
                        "vp_lat": vp_lat,
                        "vp_lon": vp_lon,
                    })
                else: 
                    measurements[target_addr]["prefix_results"][dst_addr].append({
                        "node": vp_ip,
                        "min_rtt": min_rtt,
                        "rtt_list": rtt_list,
                        "vp_lat": vp_lat,
                        "vp_lon": vp_lon,
                    })
            else:
                print(f"no results: {result}")
    
    # sort delay
    measurements[target_addr]["target_results"] = sorted(measurements[target_addr]["target_results"], key = lambda x: x["min_rtt"])

    for dst_addr in  measurements[target_addr]["prefix_results"]:

        try:
            measurements[target_addr]["prefix_results"][dst_addr] = sorted(
                measurements[target_addr]["prefix_results"][dst_addr], 
                key = lambda x: x["min_rtt"]
            )
        except TypeError:
            raise RuntimeError(measurements[target_addr]["prefix_results"][dst_addr])
    


retreive measurements for 4 ip addresses


In [15]:
for i, ip_target in enumerate(measurements):
    if i > 10: break
    print(f"results for ip: {ip_target}:")
    for result in measurements[ip_target]["target_results"]:
        print("ip result", result)
    for ip_dst in measurements[ip_target]["prefix_results"]:
        print("prefix reesults:",measurements[ip_target]["prefix_results"][ip_dst])

results for ip: 213.225.160.239:
ip result {'node': '145.220.0.55', 'min_rtt': 9.583642, 'rtt_list': [9.778704, 9.583642, 9.618189], 'vp_lat': 52.3675, 'vp_lon': 4.8985}
ip result {'node': '199.10.66.231', 'min_rtt': 10.50564, 'rtt_list': [10.886608, 10.50564, 10.665625], 'vp_lat': 52.3485, 'vp_lon': 4.8315}
ip result {'node': '194.150.191.46', 'min_rtt': 15.335209, 'rtt_list': [15.556679, 15.335209, 15.419697], 'vp_lat': 49.4275, 'vp_lon': 11.0215}
ip result {'node': '193.171.255.2', 'min_rtt': 18.732867, 'rtt_list': [18.850879, 19.252921, 18.732867], 'vp_lat': 48.2085, 'vp_lon': 16.3695}
ip result {'node': '192.65.184.54', 'min_rtt': 19.045067, 'rtt_list': [19.045067, 19.308753, 19.058893], 'vp_lat': 46.2285, 'vp_lon': 6.0495}
ip result {'node': '5.28.0.17', 'min_rtt': 28.593321, 'rtt_list': [28.91641, 43.622274, 28.593321], 'vp_lat': 47.5015, 'vp_lon': 19.0385}
ip result {'node': '185.42.136.158', 'min_rtt': 30.341865, 'rtt_list': [30.663514, 30.341865, 30.512735], 'vp_lat': 59.3315

In [16]:
import pickle

# save results
date = datetime.now()
out_file = f"../results/all_anchors_to_all_anchors.pickle"
print(out_file)
with open(out_file, "wb") as f:
    pickle.dump(measurements,f)

../results/all_anchors_to_all_anchors.pickle


In [19]:
import pickle
from pathlib import Path

in_file = Path(".") / "../results/all_anchors_to_all_anchors.pickle"
with open(in_file, "rb") as f:
    measurements = pickle.load(f)

for i, ip_target in enumerate(measurements):
    if i > 10: break
    print(f"results for ip: {ip_target}: {measurements[ip_target]['target_results']}")
    for dst_addr in measurements[ip_target]["prefix_results"]:
        print("prefix reesults:",measurements[ip_target]["prefix_results"][dst_addr])

results for ip: 213.225.160.239: [{'node': '145.220.0.55', 'min_rtt': 9.583642, 'rtt_list': [9.778704, 9.583642, 9.618189], 'vp_lat': 52.3675, 'vp_lon': 4.8985}, {'node': '199.10.66.231', 'min_rtt': 10.50564, 'rtt_list': [10.886608, 10.50564, 10.665625], 'vp_lat': 52.3485, 'vp_lon': 4.8315}, {'node': '194.150.191.46', 'min_rtt': 15.335209, 'rtt_list': [15.556679, 15.335209, 15.419697], 'vp_lat': 49.4275, 'vp_lon': 11.0215}, {'node': '193.171.255.2', 'min_rtt': 18.732867, 'rtt_list': [18.850879, 19.252921, 18.732867], 'vp_lat': 48.2085, 'vp_lon': 16.3695}, {'node': '192.65.184.54', 'min_rtt': 19.045067, 'rtt_list': [19.045067, 19.308753, 19.058893], 'vp_lat': 46.2285, 'vp_lon': 6.0495}, {'node': '5.28.0.17', 'min_rtt': 28.593321, 'rtt_list': [28.91641, 43.622274, 28.593321], 'vp_lat': 47.5015, 'vp_lon': 19.0385}, {'node': '185.42.136.158', 'min_rtt': 30.341865, 'rtt_list': [30.663514, 30.341865, 30.512735], 'vp_lat': 59.3315, 'vp_lon': 18.0595}, {'node': '76.26.115.194', 'min_rtt': 86.2

## Apply CBG methodology

In [22]:
from pathlib import Path
import random
import pickle
from copy import copy

from geoloc_imc_2023.helpers import polygon_centroid, haversine, circle_intersections
from geoloc_imc_2023.draw_results import draw_results

def select_best_guess_centroid(target_addr, measurement_results):
    """
    Find the best guess
    that is the location of the vantage point closest to the centroid.
    """
    probe_circles = {}
    print(f"NUMBER OF MEASUREMENT RESULTS FOR TARGET IP: {target_addr} : {len(measurement_results)}")
    for _, result in enumerate(measurement_results):
        vp_ip = result["node"]
        lat = result["vp_lat"]
        lon = result["vp_lon"]
        min_rtt = result["min_rtt"]

        # too inflated RTT means that measurement will not provide usefull info
        if isinstance(min_rtt, float):
                probe_circles[vp_ip] = (
                    lat,
                    lon,
                    min_rtt,
                    None,
                    None,
                )
                print(f"vp_anchor = {vp_ip} with results: {min_rtt}")
    print()

    # draw circles
    if not probe_circles: return

    intersections = circle_intersections(probe_circles.values(), speed_threshold=4/9)
    print("intersecion:", intersections)
    target = (anchors[target_addr]["latitude"], anchors[target_addr]["longitude"])

    centroid = None
    if intersections:
        centroid = polygon_centroid(intersections)
        print("centroid:", centroid)
        target_description = anchors[target_addr]

        if centroid:
            for ip_src, circle in probe_circles.items():

                measured_d = haversine((circle[0], circle[1]), centroid)
                calculated_d = haversine((target_description["latitude"], target_description["longitude"]), centroid)

                print("ip_src:",ip_src,"dst (measured):", measured_d, "dst (calculated):", calculated_d)
    
    draw_results(copy(probe_circles), intersections,centroid, target)

    return centroid

in_file = Path(".") / "../results/all_anchors_to_all_anchors.pickle"
with open(in_file, "rb") as f:
    measurements = pickle.load(f)

anchor_file = "../datasets/anchors.pickle"
with open(anchor_file, "rb") as f:
    anchors = pickle.load(f)

# select target and vp list for analyssi
TARGET_SIZE = 5

target_list = [random.choice(list(measurements)) for _ in range(0, TARGET_SIZE)]
vp_list = set(measurements).difference(set(target_list))

target_list = list(measurements.keys())
vp_list = list(measurements.keys())

print(f"target size: {len(target_list)}")
print(f"vp size: {len(vp_list)}")

geoloc_results = {}
for i, target_addr in enumerate(target_list):
    if i > 10: break

    # get measurements for vp_dataset
    vp_results = [vp_result for vp_result in measurements[target_addr]['target_results'] if vp_result['node'] in vp_list]
    shortest_delay_results = vp_results[:10]

    centroid = select_best_guess_centroid(target_addr, shortest_delay_results)
    if centroid:
        geoloc_results[target_addr] = {
            "estimated_lat": centroid[0],
            "estimated_lon": centroid[1],
        }

geoloc_out_file = Path(".") / "../results/all_anchors_towards_all_anchors_geoloc.pickle"
with open(geoloc_out_file, "wb") as f:
    pickle.dump(geoloc_results, f)


target size: 4
vp size: 4
NUMBER OF MEASUREMENT RESULTS FOR TARGET IP: 213.225.160.239 : 3
vp_anchor = 145.220.0.55 with results: 9.583642
vp_anchor = 193.171.255.2 with results: 18.732867
vp_anchor = 5.28.0.17 with results: 28.593321

only one circle found with coordinates: (52.3675, 4.8985, 9.583642, 1277.818933333333, 0.20056803222937264)
intersecion: [(52.3675, 23.697953974010133), (63.84634278150637, 4.898500000000001), (52.3675, -13.900953974010134), (40.88865721849363, 4.898499999999997)]
centroid: (52.3675, 4.898499999999999)
ip_src: 145.220.0.55 dst (measured): 5.3952051714024527e-14 dst (calculated): 466.6206202376259
ip_src: 193.171.255.2 dst (measured): 934.9143366429872 dst (calculated): 466.6206202376259
ip_src: 5.28.0.17 dst (measured): 1144.135378661584 dst (calculated): 466.6206202376259
calculated centroid: (52.3675, 4.898499999999999)
target:
47.5015 19.0385


NUMBER OF MEASUREMENT RESULTS FOR TARGET IP: 145.220.0.55 : 3
vp_anchor = 213.225.160.239 with results: 9.710373
vp_anchor = 193.171.255.2 with results: 17.734753
vp_anchor = 5.28.0.17 with results: 20.420615

only one circle found with coordinates: (48.5795, 7.7485, 9.710373, 1294.7164, 0.20322027939099044)
intersecion: [(48.5795, 25.328583536904365), (60.210135307202044, 7.748500000000001), (48.5795, -9.831583536904365), (36.94886469279796, 7.748499999999996)]
centroid: (48.5795, 7.7485)
ip_src: 213.225.160.239 dst (measured): 0.0 dst (calculated): 466.6206202376259
ip_src: 193.171.255.2 dst (measured): 637.116886026077 dst (calculated): 466.6206202376259
ip_src: 5.28.0.17 dst (measured): 846.5425796066376 dst (calculated): 466.6206202376259
calculated centroid: (48.5795, 7.7485)
target:
47.5015 19.0385


NUMBER OF MEASUREMENT RESULTS FOR TARGET IP: 5.28.0.17 : 3
vp_anchor = 193.171.255.2 with results: 4.40731
vp_anchor = 145.220.0.55 with results: 20.316818
vp_anchor = 213.225.160.239 with results: 28.59716

only one circle found with coordinates: (48.2085, 16.3695, 4.40731, 587.6413333333333, 0.09223690681734943)
intersecion: [(48.2085, 24.29071650883042), (53.487371913137075, 16.3695), (48.2085, 8.448283491169578), (42.92962808686293, 16.3695)]
centroid: (48.2085, 16.3695)
ip_src: 193.171.255.2 dst (measured): 0.0 dst (calculated): 213.94823034118167
ip_src: 145.220.0.55 dst (measured): 934.9143366429872 dst (calculated): 213.94823034118167
ip_src: 213.225.160.239 dst (measured): 637.116886026077 dst (calculated): 213.94823034118167
calculated centroid: (48.2085, 16.3695)
target:
48.5795 7.7485


NUMBER OF MEASUREMENT RESULTS FOR TARGET IP: 193.171.255.2 : 6
vp_anchor = 5.28.0.17 with results: 4.205421
vp_anchor = 5.28.0.17 with results: 4.271998
vp_anchor = 145.220.0.55 with results: 17.605114
vp_anchor = 145.220.0.55 with results: 17.843378
vp_anchor = 213.225.160.239 with results: 18.722382
vp_anchor = 213.225.160.239 with results: 18.778599

only one circle found with coordinates: (47.5015, 19.0385, 4.271998, 569.5997333333332, 0.0894050750797886)
intersecion: [(47.5015, 26.612540594577133), (52.61830146283737, 19.0385), (47.5015, 11.464459405422863), (42.38469853716263, 19.0385)]
centroid: (47.5015, 19.0385)
ip_src: 5.28.0.17 dst (measured): 0.0 dst (calculated): 213.94823034118167
ip_src: 145.220.0.55 dst (measured): 1144.1353786615837 dst (calculated): 213.94823034118167
ip_src: 213.225.160.239 dst (measured): 846.5425796066376 dst (calculated): 213.94823034118167
calculated centroid: (47.5015, 19.0385)
target:
48.5795 7.7485


In [9]:
geoloc_out_file = Path(".") / "../results/all_anchors_towards_all_anchors_geoloc.pickle"
with open(geoloc_out_file, "rb") as f:
    geoloc_results = pickle.load(f)


In [11]:
from geoloc_imc_2023.helpers import distance

for i, target_addr in enumerate(geoloc_results):
    if i > 15: break
    target_lat = anchors[target_addr]["latitude"]
    target_lon = anchors[target_addr]["longitude"]

    estimated_lat = geoloc_results[target_addr]["estimated_lat"]
    estimated_lon = geoloc_results[target_addr]["estimated_lon"]

    d_error = distance(target_lat, estimated_lat, target_lon, estimated_lon)
    print(f"geoloc results for target ip: {target_addr} distance error: {d_error}")

geoloc results for target ip: 145.220.0.55 distance error: 3.981388122601496
geoloc results for target ip: 5.28.0.17 distance error: 166.404396745
geoloc results for target ip: 193.171.255.2 distance error: 0.8226714520441889
geoloc results for target ip: 192.65.184.54 distance error: 3.5015730277578623
geoloc results for target ip: 197.80.104.36 distance error: 19.16556336974885
geoloc results for target ip: 200.7.84.24 distance error: 26.886725742565762
geoloc results for target ip: 76.26.115.194 distance error: 85.76798202864943
geoloc results for target ip: 194.150.191.46 distance error: 2.5171887386945353
geoloc results for target ip: 199.10.66.231 distance error: 6.835654928240526
