In [None]:
import os
import pickle

import requests

from default import ANCHORS_FILE, REMOVED_PROBES_FILE, IP_INFO_GEO_FILE, MAXMIND_GEO_FILE, PROBES_TO_ANCHORS_RESULT_FILE, GEO_DATABASE_FILE
from utils.common import compute_error_threshold_cdfs
from utils.file_utils import load_json, dump_json
from utils.helpers import haversine
from utils.plot_utils import plot_save, plot_multiple_cdf, homogenize_legend

In [None]:
# Date tag (YYYYMMDD) of the MaxMind GeoLite2 snapshot; it is part of both the
# CSV directory name and the tree file name below.
snapshot_date = "20230516"

# IPinfo API token. Set the IPINFO_TOKEN environment variable to supply your own.
# NOTE(review): the fallback literal is a credential committed to the repository;
# it is kept only so existing runs keep working. Rotate it and drop the fallback.
token = os.environ.get("IPINFO_TOKEN", "4f6c895ec9224f")

# MaxMind IPv4 blocks CSV for the snapshot.
maxmind_block_file = f"GeoLite2-City-CSV_{snapshot_date}/GeoLite2-City-Blocks-IPv4.csv"
# Pickled lookup tree stored next to the CSV: same path with the ".csv" extension
# replaced by "_<snapshot_date>.tree".
maxmind_tree_file = f"{os.path.splitext(maxmind_block_file)[0]}_{snapshot_date}.tree"

In [None]:
# Anchor records; later cells read each record's "address_v4" and, when present,
# its "geometry" -> "coordinates" entry.
anchors = load_json(ANCHORS_FILE)

# Collection of addresses to exclude; later cells test membership with
# `ip in removed_probes`.
removed_probes = load_json(REMOVED_PROBES_FILE)

## IPinfo and MaxMind results

In [None]:
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only load a tree file that was produced locally from the MaxMind CSV.
with open(maxmind_tree_file, "rb") as f:
    tree = pickle.load(f)

# Seconds to wait for ipinfo.io on a single lookup. requests has no default
# timeout, so without one a stalled connection would block the cell forever.
IPINFO_TIMEOUT_S = 10

# Per-anchor lookup results, keyed by the anchor's IPv4 address.
ip_info_geo = {}
maxmind_geo = {}

for anchor in sorted(anchors, key=lambda x: x["address_v4"]):
    ip = anchor["address_v4"]

    # ip_info: store the JSON body only for successful lookups. HTTP errors
    # (rate limiting, bad token), network failures and non-JSON bodies are
    # reported and skipped so one bad request neither aborts the run nor ends up
    # in the results file looking like a geolocation.
    url = f"https://ipinfo.io/{ip}?token={token}"
    try:
        response = requests.get(url, timeout=IPINFO_TIMEOUT_S)
        response.raise_for_status()
        ip_info_geo[ip] = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Print only the exception type: the message can contain the request
        # URL, which includes the API token.
        print(f"ipinfo lookup failed for {ip}: {type(exc).__name__}")

    # maxmind_results: find the tree entry that best matches the IP
    # (presumably a longest-prefix match; confirm against how the tree was built)
    # and keep its coordinates only when the entry's data has a "city" key.
    node = tree.search_best(ip)
    if node is not None and "city" in node.data:
        maxmind_geo[ip] = node.data["coordinates"]

# Persist both result sets so the plotting cells can reload them without
# repeating the lookups.
dump_json(ip_info_geo, IP_INFO_GEO_FILE)
dump_json(maxmind_geo, MAXMIND_GEO_FILE)

## Plot results

In [None]:
# Reload the lookup results from disk so the plotting cell can run without
# repeating the lookups: both are indexed by IP address below.
ip_info_geo = load_json(IP_INFO_GEO_FILE)
mm_geo = load_json(MAXMIND_GEO_FILE)
# Input to compute_error_threshold_cdfs in the next cell.
errors_threshold_probes_to_anchors = load_json(PROBES_TO_ANCHORS_RESULT_FILE)

In [None]:
# Error distributions derived from the probes-to-anchors results; only the first
# entry of error_threshold_cdfs_p_to_a is plotted below (labelled "All VPs").
error_threshold_cdfs_p_to_a, circles_threshold_cdfs_p_to_a, _ = compute_error_threshold_cdfs(
    errors_threshold_probes_to_anchors)

# Per-anchor distance between the database location and the anchor's own
# coordinates, keyed by IP address (presumably km, per the x-axis label below).
maxmind_error = {}
ip_info_error = {}
for anchor in sorted(anchors, key=lambda x: x["address_v4"]):
    ip = anchor["address_v4"]
    if ip in removed_probes:
        continue

    # Without a reference location there is nothing to compare against.
    if "geometry" not in anchor:
        continue

    # The anchor's coordinates are unpacked as (longitude, latitude), while
    # haversine is called with (latitude, longitude) pairs.
    long, lat = anchor["geometry"]["coordinates"]
    if ip in mm_geo:
        maxmind_error[ip] = haversine(mm_geo[ip], (lat, long))

    # Stored IPinfo records are raw API responses, and some (e.g. bogon or error
    # replies) carry no "loc" field. Skip those instead of raising KeyError.
    ipinfo_record = ip_info_geo.get(ip)
    if isinstance(ipinfo_record, dict) and ipinfo_record.get("loc"):
        # "loc" is a "<lat>,<long>" string.
        ipinfo_lat, ipinfo_long = (float(part) for part in ipinfo_record["loc"].split(","))
        ip_info_error[ip] = haversine((ipinfo_lat, ipinfo_long), (lat, long))

# One series per curve, in the same order as `labels`.
Ys = [
    error_threshold_cdfs_p_to_a[0],
    list(maxmind_error.values()),
    list(ip_info_error.values()),
]
# Sample count per series, as a quick sanity check.
print([len(Y) for Y in Ys])
labels = ["All VPs", "Maxmind (Free)", "IPinfo"]
# NOTE(review): the meaning of the positional arguments 10000, 1, 10000 is defined
# by plot_multiple_cdf and is not visible from this notebook.
fig, ax = plot_multiple_cdf(Ys, 10000, 1, 10000,
                            "Geolocation error (km)",
                            "CDF of targets",
                            xscale="log",
                            yscale="linear",
                            legend=labels)
homogenize_legend(ax, "lower right")

ofile = GEO_DATABASE_FILE
plot_save(ofile, is_tight_layout=True)