In [2]:
import folium
import matplotlib.pyplot as plt

from collections import defaultdict
from pyasn import pyasn
from math import pi, cos
from dataclasses import dataclass
from loguru import logger

from geogiant.common.utils import EvalResults, get_parsed_vps
from geogiant.common.queries import load_vps
from geogiant.common.files_utils import load_pickle, load_csv
from geogiant.common.ip_addresses_utils import get_prefix_from_ip
from geogiant.common.settings import PathSettings, ClickhouseSettings

path_settings = PathSettings()
clickhouse_settings = ClickhouseSettings()

asndb = pyasn(str(path_settings.RIB_TABLE))
vps = load_vps(clickhouse_settings.VPS_FILTERED)
vps_per_subnet, vps_coordinates = get_parsed_vps(vps, asndb)


@dataclass(frozen=True)
class ResultsScore:
    client_granularity: str
    answer_granularity: str
    scores: list
    inconsistent_mappings: list


def plotDot(lat, lon, map, color="blue", radius: int = 2):
    """input: series that contains a numeric named latitude and a numeric named longitude
    this function creates a CircleMarker and adds it to your this_map"""
    folium.CircleMarker(
        location=[lat, lon],
        radius=radius,
        weight=radius,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=1,
    ).add_to(map)


def plotRectangle(lat, lon, map, color="blue", radius: int = 2):
    """input: series that contains a numeric named latitude and a numeric named longitude
    this function creates a CircleMarker and adds it to your this_map"""
    folium.Rectangle(
        location=[lat, lon],
        radius=radius,
        weight=radius,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=1,
    ).add_to(map)

[32m2024-05-28 07:19:30.066[0m | [1mINFO    [0m | [36mgeogiant.clickhouse.query[0m:[36mexecute[0m:[36m122[0m - [1mquery=GetVPs table_name=filtered_vps  limit=None[0m


# Geoloc map

In [3]:
ref_shortest_ping_results = load_pickle(
    path_settings.RESULTS_PATH / "results_ref_shortest_ping.pickle"
)


eval: EvalResults = load_pickle(
    path_settings.RESULTS_PATH
    / "tier4_evaluation/results__best_hostname_geo_score_20_BGP_3_hostnames_per_org_ns.pickle"
)

In [23]:
granularity = "answer_bgp_prefix"
results_answer_subnet = eval.results_answer_subnets

count = 0
costs = []

map = folium.Map(
    zoom_start=12,
    tiles="https://{s}.basemaps.cartocdn.com/rastertiles/voyager/{z}/{x}/{y}{r}.png",
    attr='&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors &copy; <a href="https://carto.com/attributions">CARTO</a>',
)


d_error_per_budget = defaultdict(dict)
for target, target_results in results_answer_subnet.items():
    try:
        ecs_shortest_ping_vp = target_results["result_per_metric"]["jaccard"][
            "ecs_shortest_ping_vp_per_budget"
        ][50]
        no_ping_vp = target_results["result_per_metric"]["jaccard"]["no_ping_vp"]

    except KeyError:
        continue

    ecs_shortest_ping_vps = target_results["result_per_metric"]["jaccard"]["ecs_vps"]

    d_error = ecs_shortest_ping_vp["d_error"]

    target = target_results["target"]

    try:
        ref_shortest_ping_vp = ref_shortest_ping_results[target["addr"]][
            "ref_shortest_ping_vp"
        ]
    except:
        continue

    diff_error = ref_shortest_ping_vp["d_error"] - d_error

    if diff_error < 5000000000:

        # if target["addr"] != "5.180.228.37":
        #     continue

        # count += 1
        # if count > 100:
        #     break

        # logger.info("Target::")
        # logger.info(f"{target['addr']=}")
        # logger.info(f"{target['subnet']=}")
        # logger.info(f"{target['lat']=}")
        # logger.info(f"{target['lon']=}")

        # logger.info("ECS shortest ping vp::")
        # logger.info(f"{ecs_shortest_ping_vp['addr']=}")
        # logger.info(f"{ecs_shortest_ping_vp['subnet']=}")
        # logger.info(f"{ecs_shortest_ping_vp['lat']=}")
        # logger.info(f"{ecs_shortest_ping_vp['lon']=}")
        # logger.info(f"{ecs_shortest_ping_vp['rtt']=}")
        # logger.info(f"{ecs_shortest_ping_vp['d_error']=}")
        # logger.info(f"{ecs_shortest_ping_vp['score']=}")

        # logger.info("No ping vp::")
        # logger.info(f"{no_ping_vp['addr']=}")
        # logger.info(f"{no_ping_vp['subnet']=}")
        # logger.info(f"{no_ping_vp['lat']=}")
        # logger.info(f"{no_ping_vp['lon']=}")
        # logger.info(f"{no_ping_vp['d_error']=}")
        # logger.info(f"{no_ping_vp['score']=}")

        # logger.info("REF shortest ping vp::")
        # logger.info(f"{ref_shortest_ping_vp['addr']=}")
        # logger.info(f"{ref_shortest_ping_vp['subnet']=}")
        # logger.info(f"{ref_shortest_ping_vp['lat']=}")
        # logger.info(f"{ref_shortest_ping_vp['lon']=}")
        # logger.info(f"{ref_shortest_ping_vp['rtt']=}")
        # logger.info(f"{ref_shortest_ping_vp['d_error']=}")

        count += 1

        r_earth = 6378
        d = d_error
        new_latitude = target["lat"] + (d / r_earth) * (180 / pi)
        new_longitude = target["lon"] + (d / r_earth) * (180 / pi) / cos(
            target["lat"] * pi / 180
        )

        min_lat = target["lat"] - (d / r_earth) * (180 / pi)
        max_lat = target["lat"] + (d / r_earth) * (180 / pi)
        min_lon = target["lon"] - (d / r_earth) * (180 / pi) / cos(
            target["lat"] * pi / 180
        )
        max_lon = target["lon"] + (d / r_earth) * (180 / pi) / cos(
            target["lat"] * pi / 180
        )

        # for vp in vps:
        #     try:
        #         vp_lat, vp_lon, _ = vps_coordinates[vp["addr"]]
        #         plotDot(vp_lat, vp_lon, map, color="grey")

        #     except KeyError as e:
        #         print(e)
        #         continue

        # for vp_addr, _ in ecs_shortest_ping_vps:
        #     try:
        #         vp_lat, vp_lon, _ = vps_coordinates[vp_addr]
        #         plotDot(vp_lat, vp_lon, map, color="cyan", radius=4)

        #     except KeyError as e:
        #         print(e)
        #         continue

        # selected_vp_coordinates = np.array(selected_vp_coordinates, dtype=float)

        plotDot(target["lat"], target["lon"], map, color="red", radius=3)
        # plotDot(
        #     ecs_shortest_ping_vp["lat"], ecs_shortest_ping_vp["lon"], map, color="blue"
        # )

        # plotDot(no_ping_vp["lat"], no_ping_vp["lon"], map, color="pink")
        # plotDot(
        #     ref_shortest_ping_vp["lat"], ref_shortest_ping_vp["lon"], map, color="green"
        # )
        # plotDot(cluster_elected_lat, cluster_elected_lon, map, color="orange")

display(map)

logger.info(f"Nb wrongful geoloc:: {count}")

[32m2024-05-28 07:45:10.497[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m131[0m - [1mNb wrongful geoloc:: 748[0m


In [26]:
granularity = "answer_bgp_prefix"
results_answer_subnet = eval.results_answer_subnets

count = 0
costs = []
d_error_per_budget = defaultdict(dict)
for target, target_results in results_answer_subnet.items():
    try:
        ecs_shortest_ping_vp = target_results["result_per_metric"]["jaccard"][
            "ecs_shortest_ping_vp_per_budget"
        ][50]
        no_ping_vp = target_results["result_per_metric"]["jaccard"]["no_ping_vp"]

    except KeyError:
        continue

    ecs_shortest_ping_vps = target_results["result_per_metric"]["jaccard"]["ecs_vps"]

    d_error = ecs_shortest_ping_vp["d_error"]

    target = target_results["target"]

    try:
        ref_shortest_ping_vp = ref_shortest_ping_results[target["addr"]][
            "ref_shortest_ping_vp"
        ]
    except:
        continue

    diff_error = ref_shortest_ping_vp["d_error"] - d_error

    if diff_error < 50:

        if target["addr"] != "5.180.228.37":
            continue

        logger.info("Target::")
        logger.info(f"{target['addr']=}")
        logger.info(f"{target['subnet']=}")
        logger.info(f"{target['lat']=}")
        logger.info(f"{target['lon']=}")

        logger.info("ECS shortest ping vp::")
        logger.info(f"{ecs_shortest_ping_vp['addr']=}")
        logger.info(f"{ecs_shortest_ping_vp['subnet']=}")
        logger.info(f"{ecs_shortest_ping_vp['lat']=}")
        logger.info(f"{ecs_shortest_ping_vp['lon']=}")
        logger.info(f"{ecs_shortest_ping_vp['rtt']=}")
        logger.info(f"{ecs_shortest_ping_vp['d_error']=}")
        logger.info(f"{ecs_shortest_ping_vp['score']=}")

        logger.info("No ping vp::")
        logger.info(f"{no_ping_vp['addr']=}")
        logger.info(f"{no_ping_vp['subnet']=}")
        logger.info(f"{no_ping_vp['lat']=}")
        logger.info(f"{no_ping_vp['lon']=}")
        logger.info(f"{no_ping_vp['d_error']=}")
        logger.info(f"{no_ping_vp['score']=}")

        logger.info("REF shortest ping vp::")
        logger.info(f"{ref_shortest_ping_vp['addr']=}")
        logger.info(f"{ref_shortest_ping_vp['subnet']=}")
        logger.info(f"{ref_shortest_ping_vp['lat']=}")
        logger.info(f"{ref_shortest_ping_vp['lon']=}")
        logger.info(f"{ref_shortest_ping_vp['rtt']=}")
        logger.info(f"{ref_shortest_ping_vp['d_error']=}")

        count += 1

        r_earth = 6378
        d = d_error
        new_latitude = target["lat"] + (d / r_earth) * (180 / pi)
        new_longitude = target["lon"] + (d / r_earth) * (180 / pi) / cos(
            target["lat"] * pi / 180
        )

        min_lat = target["lat"] - (d / r_earth) * (180 / pi)
        max_lat = target["lat"] + (d / r_earth) * (180 / pi)
        min_lon = target["lon"] - (d / r_earth) * (180 / pi) / cos(
            target["lat"] * pi / 180
        )
        max_lon = target["lon"] + (d / r_earth) * (180 / pi) / cos(
            target["lat"] * pi / 180
        )

        map = folium.Map(
            location=(target["lat"], target["lon"]),
            tiles="https://{s}.basemaps.cartocdn.com/rastertiles/voyager/{z}/{x}/{y}{r}.png",
            attr='&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors &copy; <a href="https://carto.com/attributions">CARTO</a>',
            zoom_start=6,
            min_lat=min_lat,
            max_lat=max_lat,
            min_lon=min_lon,
            max_lon=max_lon,
        )

        for vp in vps:
            try:
                vp_lat, vp_lon, _ = vps_coordinates[vp["addr"]]
                plotDot(vp_lat, vp_lon, map, color="grey")

            except KeyError as e:
                print(e)
                continue

        for vp_addr, _ in ecs_shortest_ping_vps:
            try:
                vp_lat, vp_lon, _ = vps_coordinates[vp_addr]
                plotDot(vp_lat, vp_lon, map, color="cyan", radius=6)

            except KeyError as e:
                print(e)
                continue

        plotDot(target["lat"], target["lon"], map, color="red", radius=12)

        display(map)

        plotDot(
            ecs_shortest_ping_vp["lat"],
            ecs_shortest_ping_vp["lon"],
            map,
            color="darkblue",
            radius=12,
        )

        display(map)


logger.info(f"Nb wrongful geoloc:: {count}")

[32m2024-05-28 07:54:55.780[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m37[0m - [1mTarget::[0m
[32m2024-05-28 07:54:55.782[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m38[0m - [1mtarget['addr']='5.180.228.37'[0m
[32m2024-05-28 07:54:55.786[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m39[0m - [1mtarget['subnet']='5.180.228.0'[0m
[32m2024-05-28 07:54:55.793[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m40[0m - [1mtarget['lat']=40.4415[0m
[32m2024-05-28 07:54:55.794[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m41[0m - [1mtarget['lon']=-3.6185[0m
[32m2024-05-28 07:54:55.796[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m43[0m - [1mECS shortest ping vp::[0m
[32m2024-05-28 07:54:55.798[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m44[0m - [1mecs_shortest_ping_vp['addr']='185.161.96.30'[0m
[32m2024-05-28 07:54:55.799[0m |

# Wrong geoloc

In [6]:
import numpy as np

from pyasn import pyasn
from collections import defaultdict


from geogiant.common.utils import EvalResults, get_parsed_vps
from geogiant.common.queries import load_vps
from geogiant.common.files_utils import load_pickle
from geogiant.common.settings import PathSettings, ClickhouseSettings

path_settings = PathSettings()
clickhouse_settings = ClickhouseSettings()

asndb = pyasn(str(path_settings.RIB_TABLE))
vps = load_vps(clickhouse_settings.VPS_FILTERED)
vps_per_subnet, vps_coordinates = get_parsed_vps(vps, asndb)

ref_shortest_ping_results = load_pickle(
    path_settings.RESULTS_PATH / "results_ref_shortest_ping.pickle"
)

eval: EvalResults = load_pickle(
    path_settings.RESULTS_PATH
    / "tier5_evaluation/results__best_hostname_geo_score.pickle"
)

[32m2024-05-28 07:19:46.355[0m | [1mINFO    [0m | [36mgeogiant.clickhouse.query[0m:[36mexecute[0m:[36m122[0m - [1mquery=GetVPs table_name=filtered_vps  limit=None[0m


In [7]:
geo_resolver_results = defaultdict(list)
geo_resolver_results_500 = defaultdict(list)

for target_addr in eval.results_answer_subnets:
    try:
        geo_resolver_results[target_addr] = eval.results_answer_subnets[target_addr][
            "result_per_metric"
        ]["jaccard"]["ecs_shortest_ping_vp_per_budget"][50]

    except KeyError:
        continue

In [8]:
print(len(geo_resolver_results))
m_error = round(np.median([vp["d_error"] for vp in geo_resolver_results.values()]), 2)
m_error = round(
    np.median(
        [
            vp["ref_shortest_ping_vp"]["d_error"]
            for vp in ref_shortest_ping_results.values()
        ]
    ),
    2,
)
print(m_error)

751
5.04


In [9]:
wrongly_geolocated_ip_addr = 0
ref_index_above_500 = 0
error_diff_above_50 = 0
first_40_km_vp = []
error_diff = defaultdict(list)
for target, geo_resolver_result in geo_resolver_results.items():

    try:
        ref_result = ref_shortest_ping_results[target]
    except KeyError:
        continue

    ref_shortest_ping_vp = ref_result["ref_shortest_ping_vp"]
    ref_d_error = round(ref_result["ref_shortest_ping_vp"]["d_error"], 2)
    geo_resolver_d_error = round(geo_resolver_result["d_error"], 2)

    if (
        ref_d_error < 40
        and geo_resolver_d_error > 40
        and ref_shortest_ping_vp["score"] > 0
    ):

        print(f"Target addr:: {target}")
        print(f"Ref addr:: {ref_shortest_ping_vp['addr']}")
        print(f"Ref d_error:: {ref_d_error}")
        print(f"Ref rtt:: {ref_shortest_ping_vp['rtt']}")
        print(f"Ref score:: {ref_shortest_ping_vp['score']}")
        print(f"Ref score index:: {ref_shortest_ping_vp['index']}")
        print(f"Geo resolver addr:: {geo_resolver_result['addr']}")
        print(f"Geo resolver rtt:: {geo_resolver_result['rtt']}")
        print(f"Geo resolver d_error:: {geo_resolver_d_error}")
        print(f"Geo resolver score:: {geo_resolver_result['score']}")
        print(f"Geo resolver score index:: {geo_resolver_result['index']}")
        print(f"Geo resolver addr 500:: {geo_resolver_results_500[target]['addr']}")
        print(f"Geo resolver rtt 500:: {geo_resolver_results_500[target]['rtt']}")
        print(
            f"Geo resolver d_error 500:: {geo_resolver_results_500[target]['d_error']}"
        )
        print(f"Geo resolver score 500:: {geo_resolver_results_500[target]['score']}")
        print(
            f"Geo resolver score index 500:: {geo_resolver_results_500[target]['index']}"
        )

        first_40_km_vp.append(geo_resolver_results_500[target]["index"])

        for probing_budget, geo_resolver_results in eval.results_answer_subnets[
            target_addr
        ]["result_per_metric"]["jaccard"]["ecs_shortest_ping_vp_per_budget"].items():

            error_dif[probing].append(geo_resolver_d_error - ref_d_error)

            if geo_resolver_d_error - ref_d_error > 50:
                error_diff_above_50 += 1

            print(f"Error Dif:: {abs(ref_d_error - geo_resolver_d_error)}")

            if target == "66.255.252.225":
                print(
                    eval.results_answer_subnets[target]["result_per_metric"]["jaccard"][
                        "ecs_scores"
                    ]
                )
        print()
        wrongly_geolocated_ip_addr += 1

        if ref_shortest_ping_vp["index"] == -1:
            ref_index_above_500 += 1

print(wrongly_geolocated_ip_addr)
print(wrongly_geolocated_ip_addr / len(ref_shortest_ping_results) * 100)
print(ref_index_above_500 / wrongly_geolocated_ip_addr * 100)
print(error_diff_above_50 / wrongly_geolocated_ip_addr * 100)

Target addr:: 5.57.16.65
Ref addr:: 37.10.44.14
Ref d_error:: 0.98
Ref rtt:: 0.383938
Ref score:: 0.613779674243248
Ref score index:: 493
Geo resolver addr:: 90.223.193.1
Geo resolver rtt:: 1.210121
Geo resolver d_error:: 42.84
Geo resolver score:: 0.6633335070651352
Geo resolver score index:: 171


TypeError: list indices must be integers or slices, not str

In [None]:
from geogiant.evaluation.plot import ecdf, plot_cdf

x, y = ecdf(error_diff)

plot_cdf(
    x=x,
    y=y,
    output_path="error_diff_wrongly_geolocated",
    x_label="Geolocation error difference (km)",
    y_label="CDF of targets",
)

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


In [None]:
x, y = ecdf(first_40_km_vp)

plot_cdf(
    x=x,
    y=y,
    output_path="first_city_level_vp_index",
    x_label="VP index",
    y_label="CDF of targets",
)

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
