## stack overflow intersection circles method

In [20]:
'''
FINDING THE INTERSECTION COORDINATES (LAT/LON) OF TWO CIRCLES (GIVEN THE COORDINATES OF THE CENTER AND THE RADII)

Many thanks to Ture Pålsson who directed me to the right source, the code below is based on whuber's brilliant logic and
explanation here https://gis.stackexchange.com/questions/48937/calculating-intersection-of-two-circles 

The idea is that;
  1. The points in question are the mutual intersections of three spheres: a sphere centered beneath location x1 (on the 
  earth's surface) of a given radius, a sphere centered beneath location x2 (on the earth's surface) of a given radius, and
  the earth itself, which is a sphere centered at O = (0,0,0) of a given radius.
  2. The intersection of each of the first two spheres with the earth's surface is a circle, which defines two planes.
  The mutual intersections of all three spheres therefore lies on the intersection of those two planes: a line.
  Consequently, the problem is reduced to intersecting a line with a sphere.

Note that "Decimal" is used to get higher precision, which is important when the distance between the two points is
only a few meters.
'''
from decimal import Decimal
from math import cos, sin, sqrt
import math
import numpy as np

def _geocentric_unit_vector(point):
    """Convert a (lat, lon) pair in degrees to geocentric (x, y, z) Decimals on the unit sphere."""
    lat = math.radians(point[0])
    lon = math.radians(point[1])
    return (
        Decimal(cos(lon) * cos(lat)),  # x = cos(lon)*cos(lat)
        Decimal(sin(lon) * cos(lat)),  # y = sin(lon)*cos(lat)
        Decimal(sin(lat)),             # z = sin(lat)
    )


def _dot3(u, v):
    """Dot product of two 3-component sequences."""
    return u[0] * v[0] + u[1] * v[1] + u[2] * v[2]


def _cross3(u, v):
    """Cross product of two 3-component sequences."""
    return (
        u[1] * v[2] - u[2] * v[1],
        u[2] * v[0] - u[0] * v[2],
        u[0] * v[1] - u[1] * v[0],
    )


def _to_lat_lon_degrees(vec):
    """Convert a geocentric unit vector back to a (lat, lon) tuple in degrees."""
    # Clamp z: rounding can push it a hair outside [-1, 1], which would make asin raise.
    z = max(-1.0, min(1.0, float(vec[2])))
    # atan2 takes y and x separately and returns a longitude in the range -180..180 degrees.
    return (math.degrees(math.asin(z)), math.degrees(math.atan2(vec[1], vec[0])))


def intersection(p1, r1_meter, p2, r2_meter):
    """Intersect two circles drawn on the surface of a spherical earth.

    The circles are intersected as the meeting points of two planes and the unit sphere
    (method described at https://gis.stackexchange.com/questions/48937).

    Args:
        p1: (latitude, longitude) in degrees of the center of circle 1. Ex: (36.110174, -90.953524)
        r1_meter: radius of circle 1 in meters, measured along the sphere.
        p2: (latitude, longitude) in degrees of the center of circle 2.
        r2_meter: radius of circle 2 in meters, measured along the sphere.

    Returns:
        A list ``[(lat, lon), (lat, lon)]`` in degrees when the circles intersect.
        Otherwise a string: one message when the centers are the same point or antipodal,
        another when the circles do not intersect.
    """
    same_or_antipodal = "The centers of the circles can be neither the same point nor antipodal points."

    # 1. Convert (lat, lon) to (x, y, z) geocentric coordinates, in units where the earth has radius 1.
    x1 = _geocentric_unit_vector(p1)
    x2 = _geocentric_unit_vector(p2)

    # 2. Convert the radii (measured along the sphere) to angles. One nautical mile (1852 m) is
    #    1/60 degree of arc. Only the cosine of each angle is needed below.
    #    Note: the trigonometry runs in float; Decimal(float) converts exactly but adds no precision.
    cos_r1 = Decimal(cos(math.radians((r1_meter / 1852) / 60)))
    cos_r2 = Decimal(cos(math.radians((r2_meter / 1852) / 60)))

    # 3./4. Each circle lies in a plane x.x_i = cos(r_i). The point x0 = a*x1 + b*x2 lying on both
    #    planes satisfies, with q = x1.x2:
    #        a = (cos(r1) - cos(r2)*q) / (1 - q^2)
    #        b = (cos(r2) - cos(r1)*q) / (1 - q^2)
    q = _dot3(x1, x2)
    q_squared = q ** 2
    if q_squared == 1:
        return same_or_antipodal
    denominator = 1 - q_squared
    a = (cos_r1 - cos_r2 * q) / denominator
    b = (cos_r2 - cos_r1 * q) / denominator

    # 5. All other points of the line shared by the two planes are x0 + t*n with n = x1 x x2.
    #    n is zero when the centers coincide or are diametrically opposite.
    n = _cross3(x1, x2)
    n_dot_n = _dot3(n, n)
    if n_dot_n == 0:
        return same_or_antipodal

    # 6. Points on the earth have length 1: 1 = x0.x0 + t^2 * n.n (x0 is perpendicular to n),
    #    hence t = +/- sqrt((1 - x0.x0) / n.n). No real solution exists when x0.x0 > 1.
    x0 = tuple(a * c1 + b * c2 for c1, c2 in zip(x1, x2))
    x0_dot_x0 = _dot3(x0, x0)
    if x0_dot_x0 > 1:
        return "The circles do not intersect"
    t = Decimal(sqrt((1 - x0_dot_x0) / n_dot_n))

    first = tuple(c + t * m for c, m in zip(x0, n))
    second = tuple(c - t * m for c, m in zip(x0, n))

    # 7. Convert both solutions back to geographic coordinates.
    return [_to_lat_lon_degrees(first), _to_lat_lon_degrees(second)]

'''
Example: the output of below is  [(36.989311051533505, -88.15142628069133), (38.2383796094578, -92.39048549120287)]
         intersection_points = intersection((37.673442, -90.234036), 107.5*1852, (36.109997, -90.953669), 145*1852)
         print(intersection_points)
'''


'\nExample: the output of below is  [(36.989311051533505, -88.15142628069133), (38.2383796094578, -92.39048549120287)]\n         intersection_points = intersection((37.673442, -90.234036), 107.5*1852, (36.109997, -90.953669), 145*1852)\n         print(intersection_points)\n'

In [2]:
def polygon_area(points):
    """Return the absolute area of a polygon given as a sequence of (x, y) vertices.

    Uses the shoelace sum over consecutive vertex pairs (the last vertex is paired with
    the first). Returns None when fewer than three vertices are given or when the
    computed area is NaN.
    """
    n_vertices = len(points)
    if n_vertices < 3:
        return None

    twice_signed_area = 0
    for idx in range(n_vertices):
        cur_x, cur_y = points[idx]
        # idx - 1 is -1 on the first pass, i.e. the last vertex closes the polygon.
        prev_x, prev_y = points[idx - 1]
        twice_signed_area = twice_signed_area + cur_x * prev_y - cur_y * prev_x

    signed_area = twice_signed_area / 2
    return None if np.isnan(signed_area) else abs(signed_area)


def polygon_centroid(points):
    """Return the area centroid of a polygon given as a sequence of (x, y) vertices.

    Args:
        points: indexable sequence of (x, y) pairs; the last vertex is joined to the first.

    Returns:
        ``(centroid_x, centroid_y, six_area)`` where ``six_area`` is six times the absolute
        polygon area, or None when there are fewer than three vertices or the area is
        zero or NaN (no centroid can be computed).
    """
    import math  # local import so the function does not depend on another cell's imports

    count = len(points)
    if count < 3:
        return None

    cross_sum = 0
    x_total = 0
    y_total = 0
    for i in range(count):
        p1_x, p1_y = points[i]
        # i - 1 is -1 on the first pass, i.e. the last vertex closes the polygon.
        p2_x, p2_y = points[i - 1]

        cross = p1_x * p2_y - p2_x * p1_y
        cross_sum += cross
        x_total += (p1_x + p2_x) * cross
        y_total += (p1_y + p2_y) * cross

    # The weighted sums carry the sign of the winding order, so they must be divided by
    # the *signed* area; dividing by the absolute area flips the centroid for one orientation.
    signed_six_area = cross_sum / 2 * 6
    if signed_six_area == 0 or math.isnan(signed_six_area):
        return None

    return x_total / signed_six_area, y_total / signed_six_area, abs(signed_six_area)


def polygon_centroid_2023(points):
    """
    Compute the centroid of a finite set of points: the mean of the first
    coordinates and the mean of the second coordinates.
    (see https://en.wikipedia.org/wiki/Centroid#Of_a_finite_set_of_points)
    """
    first_total, second_total = 0, 0
    for vertex in points:
        first_total = first_total + vertex[0]
        second_total = second_total + vertex[1]
    n_points = len(points)
    return first_total / n_points, second_total / n_points

In [10]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from math import radians, cos, sin, asin, sqrt, pi
from geoloc_imc_2023.helpers import circle_intersections, rtt_to_km

# Value handed to rtt_to_km / circle_intersections as the speed threshold.
# NOTE(review): presumably a fraction of the speed of light - confirm in geoloc_imc_2023.helpers.
SPEED_THRESHOLD = 4/9
# Radius (in meters) used to turn meter offsets into degree offsets when drawing circles.
EARTH_RADIUS_M = 6378137
N = 360 # number of discrete sample points to be generated along the circle


def _circle_outline(lat, lon, radius_km, nb_points):
    """Sample nb_points (lat, lon) positions on the circle of radius_km kilometers around (lat, lon).

    Returns two lists: the latitudes and the longitudes of the sampled points.
    """
    lats, lons = [], []
    for k in range(nb_points):
        angle = pi*2*k/nb_points
        dx = radius_km*1000*cos(angle)
        dy = radius_km*1000*sin(angle)
        lats.append(lat + (180/pi)*(dy/EARTH_RADIUS_M))
        lons.append(lon + (180/pi)*(dx/EARTH_RADIUS_M)/cos(lat*pi/180))
    return lats, lons


# each entry: (latitude, longitude, rtt, None, None)
probe_circles = {
    "a": (48.68559000514443, 1.4890742633310852, 4.5, None, None),
    "b": (49.386153229694834, 2.2976143160251232, 4.5, None, None),
    "c": (48.86168126136162, 2.916765060171397, 4.5, None, None),
}

# same circles with the rtt converted to a radius, used only for drawing
test_circles = {
    key: (lat, lon, rtt_to_km(rtt, SPEED_THRESHOLD))
    for key, (lat, lon, rtt, _, _) in probe_circles.items()
}

# CONCLUSION: if we take one working example and use it for the calculation,
# the results differ from reality.
# Nonetheless, the program seems to work otherwise.

intersections = circle_intersections(probe_circles.values(), speed_threshold=SPEED_THRESHOLD)

centroid = polygon_centroid_2023(intersections)

print("intersecion:", intersections)
print("centroid:", centroid)
print("polygon:")

df_circles = pd.DataFrame(list(test_circles.values()), columns=['Latitude', 'Longitude', 'Radius'])

fig_map3 = px.scatter_mapbox(df_circles['Radius'], lon=df_circles['Longitude'], lat=df_circles['Latitude'],
                             hover_name='Radius', zoom=9, width=300, height=500)

# draw every circle
for i, (lat, lon, r) in enumerate(zip(df_circles['Latitude'], df_circles['Longitude'], df_circles['Radius'])):

    print(lat, lon, r)

    # test getting point on circle in case there is no intersection
    if i == 0:
        single_circle_lats, single_circle_lons = _circle_outline(lat, lon, r, 4)

        fig_map3.add_trace(go.Scattermapbox(
                lat=single_circle_lats,
                lon=single_circle_lons,
                marker=go.scattermapbox.Marker(
                    size=10, color="Red"
                ),
        ))

    circle_lats, circle_lons = _circle_outline(lat, lon, r, N)

    # repeat the first point so that the outline is closed
    circle_lats.append(circle_lats[0])
    circle_lons.append(circle_lons[0])

    fig_map3.add_trace(go.Scattermapbox(
        lat=circle_lats,
        lon=circle_lons,
        mode='lines',
        marker=go.scattermapbox.Marker(
            size=1, color="BlueViolet"
        ),
    ))

# add calculated intersections
print("calculated intersections:")
for lat, lon in intersections:
    print(lat, lon)

fig_map3.add_trace(go.Scattermapbox(
    lat=[point[0] for point in intersections],
    lon=[point[1] for point in intersections],
    fill="toself"
))

print("calculated centroid:", centroid[0], centroid[1])
fig_map3.add_trace(go.Scattermapbox(
    lat=[centroid[0]],
    lon=[centroid[1]],
    marker=go.scattermapbox.Marker(
        size=10, color="Green"
    ),
))

fig_map3.update_layout(mapbox_style='open-street-map', margin={'r':0, 't':0, 'l':0, 'b':0}, width=500)
fig_map3.show()

intersecion: [(54.047616086019005, 0.5197296691916815), (45.62117314446898, 8.025479402389852), (45.66299922539507, -3.4922106409963742)]
centroid: (48.44392948529435, 1.684332810195053)
polygon:
48.68559000514443 1.4890742633310852 600.0
49.386153229694834 2.2976143160251232 600.0
48.86168126136162 2.916765060171397 600.0
calculated intersections:
54.047616086019005 0.5197296691916815
45.62117314446898 8.025479402389852
45.66299922539507 -3.4922106409963742
calculated centroid: 48.44392948529435 1.684332810195053


## improve post requests efficiency

In [23]:
import os
import arrow
from json.decoder import JSONDecodeError
import logging
import ujson as json
import math
from collections import defaultdict
from requests_futures.sessions import FuturesSession


def worker_task(resp, *args, **kwargs):
    """Response hook: decode the JSON body and store it on ``resp.data``.

    When the body cannot be decoded, an error is logged and ``resp.data`` is set to an
    empty dict. Extra positional and keyword arguments are accepted and ignored.
    """
    try:
        payload = resp.json()
    except JSONDecodeError:
        logging.error("Error while reading Atlas json data.\n")
        payload = {}
    resp.data = payload


class Disconnect():
    """Asynchronous client for the events endpoint given by ``url`` (IHR "disco" events by default)."""

    def __init__(self, start=None, end=None, streamnames=None, af=4, session=None,
                 cache=True, cache_dir="cache/",
                 url='https://ihr.iijlab.net/ihr/api/disco/events/',
                 nb_threads=2):
        """
        :start: Start date/time (converted with arrow.get when a query is built).
        :end: End date/time (converted with arrow.get when a query is built).
        :streamnames: Stream name(s) to query. A single int/str or an iterable of
        them. None (default) queries without a streamname filter.
        :af: Address family, default is IPv4
        :session: Requests session to use. A FuturesSession with nb_threads
        workers is created when None.
        :cache: Set to False to ignore cache
        :cache_dir: Directory used for cached results (created if missing).
        :url: API root url
        :nb_threads: Maximum number of parallel downloads
        """

        if isinstance(streamnames, (int, str)):
            streamnames = [streamnames]
        elif streamnames is None:
            streamnames = [None]

        self.streamnames = set(streamnames)
        self.start = start
        self.end = end
        self.af = af
        self.cache = cache
        if session is None:
            self.session = FuturesSession(max_workers=nb_threads)
        else:
            self.session = session

        self.url = url
        self.cache_dir = cache_dir
        # makedirs handles nested directories and an already existing directory,
        # which the former exists()/mkdir() pair did not.
        os.makedirs(cache_dir, exist_ok=True)
        self.params = {}
        self.queries = defaultdict(list)

    def query_api(self, streamname, page):
        """Single API query. Don't call this method, use get_results instead.

        :streamname: Stream name to filter on, or None for no filter.
        :page: Page number of the paginated results.
        :returns: Whatever ``self.session.get`` returns (a future with the default
        FuturesSession); ``worker_task`` is registered as response hook.
        """

        params = dict(
            starttime__gte=arrow.get(self.start),
            endtime__lte=arrow.get(self.end),
            af=self.af,
            page=page,
            format="json"
        )

        if streamname is not None:
            params["streamname"] = streamname

        logging.info("query results for {}, page={}".format(streamname, page))
        # remember the parameters of the most recent query
        self.params = params
        return self.session.get(
            url=self.url, params=params,
            hooks={'response': worker_task, }
        )

    def get_results(self):
        """Launch the first-page query for every stream name without a cache file.

        A stream name is skipped when caching is enabled and its cache file exists.

        NOTE(review): the method looks truncated. The launched queries are kept
        only in a local dict and nothing is returned or cached yet - confirm
        against the original implementation.
        """

        # Main loop
        queries = {}

        # Query the API
        for streamname in self.streamnames:
            # Skip the query if we have the corresponding cache
            cache_fname = os.path.join(
                self.cache_dir,
                "Disconnect_start{}_end{}_streamname{}_af{}.json".format(
                    self.start, self.end, streamname, self.af))
            if self.cache and os.path.exists(cache_fname):
                continue
            queries[streamname] = self.query_api(streamname, 1)

## measurements results

In [None]:
from random import randint
from ipaddress import IPv4Address, IPv4Network
from geoloc_imc_2023.atlas_probing import RIPEAtlas
from collections import defaultdict
from datetime import datetime

import os  # imported here so that this cell also runs on a fresh kernel

# Credentials are read from the environment so that they are never stored in the notebook.
# NOTE(review): a key used to be hard-coded here; since it was shared with the notebook it should be revoked.
RIPE_ACCOUNT = os.environ.get("RIPE_ACCOUNT", "")
RIPE_KEY = os.environ.get("RIPE_KEY", "")
if not RIPE_ACCOUNT or not RIPE_KEY:
    print("warning: RIPE_ACCOUNT and/or RIPE_KEY is not set in the environment")
MAX_NUMBER_OF_VPS = 100  # upper bound on the number of vantage points per measurement
NB_PACKETS = 3  # passed to driver.probe for every measurement

import pickle
from pathlib import Path

# Load the two datasets used by this cell.
# NOTE: pickle can execute arbitrary code while loading - only use files you produced yourself.
in_file = Path(".") / "../datasets/parsed_hitlist.pickle"
with in_file.open("rb") as hitlist_handle:
    # looked up by "a.b.c.0" prefix in get_target_hitlist
    ip_hitlist = pickle.load(hitlist_handle)

anchor_file = "../datasets/anchors.pickle"
with open(anchor_file, mode="rb") as anchors_handle:
    # iterated and indexed by anchor ip address below
    anchors = pickle.load(anchors_handle)

def get_target_hitlist(anchor_ip: str, hitlist_size: int = 3, hitlist=None) -> list[str]:
    """Return ``hitlist_size`` target ips located in the /24 prefix of ``anchor_ip``.

    Addresses known for the prefix are taken first; when there are not enough of them,
    the list is completed with random host addresses (.1 to .254) of the same /24.

    Args:
        anchor_ip: dotted IPv4 address of the anchor.
        hitlist_size: number of addresses to return.
        hitlist: mapping "a.b.c.0" -> list of ips; defaults to the module-level ``ip_hitlist``.

    Returns:
        A new list; the lists stored in the hitlist are never modified.
    """
    if hitlist is None:
        hitlist = ip_hitlist

    # "a.b.c.d" -> "a.b.c.0"
    prefix_anchor = ".".join(anchor_ip.split(".")[:-1] + ["0"])

    try:
        # copy: extending the stored list would silently add random ips to the hitlist
        ip_list = list(hitlist[prefix_anchor])
    except KeyError:
        ip_list = []

    missing = hitlist_size - len(ip_list)
    if missing > 0:
        prefix_target = IPv4Network(prefix_anchor + '/' + str(24))
        ip_list.extend(str(prefix_target[randint(1, 254)]) for _ in range(missing))

    return ip_list[:hitlist_size]


driver = RIPEAtlas(RIPE_ACCOUNT, RIPE_KEY)
# target ip -> {"id": [measurement ids started for that target]}
measurements = defaultdict(dict)
dry_run = False
for i, ip_target in enumerate(anchors):

    # only the first anchors are probed (testing)
    if i > 10:
        break

    # get ip_target_list; copy it so that appending below can never touch the hitlist itself
    random_target_list = list(get_target_hitlist(ip_target, 3))
    # add the real ip_target to verify that geoloc prefixes is valid assumption
    random_target_list.append(ip_target)

    # get vp ids: every anchor except the target itself
    vp_ids = [anchors[ip_vp]['id'] for ip_vp in anchors if ip_vp != ip_target]
    assert len(vp_ids) == len(anchors) - 1
    # select a number of vps (testing)
    vp_ids = vp_ids[:MAX_NUMBER_OF_VPS]

    print(f"starting measurement for {ip_target=} with {random_target_list} with {len(vp_ids)} anchors")

    for ip in random_target_list:
        if dry_run:
            print(f"measurement for {ip}")
            continue

        # TODO: parallelize post requests otherwise it takes too much time
        # probe and retrieve measurements
        measurement_id = driver.probe(str(ip), vp_ids, NB_PACKETS)
        measurements[ip_target].setdefault("id", []).append(measurement_id)

for target_addr in measurements:
    print(f"target ip : {target_addr} : {measurements[target_addr]['id']}")

In [None]:
import requests
import time
from typing import List
import pickle
from collections import defaultdict

import os  # imported here so that this cell also runs on a fresh kernel

# load the measurement ids and the anchors description
# NOTE: pickle can execute arbitrary code while loading - only use files you produced yourself.
in_file = "../results/measurement_ids.pickle"
with open(in_file, "rb") as f:
    measurements = pickle.load(f)

anchor_file = "../datasets/anchors.pickle"
with open(anchor_file, "rb") as f:
    anchors = pickle.load(f)

print(f"retreive measurements for {len(measurements)} ip addresses")

# The API key is read from the environment so that it is never stored in the notebook.
key = os.environ.get("RIPE_KEY", "")
MAX_POLL_ATTEMPTS = 60  # number of times the results endpoint is asked before giving up
POLL_DELAY_S = 2        # pause between two attempts, in seconds


def _fetch_results(measurement_id):
    """Poll the results endpoint of one measurement until it returns something non-empty."""
    url = f"https://atlas.ripe.net/api/v2/measurements/{measurement_id}/results/"
    response = []
    for _ in range(MAX_POLL_ATTEMPTS):
        response = requests.get(url, params={"key": key}, timeout=20).json()
        if response:
            break
        time.sleep(POLL_DELAY_S)
    return response


def _extract_rtts(result):
    """Return the numeric rtt values of one result entry.

    Replies without a numeric "rtt" are dropped so that min() and the sorting below only
    ever compare numbers.
    NOTE(review): assumes timeouts/errors are reported under other keys (e.g. "x": "*") -
    confirm against the result format.
    """
    raw = result['result']
    if isinstance(raw, list):
        candidates = [reply.get("rtt") for reply in raw if isinstance(reply, dict)]
    else:
        candidates = [raw["rtt"]]
    return [rtt for rtt in candidates if isinstance(rtt, (int, float))]


for target_addr in measurements:

    target_results = []
    prefix_results = defaultdict(list)

    for measurement_id in measurements[target_addr]["id"]:
        response = _fetch_results(measurement_id)
        if not isinstance(response, list):
            # e.g. an error document instead of a list of results
            print(f"unexpected response for measurement {measurement_id}: {response}")
            continue

        for result in response:
            # parse results and calculate geoloc
            if result.get('result') is None:
                print(f"no results: {result}")
                continue

            dst_addr = result['dst_addr']
            vp_ip = result['from']

            rtt_list = _extract_rtts(result)
            if not rtt_list:
                continue

            record = {
                "node": vp_ip,
                "min_rtt": min(rtt_list),
                "rtt_list": rtt_list,
                # coordinates of the vantage point
                "vp_lat": anchors[vp_ip]['latitude'],
                "vp_lon": anchors[vp_ip]['longitude'],
            }

            if dst_addr == target_addr:
                target_results.append(record)
            else:
                prefix_results[dst_addr].append(record)

    # sort by delay
    target_results.sort(key=lambda x: x["min_rtt"])
    for records in prefix_results.values():
        records.sort(key=lambda x: x["min_rtt"])

    measurements[target_addr]["target_results"] = target_results
    measurements[target_addr]["prefix_results"] = prefix_results
    


In [None]:
import pickle

# show the results of the first targets
for i, ip_target in enumerate(measurements):
    if i > 10:
        break
    target_results = measurements[ip_target]["target_results"]
    print(f"results for ip: {ip_target}:")
    print(f"len {len(target_results)}")
    for result in target_results:
        print("ip result", result)
    for prefix_records in measurements[ip_target]["prefix_results"].values():
        print("prefix results:", prefix_records)

# save results
out_file = "../results/all_anchors_to_all_anchors_test.pickle"
print(out_file)
with open(out_file, "wb") as f:
    pickle.dump(measurements, f)

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns

# load_dataset fetches the data over the network; the recorded run of this cell
# failed there with an SSL certificate error.
penguins = sns.load_dataset('penguins')

# three panels side by side; only the first one is filled in so far
fig, axes = plt.subplots(ncols=3, figsize=(15, 4))
ax1, ax2, ax3 = axes

sns.ecdfplot(data=penguins, x="bill_length_mm", hue="species", ax=ax1)
ax1.set_title('Default')

URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:997)>