In [None]:
import pyasn
from env_project import ANALYZABLE_FILE_PATH, IP_TO_ASN_FILE_PATH, POPULATION_CITY_FILE_PATH

from clickhouse_driver import Client
from plot_utils.plot import plot_multiple_cdf, plot_scatter, plot_save, plot_scatter_multiple
import math
from scipy.stats import pearsonr
from geoloc_earth import get_points_in_poly, plot_circles_and_points
import statistics
import seaborn as sns
from matplotlib import pyplot as plt
import numpy as np
from pprint import pprint
import ujson as json
from helpers import haversine, rtt_to_km, is_within_cirle, polygon_centroid, circle_intersections, distance
import sys
sys.path.insert(0, './geoloc-imc-2023/geoloc_imc_2023')

## CBG evaluation

In [None]:
def cbg_evaluation(data):
    """Print summary statistics about the CBG (tier 1) step.

    Two passes are made over ``data``:

    1. Entries whose ``'tier1:done'`` flag is falsy are counted as failures
       and broken down further: would ``get_points_in_poly`` have returned a
       non-empty result with a factor of 2/3 instead of 4/9, did the entry
       have any vantage point at all and, when it had none, is there at
       least one row for that destination in the ClickHouse table
       ``bgp_interdomain_te.street_lvl_traceroutes``.
    2. Entries whose ``'tier1:done'`` flag is truthy are checked to see
       whether ``(lat_c, lon_c)`` is accepted by ``is_within_cirle`` for
       every vantage point, first with a threshold of 4/9, then with 2/3.

    Args:
        data: Mapping whose values are per-target dicts. Keys read here:
            ``'tier1:done'``, ``'vps'``, ``'target_ip'``, ``'lat_c'`` and
            ``'lon_c'``. Each item of ``'vps'`` is indexed at 0, 1 and 2
            (presumably latitude, longitude and RTT -- TODO confirm).

    Returns:
        None. Every result is printed.
    """
    bad = 0
    good = 0
    good_23_only = 0
    empty_vps = 0
    not_empty_vps = 0
    targeted_traceroutes = 0
    not_targeted_traceroutes = 0
    vps_not_working = []
    # The database connection is opened lazily (only if some target needs
    # it) and shared by all lookups instead of being re-created per target.
    client = None
    try:
        for d in data.values():
            if d['tier1:done']:
                good += 1
                continue
            bad += 1
            # Debug aid: report the result size when the 4/9 computation is
            # non-empty even though tier 1 is not flagged as done.
            points = get_points_in_poly(d['vps'], 36, 5, 4/9)
            if len(points) != 0:
                print(len(points))
            # Retry with 2/3, keeping only the first three fields of every
            # vantage point and resetting the remaining two to None.
            tmp_vps = [(vp[0], vp[1], vp[2], None, None) for vp in d['vps']]
            points = get_points_in_poly(tmp_vps, 36, 5, 2/3)
            if len(points) != 0:
                good_23_only += 1
            elif len(d['vps']) > 0:
                not_empty_vps += 1
                vps_not_working.append(d['target_ip'])
            else:
                empty_vps += 1
                if client is None:
                    client = Client('127.0.0.1')
                # The address is passed as a bound parameter so that the
                # driver quotes/escapes it, instead of being pasted into the
                # SQL text.
                tmp_row = client.execute(
                    'select src_addr, rtt, tstamp '
                    'from bgp_interdomain_te.street_lvl_traceroutes '
                    'where dst_addr = %(dst_addr)s',
                    {'dst_addr': d['target_ip']})
                if len(tmp_row) != 0:
                    targeted_traceroutes += 1
                    print(f"{d['target_ip']} was targeted by traceroutes")
                else:
                    not_targeted_traceroutes += 1
    finally:
        if client is not None:
            client.disconnect()

    total = bad + good
    # NaN instead of ZeroDivisionError when there is nothing to evaluate.
    failure_ratio = bad / total if total else float('nan')
    print(vps_not_working)
    print(f"{bad} no intersection out or {total} = {failure_ratio}")
    print(
        f"If the speed where to be 2/3 CBG would have worked for {good_23_only} more targets")
    print(f"{empty_vps} no vp, {not_empty_vps} some vps")
    print(
        f"When no vp found {targeted_traceroutes} target had a traceroute dedecated to it and {not_targeted_traceroutes} did not")

    def all_vps_accept(vps, point, threshold):
        # True when is_within_cirle accepts `point` for every vantage point
        # (vacuously True for an empty list, as in the original loop).
        return all(
            is_within_cirle((vp[0], vp[1]), vp[2], point,
                            speed_threshold=threshold)
            for vp in vps)

    position_in = 0
    position_out = 0
    would_be_in = 0
    for d in data.values():
        if not d['tier1:done']:
            continue
        candidate_geo = (d['lat_c'], d['lon_c'])
        if all_vps_accept(d['vps'], candidate_geo, 4/9):
            position_in += 1
            continue
        position_out += 1
        if all_vps_accept(d['vps'], candidate_geo, 2/3):
            would_be_in += 1
        else:
            print(f"{d['target_ip']} is always outside the CBG area")

    checked = position_in + position_out
    # Same guard as above: no tier-1 successes means no percentage.
    failed_pct = position_out * 100 / checked if checked else float('nan')
    print(f"the target was in the CBG area {position_in} times")
    print(f"the target was out of the CBG area {position_out} times")
    print(f"CBG failed {failed_pct}%")
    print(
        f"If we would use 2/3 {would_be_in} extra targets would be in the CBG area")

## Success rate

In [None]:



def success_rate(data):
    """Print how many targets completed each tier and why tiers 2/3 failed.

    First prints the number of entries in ``data`` followed by, for each of
    ``'tier1:done'``, ``'tier2:done'`` and ``'tier3:done'``, how many entries
    have that flag truthy.

    Then, for every entry with tier 1 done, the first of tier 2 / tier 3 that
    is not done is attributed to exactly one reason, tested in this order:
    zero inspected points, zero landmarks, no traceroute whose field 4 is
    positive, or "other". The eight resulting counters are printed.

    Args:
        data: Mapping whose values are per-target dicts carrying the
            ``'tierN:done'`` flags and, for a failed tier 2 or 3, the keys
            ``'tierN:inspected_points_count'``, ``'tierN:landmark_count'``
            and ``'tierN:traceroutes'``.

    Returns:
        None. Every result is printed.
    """
    done_counts = dict.fromkeys(('tier1:done', 'tier2:done', 'tier3:done'), 0)
    for entry in data.values():
        for flag in done_counts:
            if entry[flag]:
                done_counts[flag] += 1
    print(f"{len(data)} Total targets done")
    for flag, count in done_counts.items():
        print(f"{count} {flag}")

    later_tiers = ('tier2', 'tier3')
    reasons = ('no_zipcodes', 'no_landmark', 'no_valid_traceroute', 'other')
    # Keys come out as '<tier>_failed_because_<reason>', tier2 first.
    failure_counts = {
        f"{tier}_failed_because_{reason}": 0
        for tier in later_tiers
        for reason in reasons
    }

    def failure_reason(entry, tier):
        # The checks are ordered: the first one that matches wins.
        if entry[f"{tier}:inspected_points_count"] == 0:
            return 'no_zipcodes'
        if entry[f"{tier}:landmark_count"] == 0:
            return 'no_landmark'
        if not any(hop[4] > 0 for hop in entry[f"{tier}:traceroutes"]):
            return 'no_valid_traceroute'
        return 'other'

    for entry in data.values():
        # Tier 1 failures are not analysed here (left as a to-do by the
        # original author).
        if not entry['tier1:done']:
            continue
        for tier in later_tiers:
            if entry[f"{tier}:done"]:
                continue
            reason = failure_reason(entry, tier)
            failure_counts[f"{tier}_failed_because_{reason}"] += 1
            # Only the first failing tier is attributed a reason.
            break

    for label, count in failure_counts.items():
        print(f"{label} {count}")

## API calls count

In [None]:
def api_calles_count(data):
    """Print per-target medians and grand totals of three workload counters.

    For every entry of ``data`` three sums are computed over the tier 2 and
    tier 3 fields that are present in the entry:

    * inspected points (``'tierN:inspected_points_count'``),
    * landmarks (``failed_dns``, ``failed_asn``, ``cdn`` and ``non_cdn``
      counts),
    * traceroutes (length of ``'tierN:traceroutes'``).

    A sum equal to zero is left out, so it affects neither the median nor
    the total. Medians are printed first, then the totals.

    Args:
        data: Mapping whose values are per-target dicts; every field read
            here is optional.

    Returns:
        None. Every result is printed.
    """
    zipcode_fields = ('tier2:inspected_points_count',
                      'tier3:inspected_points_count')
    landmark_fields = ("tier2:failed_dns_count", "tier2:failed_asn_count",
                       "tier2:cdn_count", "tier2:non_cdn_count",
                       "tier3:failed_dns_count", "tier3:failed_asn_count",
                       "tier3:cdn_count", "tier3:non_cdn_count")
    traceroute_fields = ('tier2:traceroutes', 'tier3:traceroutes')

    per_target_zipcodes = []
    per_target_landmarks = []
    per_target_traceroutes = []
    for entry in data.values():
        n_zipcodes = sum(entry[key] for key in zipcode_fields if key in entry)
        if n_zipcodes != 0:
            per_target_zipcodes.append(n_zipcodes)

        n_landmarks = sum(entry[key] for key in landmark_fields if key in entry)
        if n_landmarks != 0:
            per_target_landmarks.append(n_landmarks)

        n_traceroutes = sum(
            len(entry[key]) for key in traceroute_fields if key in entry)
        if n_traceroutes != 0:
            per_target_traceroutes.append(n_traceroutes)

    # The traceroute median is printed before the landmark median.
    print(f"{np.median(per_target_zipcodes)} Zipcode to check (median)")
    print(f"{np.median(per_target_traceroutes)} traceroutes to check (median)")
    print(f"{np.median(per_target_landmarks)} landmarks to check (median)")

    print(f"{sum(per_target_zipcodes)} Overpass queries")
    print(f"{sum(per_target_landmarks)} landmarks verification")
    print(f"{sum(per_target_traceroutes)} traceroutes")

## Correlation same network

In [None]:
def correlation_same_network(data):
    """Print Pearson correlations between traceroute field 4 and distance.

    For every entry, the tier 2 and tier 3 traceroutes whose field 4 is not
    negative are considered. For each of them the haversine distance between
    ``(t[5], t[6])`` and the entry's ``(lat_c, lon_c)`` is computed, and the
    pair (field 4, distance) is collected into:

    * the "same ASN" series when both addresses ``t[1]`` and ``t[2]`` resolve
      to the same, non-None ASN in the pyasn database;
    * the "same BGP prefix" series when ``is_same_bgp_prefix`` accepts them.

    Within a series a distance that was already recorded is skipped. A
    coefficient is computed only for series holding more than one pair.
    Finally the number of coefficients and their medians are printed.

    NOTE(review): ``get_all_bgp_prefixes`` and ``is_same_bgp_prefix`` are not
    among the imports visible at the top of the notebook; presumably they
    are defined in another cell -- TODO confirm.

    Args:
        data: Mapping whose values are per-target dicts with ``'lat_c'``,
            ``'lon_c'`` and optional ``'tier2:traceroutes'`` /
            ``'tier3:traceroutes'`` lists.

    Returns:
        None. Every result is printed.
    """
    asn_coefficients = []
    bgp_coefficients = []
    asn_db = pyasn.pyasn(IP_TO_ASN_FILE_PATH)
    prefixes = get_all_bgp_prefixes()

    def add_if_new_distance(xs, ys, x, y):
        # A distance already present in the series is not recorded twice.
        if y not in ys:
            ys.append(y)
            xs.append(x)

    for entry in data.values():
        asn_x, asn_y = [], []
        bgp_x, bgp_y = [], []
        for field in ('tier2:traceroutes', 'tier3:traceroutes'):
            if field not in entry:
                continue
            for trace in entry[field]:
                if trace[4] < 0:
                    continue
                dist = haversine(
                    (trace[5], trace[6]), (entry['lat_c'], entry['lon_c']))
                # Presumably the target and landmark addresses -- TODO confirm.
                addr_t, addr_l = trace[1], trace[2]
                asn_t = asn_db.lookup(addr_t)[0]
                asn_l = asn_db.lookup(addr_l)[0]
                if asn_l is not None and asn_t is not None and asn_t == asn_l:
                    add_if_new_distance(asn_x, asn_y, trace[4], dist)

                if is_same_bgp_prefix(addr_t, addr_l, prefixes):
                    add_if_new_distance(bgp_x, bgp_y, trace[4], dist)

        # pearsonr needs at least two points.
        if len(asn_x) > 1:
            asn_coefficients.append(pearsonr(asn_x, asn_y)[0])
        if len(bgp_x) > 1:
            bgp_coefficients.append(pearsonr(bgp_x, bgp_y)[0])

    print(f"{len(asn_coefficients)} targets with correlation asn")
    print(f"{len(bgp_coefficients)} targets with correlation bgp")
    print(f"{np.median(bgp_coefficients)} median bgp correlation")
    print(f"{np.median(asn_coefficients)} median asn correlation")

## Ping

In [None]:
def ping_go_do(data, max_distance=40, output_path="ping_todo.json"):
    """Collect the (target, landmark) pairs that lie close together and save them.

    For every entry of ``data`` and every landmark listed under
    ``'tier2:landmarks'`` / ``'tier3:landmarks'``, the haversine distance
    between the entry's ``(lat_c, lon_c)`` and the landmark's ``(l[2], l[3])``
    is computed. Pairs not farther apart than ``max_distance`` are kept, keyed
    by ``(target_ip, landmark[0])``; when the same pair shows up several
    times the last distance wins.

    The number of retained pairs is printed and the list is written as JSON
    to ``output_path``.

    Args:
        data: Mapping whose values are per-target dicts with ``'lat_c'``,
            ``'lon_c'``, ``'target_ip'`` and optional ``'tierN:landmarks'``
            lists.
        max_distance: Inclusive upper bound on the distance, in whatever unit
            ``haversine`` returns (presumably km -- TODO confirm).
            Defaults to 40, the value that used to be hard-coded.
        output_path: File the JSON list is written to. Defaults to
            ``"ping_todo.json"`` in the current working directory.

    Returns:
        list[dict]: One ``{'target_ip', 'landmark_ip', 'distance'}`` dict per
        retained pair, in order of first appearance.
    """
    res_dct = {}
    for d in data.values():
        # Does not depend on the landmark field, so computed once per target.
        target_geo = (d['lat_c'], d['lon_c'])
        for field in ('tier2:landmarks', 'tier3:landmarks'):
            if field not in d:
                continue
            for landmark in d[field]:
                landmark_geo = (landmark[2], landmark[3])
                # Named `dist` so the `distance` helper imported at the top
                # of the notebook is not shadowed.
                dist = haversine(target_geo, landmark_geo)
                if dist <= max_distance:
                    # float() makes the value a plain Python float before it
                    # is serialised.
                    res_dct[(d['target_ip'], landmark[0])] = float(dist)
    print(len(res_dct))

    res = [
        {'target_ip': target_ip, 'landmark_ip': landmark_ip, 'distance': dist}
        for (target_ip, landmark_ip), dist in res_dct.items()
    ]

    with open(output_path, 'w') as outfile:
        json.dump(res, outfile)

    return res
