# Print tables

Print all the tables of the replication paper.  
Run this notebook after `analysis/million_scale.ipynb`.

In [1]:
import pyasn

from ipaddress import ip_network
from clickhouse_driver import Client

from scripts.utils.file_utils import load_json
from scripts.utils.clickhouse import Clickhouse
from scripts.analysis.analysis import get_all_bgp_prefixes, is_same_bgp_prefix, every_tier_result_and_errors
from scripts.utils.helpers import haversine
from default import IP_TO_ASN_FILE, ANALYZABLE_FILE, ROUND_BASED_ALGORITHM_FILE, TARGET_TO_LANDMARKS_PING_TABLE

## Measurement overhead

### Figure 3.c of the replication paper

In [2]:
# Load the round-based algorithm results and re-key them by integer, since
# JSON object keys are always strings.
round_based_algorithm_results = {
    int(key): value
    for key, value in load_json(ROUND_BASED_ALGORITHM_FILE).items()
}

In [3]:
# For each key of the results (in ascending order), print the key followed by
# three times the sum of (entry[2] + key) over the entries whose third field
# is not None.
# NOTE(review): the factor 3 is presumably the number of packets sent per
# measurement, and entry[2] the number of extra vantage points -- confirm.
for vp_budget in sorted(round_based_algorithm_results):
    tier1_vps = int(vp_budget)
    vps_per_target = [
        entry[2] + tier1_vps
        for entry in round_based_algorithm_results[vp_budget]
        if entry[2] is not None
    ]
    print(tier1_vps, 3 * sum(vps_per_target))

10 5785182
100 4459050
300 3205290
500 2800245
1000 2817933


## Number of landmarks within a certain radius

### Figure 5.b of the replication paper

In [3]:
# Per-target analysis records; only the values of the mapping are used below.
data = load_json(ANALYZABLE_FILE)

# Totals accumulated over every target and both tiers.
valid_landmarks_count = 0
unvalid_landmarks_count = 0

# Per-target fractions of traceroute entries whose two IPs share an AS,
# a /24, or a BGP prefix. A target only contributes an entry when at least
# one of its traceroute entries could be classified.
same_asn_lst = []
same_24_lst = []
same_bgp_lst = []

# One entry per target: the list of target-to-landmark distances, and the
# total number of candidate landmarks (valid + CDN + failed header test).
distances_to_landmarks = []
all_landmarks = []

asndb = pyasn.pyasn(str(IP_TO_ASN_FILE))
bgp_prefixes = get_all_bgp_prefixes()

for d in data.values():
    same_asn = 0
    diff_asn = 0
    same_bgp = 0
    diff_bgp = 0
    same_24 = 0
    diff_24 = 0

    # Candidate-landmark counters; a tier is only counted when all three of
    # its counters are present in the record.
    all_landmarks.append(0)
    for tier in ("tier2", "tier3"):
        landmark_key = f"{tier}:landmark_count"
        cdn_key = f"{tier}:cdn_count"
        failed_key = f"{tier}:failed_header_test_count"
        if cdn_key in d and landmark_key in d and failed_key in d:
            all_landmarks[-1] += d[landmark_key] + d[cdn_key] + d[failed_key]
            valid_landmarks_count += d[landmark_key]
            unvalid_landmarks_count += d[cdn_key] + d[failed_key]

    # Classify every traceroute entry of both tiers.
    # NOTE(review): t[1] / t[2] are presumably the target IP and the landmark
    # IP (see the message printed below) -- confirm against the producer.
    for f in ("tier2:traceroutes", "tier3:traceroutes"):
        if f not in d:
            continue
        for t in d[f]:
            ipt = t[1]
            ipl = t[2]

            # Same origin AS? Pairs with an unresolved ASN are skipped.
            asnt = asndb.lookup(ipt)[0]
            asnl = asndb.lookup(ipl)[0]
            if asnl is not None and asnt is not None:
                if asnt == asnl:
                    same_asn += 1
                else:
                    diff_asn += 1

            # Same /24? strict=False masks the host bits instead of raising.
            nt = ip_network(ipt + "/24", strict=False).network_address
            nl = ip_network(ipl + "/24", strict=False).network_address
            if nt == nl:
                same_24 += 1
            else:
                diff_24 += 1

            # Same BGP prefix?
            if is_same_bgp_prefix(ipt, ipl, bgp_prefixes):
                same_bgp += 1
            else:
                diff_bgp += 1

    # Distance from the target's RIPE coordinates to each landmark.
    # The target position does not depend on the tier, so build it once.
    target_geo = (d['RIPE:lat'], d['RIPE:lon'])
    distances = []
    for f in ("tier2:landmarks", "tier3:landmarks"):
        if f in d:
            for l in d[f]:
                landmark_geo = (l[2], l[3])
                distances.append(haversine(target_geo, landmark_geo))
    distances_to_landmarks.append(distances)

    # Turn the counters into per-target fractions.
    if same_asn != 0 or diff_asn != 0:
        same_asn_lst.append(same_asn / (same_asn + diff_asn))

    if same_24 != 0 or diff_24 != 0:
        same_24_lst.append(same_24 / (same_24 + diff_24))
        if same_24 != 0:
            print(
                f"Found {d['target_ip']} with a landmark in the same /24")

    if same_bgp != 0 or diff_bgp != 0:
        same_bgp_lst.append(same_bgp / (diff_bgp + same_bgp))

Found 78.128.211.119 with a landmark in the same /24
Found 77.109.180.62 with a landmark in the same /24
Found 103.143.136.43 with a landmark in the same /24


In [4]:
# Per target: how many landmarks exist in total and how many lie within each
# radius (the summary lines below label these radii in km).
landmarks_all = [len(dists) for dists in distances_to_landmarks]
landmarks_less_1 = [
    sum(1 for dist in dists if dist <= 1) for dists in distances_to_landmarks
]
landmarks_less_5 = [
    sum(1 for dist in dists if dist <= 5) for dists in distances_to_landmarks
]
landmarks_less_10 = [
    sum(1 for dist in dists if dist <= 10) for dists in distances_to_landmarks
]
landmarks_less_40 = [
    sum(1 for dist in dists if dist <= 40) for dists in distances_to_landmarks
]

# Number of targets with at least one landmark in each category.
lm_a_0 = sum(1 for count in all_landmarks if count > 0)
lmv_a_0 = sum(1 for count in landmarks_all if count > 0)
lm1_0 = sum(1 for count in landmarks_less_1 if count > 0)
lm5_0 = sum(1 for count in landmarks_less_5 if count > 0)
lm10_0 = sum(1 for count in landmarks_less_10 if count > 0)
lm40_0 = sum(1 for count in landmarks_less_40 if count > 0)

# Report each count together with its share of all targets.
len_all = len(data)
print(f"{lm_a_0} target have potentail landmarks or {lm_a_0/len_all}")
print(f"{lmv_a_0} target have valid landmarks or {lmv_a_0/len_all}")
print(f"{lm1_0} target with a landmark within 1 km or {lm1_0/len_all}")
print(f"{lm5_0} target with a landmark within 5 km or {lm5_0/len_all}")
print(f"{lm10_0} target with a landmark within 10 km or {lm10_0/len_all}")
print(f"{lm40_0} target with a landmark within 40 km or {lm40_0/len_all}")

713 target have potentail landmarks or 0.9861687413554634
677 target have valid landmarks or 0.9363762102351314
207 target with a landmark within 1 km or 0.2863070539419087
419 target with a landmark within 5 km or 0.5795297372060858
464 target with a landmark within 10 km or 0.6417704011065007
552 target with a landmark within 40 km or 0.7634854771784232


In [5]:
# Fetch the minimum-RTT rows from the target-to-landmarks ping table.
clickhouse_driver = Clickhouse()
query = clickhouse_driver.get_min_rtt_per_src_dst_prefix_query(
    TARGET_TO_LANDMARKS_PING_TABLE, filter="", threshold=1000000
)
db_table = clickhouse_driver.execute(query)

# Index the rows: the first two columns form the lookup key, the third column
# is the stored value (used below as an RTT).
rtts = []
remove_dict = {}
for row in db_table:
    rtts.append(row[2])
    remove_dict[(row[0], row[1])] = row[2]

# Per-target errors reported by every_tier_result_and_errors.
error1 = []
error2 = []
error3 = []
error4 = []

# Per-target distance to the closest landmark that passes each RTT limit.
error1ms = []
error2ms = []
error5ms = []
error10ms = []
errors_by_rtt_limit = {1: error1ms, 2: error2ms, 5: error5ms, 10: error10ms}

for d in data.values():
    errors = every_tier_result_and_errors(d)
    error1.append(errors['error1'])
    error2.append(errors['error2'])
    error3.append(errors['error3'])
    error4.append(errors['error4'])

    # 50000 is the "no landmark found yet" sentinel for every RTT limit.
    closest = {rtt_limit: 50000 for rtt_limit in errors_by_rtt_limit}
    for f in ('tier2:landmarks', 'tier3:landmarks'):
        if f not in d:
            continue
        for l_ip, _, l_lat, l_lon in d[f]:
            dist = haversine((l_lat, l_lon), (d['RIPE:lat'], d['RIPE:lon']))
            key_rtt = (l_ip, d['target_ip'])
            # A landmark is accepted when it has no entry in remove_dict or
            # when its stored value is within the limit.
            # NOTE(review): the recorded output of this cell matches the
            # unfiltered radius counts of the previous cell, which suggests
            # this filter may never exclude a landmark (e.g. key order not
            # matching the table's columns) -- confirm.
            for rtt_limit in closest:
                if dist < closest[rtt_limit] and (
                    key_rtt not in remove_dict or remove_dict[key_rtt] <= rtt_limit
                ):
                    closest[rtt_limit] = dist

    # When no landmark qualified, every limit falls back to this target's error1.
    for rtt_limit, bucket in errors_by_rtt_limit.items():
        if closest[rtt_limit] != 50000:
            bucket.append(closest[rtt_limit])
        else:
            bucket.append(error1[-1])

# Count the targets whose 1 ms error is within each bound.
# NOTE(review): the bound is compared against distances although the message
# says "ping" -- confirm the intended label.
for i in [1, 5, 10, 40, 9999999999]:
    c = sum(1 for j in error1ms if j <= i)
    print(f"{c} targets with landmarks (ping <= {i}) or {c/len(error1ms)}")

2023-09-14 13:19:51::INFO:root:analysis:: Tier1 Failed


207 targets with landmarks (ping <= 1) or 0.2863070539419087
419 targets with landmarks (ping <= 5) or 0.5795297372060858
464 targets with landmarks (ping <= 10) or 0.6417704011065007
552 targets with landmarks (ping <= 40) or 0.7634854771784232
723 targets with landmarks (ping <= 9999999999) or 1.0
