In [1]:
import requests
import json
res = requests.get("https://atlas.ripe.net/api/v2/measurements/traceroute?status=Stopped&sort=-id&format=json")
j = json.loads(res.text)
print(len(j['results']))

50


In [2]:
dst_ips = {r['target_ip']:0 for r in j['results']}
last_page = j
while len(dst_ips) < 1000:
    page = json.loads(requests.get(last_page['next']).text)
    for r in page['results']:
        if r['target_ip'] not in dst_ips:
            dst_ips[r['target_ip']] = 0
    last_page = page
    
print(len(dst_ips))

1005


In [18]:
def fetch_results(dst_ip):
    res = requests.get(f"https://atlas.ripe.net/api/v2/measurements/traceroute?status=Stopped&sort=-id&target={dst_ip}&format=json")
    j = json.loads(res.text)
    return j

def find_pairs(res):
    if res['count'] < 2:
        return []
    
    res = res['results']
    try:
        pairs = []
        last_time = None
        if res[0]['stop_time'] is not None:
            last_time = res[0]['stop_time']
        elif res[0]['status']['when'] is not None:
            last_time = res[0]['status']['when']
        else:
            assert(False)
            
        for idx in range(len(res) - 1):
            i = idx+1
            start = res[i]['start_time']
            diff = start - last_time
            if diff < 30:
                pairs.append((res[i-1]['id'], res[i]['id']))
                
            if res[0]['stop_time'] is not None:
                last_time = res[0]['stop_time']
            elif res[0]['status']['when'] is not None:
                last_time = res[0]['status']['when']
            else:
                assert(False)
        return pairs
    except Exception as e:
        print(res)
        raise e

In [19]:
x = find_pairs(fetch_results('27.71.193.1'))
print(x)
x = find_pairs(fetch_results(list(dst_ips)[0]))
print(x)

[]
[(24757826, 24743499), (24743499, 24636493), (24636493, 24589823), (24589823, 24324633), (24324633, 24259169), (24259169, 24257462), (24257462, 24187178)]


In [20]:
# a list of pairs of measurements to the same dest that happened close in time
import itertools
import concurrent.futures

def pairs_for_dip(dip):
    return find_pairs(fetch_results(dip))

m_pairs = []
with concurrent.futures.ThreadPoolExecutor() as rt:
    ps = [rt.submit(pairs_for_dip, dip) for dip in dst_ips]
    m_pairs = list(itertools.chain.from_iterable(p.result() for p in ps))
len(m_pairs)

10081

In [33]:
m_id = m_pairs[0][0]
res0 = json.loads(requests.get(f"https://atlas.ripe.net/api/v2/measurements/{m_id}/results/?format=json").text)
print(sorted([r['from'] for r in res0]))
m_id = m_pairs[0][1]
res1 = json.loads(requests.get(f"https://atlas.ripe.net/api/v2/measurements/{m_id}/results/?format=json").text)
print(sorted([r['from'] for r in res1]))

['102.163.98.10', '140.238.68.88', '149.7.57.150', '175.144.27.133', '189.238.89.244', '202.187.63.153', '213.218.135.244', '41.86.240.141', '96.30.152.34', '99.239.104.226']
['102.163.100.12', '110.159.126.66', '140.238.69.182', '174.114.195.220', '189.172.229.177', '202.186.237.158', '213.218.135.244', '38.111.188.11', '41.86.240.141', '96.30.152.34']


In [50]:
import subprocess

def get_asn(ip):
    out = subprocess.run(f"curl https://api.iptoasn.com/v1/as/ip/{ip}", stdout=subprocess.PIPE, shell=True)
    j = json.loads(out.stdout)
    return j['as_number']
    
get_asn('140.238.69.182')

31898

In [34]:
from ripe.atlas.sagan import Result

def find_matching_src_idxs(xs, ys):
    xs_srcs = [r['from'] for r in xs]
    ys_srcs = [r['from'] for r in ys]
    common = set(xs_srcs) & set(ys_srcs)
    idxs = []
    for src in common:
        x_idx = 0
        y_idx = 0
        for i in range(len(xs_srcs)):
            if xs_srcs[i] == src:
                x_idx = i
                break
        for i in range(len(ys_srcs)):
            if ys_srcs[i] == src:
                y_idx = i
                break
        idxs.append((x_idx, y_idx))
    return idxs

def trim_path(ip_path):
    if len(ip_path) < 3:
        return []
    ases = [get_asn(ip) for ip in ip_path]
    middle_ips = []
    for asn, ips in itertools.groupby(zip(ases, ip_path), key=lambda x:x[0]):
        middle_ips.append((asn, ips))
    src_asn = middle_ips[0][0]
    dst_asn = middle_ips[-1][0]
    middle_ips = [x[1] for x in middle_ips]
    return src_asn, dst_asn, list(itertools.chain(*middle_ips[1:-1]))

def check_pair(p):
    res0 = json.loads(requests.get(f"https://atlas.ripe.net/api/v2/measurements/{p[0]}/results/?format=json").text, object_hook=lambda x:[x])
    res1 = json.loads(requests.get(f"https://atlas.ripe.net/api/v2/measurements/{p[1]}/results/?format=json").text, object_hook=lambda x:[x])
    common = find_matching_src_idxs(res0, res1)
    ps = []
    for r0idx, r1idx in common:
        src = res0[r0idx]['from']
        r0 = Result.get(res0[r0idx])
        r1 = Result.get(res1[r1idx])
        r0_ippath = [x[0] for x in r0.ip_path]
        r1_ippath = [x[0] for x in r1.ip_path]
        
        # trim src and dst AS parts
        src_asn, dst_asn, r0_ippath = trim_path(r0_ippath)
        _, _, r1_ippath = trim_path(r1_ippath)
        ps.append({'src_ip': src, 'src_asn': src_asn, 'dst_ip': r0.destination_address, 'dst_asn': dst_asn, 'path': r0_ippath})
    
    # use intesection over union
    #res0_ippath = set(x[0] for x in res0.ip_path)
    #res1_ippath = set(x[0] for x in res1.ip_path)
    #union = len(res0_ippath | res1_ippath)
    #inter = len(res0_ippath & res1_ippath)
    #return { 'dst_ip': res0.destination_address, 'iou': float(inter) / union, 'time_sep': res0.end_time - res1.end_time }

In [None]:
import logging
logging.disable(logging.CRITICAL); 
ps = []
for p in m_pairs:
    c = check_pair(p)
    if c is not None:
        ps.append(c)

In [56]:
def print_pair(p):
    res0 = json.loads(requests.get(f"https://atlas.ripe.net/api/v2/measurements/{p[0]}/results/?format=json").text)
    res1 = json.loads(requests.get(f"https://atlas.ripe.net/api/v2/measurements/{p[1]}/results/?format=json").text)
    res0 = Result.get(res0[0])
    res1 = Result.get(res1[0])
    print(res0.source_address, res0.destination_address, res0.ip_path)

print_pair(m_pairs[0])

10.0.0.4 188.171.192.1 [['140.91.200.23'], ['80.239.196.243'], [None], ['195.219.83.101'], [None], ['80.231.139.41'], ['80.231.139.66'], ['80.231.157.57'], ['212.89.3.226'], [None], [None], [None], [None], [None], [None]]


In [None]:
import rpy2
import rpy2.robjects.lib.ggplot2 as ggplot2
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
base = importr('base')

# the base of rpy2 plotting is matplotlib, thus we need to declare
# it inline in order to see the plots in the notebook
%matplotlib inline

# we need to activate the automatic conversion for pandas
from rpy2.robjects import pandas2ri
pandas2ri.activate()

# load the needed extension for the %%R cell magic
%load_ext rpy2.ipython

import pandas as pd
import numpy as np

import matplotlib.pylab as plt
plt.rcParams['figure.dpi'] = 200

In [None]:
df = pd.DataFrame.from_records(ps)
df = df[["dst_ip", "iou"]]
df

In [None]:
%%R -i df -w 900 -h 600

library(ggplot2)
ggplot(df, aes(x=iou)) + stat_ecdf()
ggsave("traceroutes-iou.pdf")