In [1]:
import rpy2
import rpy2.robjects.lib.ggplot2 as ggplot2
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
base = importr('base')

# the base of rpy2 plotting is matplotlib, thus we need to declare
# it inline in order to see the plots in the notebook
%matplotlib inline

# we need to activate the automatic conversion for pandas
from rpy2.robjects import pandas2ri
pandas2ri.activate()

# load the needed extension for the %%R cell magic
%load_ext rpy2.ipython

import pandas as pd
import numpy as np

import matplotlib.pylab as plt
plt.rcParams['figure.dpi'] = 200

In [2]:
import subprocess

def get_asn(ip):
    """Resolve an IP address to an AS number via the local lookup service.

    The service is queried with ``curl`` at ``0.0.0.0:53661/v1/as/ip/<ip>``
    and the ``as_number`` field of the JSON answer is returned.

    Parameters
    ----------
    ip : str or None
        Address to look up. An empty or missing address is not queried.

    Returns
    -------
    The ``as_number`` value of the response, or ``None`` when the address is
    empty, ``curl`` cannot be started, the answer is not valid JSON, or the
    answer has no ``as_number`` field.
    """
    # Imported locally: the notebook-level ``import json`` lives in a later
    # cell, and a NameError raised here would otherwise be indistinguishable
    # from a failed lookup on a fresh kernel.
    import json

    if not ip:
        return None
    try:
        # Argument list instead of a shell string: the address often comes
        # from remote measurement data and must never be parsed by a shell.
        out = subprocess.run(
            ["curl", f"0.0.0.0:53661/v1/as/ip/{ip}"],
            stdout=subprocess.PIPE,
        )
        j = json.loads(out.stdout)
        return j['as_number']
    except (OSError, ValueError, KeyError, TypeError):
        # OSError: curl missing; ValueError: empty/invalid JSON;
        # KeyError/TypeError: JSON without a usable 'as_number' field.
        return None

# Spot-check the lookup with one sample address; as the last expression of
# the cell, its return value is what the notebook displays.
get_asn('195.24.196.122')

In [3]:
import requests
import json

def measurements_in_timerange(start, end):
    """Fetch stopped RIPE Atlas traceroute measurements started in a window.

    Parameters
    ----------
    start, end : int
        Inclusive bounds on the measurement start time, sent to the API as
        ``start_time__gte`` / ``start_time__lte``.

    Returns
    -------
    dict
        The decoded JSON body of the API response. The rest of the notebook
        reads its ``'count'`` and ``'results'`` entries.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status, instead of failing later
        with an unrelated JSON decoding error.
    """
    # NOTE(review): only a single response page is fetched; if the API
    # paginates, 'results' may hold fewer entries than 'count' - confirm.
    res = requests.get(
        "https://atlas.ripe.net/api/v2/measurements/traceroute",
        params={
            "status": "Stopped",
            "sort": "-id",
            "start_time__gte": start,
            "start_time__lte": end,
            "format": "json",
        },
        # Without a timeout a stalled connection blocks the cell forever.
        timeout=60,
    )
    res.raise_for_status()
    return res.json()

# Base one-hour window as Unix timestamps: 2020-04-10 20:00 -> 21:00 UTC.
timerange = (1586548800, 1586552400)
# Twelve consecutive one-hour windows, each shifted by a further 3600 s.
timeranges = [tuple(edge + 3600 * step for edge in timerange) for step in range(12)]
# One API response per window (one HTTP request each).
timerange_measurements = [measurements_in_timerange(start, end) for start, end in timeranges]
# Number of measurements the API reports for each window.
print([response['count'] for response in timerange_measurements])

[276, 88, 66, 164, 117, 94, 41, 173, 54, 42, 68, 249]


In [4]:
from ripe.atlas.sagan import Result
import itertools

def trim_path(src_asn, dst_asn, ip_path):
    """Keep only the hops between the source AS and the destination AS.

    Hops that are ``None`` are dropped first. Every remaining hop is then
    resolved with ``get_asn``; leading hops belonging to ``src_asn`` and
    trailing hops belonging to ``dst_asn`` are removed.

    Parameters
    ----------
    src_asn, dst_asn
        AS numbers of the path's source and destination.
    ip_path : list
        Hop addresses in path order; entries may be ``None``.

    Returns
    -------
    list or None
        The remaining hop addresses in path order (possibly empty), or
        ``None`` when fewer than three hops are known or any hop's AS
        lookup returned ``None``.
    """
    hops = [hop for hop in ip_path if hop is not None]
    if len(hops) < 3:
        return None

    hop_asns = [get_asn(hop) for hop in hops]
    if any(asn is None for asn in hop_asns):
        return None

    # Advance past the leading run of hops inside the source AS.
    first = 0
    while first < len(hops) and hop_asns[first] == src_asn:
        first += 1

    # Retreat past the trailing run of hops inside the destination AS,
    # never crossing what was already stripped from the front.
    last = len(hops)
    while last > first and hop_asns[last - 1] == dst_asn:
        last -= 1

    return hops[first:last]

def read_path(m):
    """Turn one raw traceroute result into a trimmed inter-AS path record.

    Parameters
    ----------
    m : dict
        A raw result record; its ``'from'`` entry is used as the source
        address and the whole record is parsed with ``Result.get``.

    Returns
    -------
    dict or None
        ``{'src_ip', 'dst_ip', 'src_asn', 'dst_asn', 'path'}`` where
        ``path`` is the output of ``trim_path``, or ``None`` when
        ``trim_path`` rejects the path.
    """
    src_ip = m['from']
    r = Result.get(m)
    dst_ip = r.destination_address
    src_asn = get_asn(src_ip)
    dst_asn = get_asn(dst_ip)
    try:
        # Only the first address reported for each hop is used.
        ippath = [hop[0] for hop in r.ip_path]
    except Exception:
        # Best effort: a result whose hops cannot be read is treated as an
        # empty path. ``Exception`` (not a bare ``except``) so Ctrl-C and
        # kernel interrupts still stop a long crawl.
        ippath = []
    res = trim_path(src_asn, dst_asn, ippath)
    if res is None:
        return None
    return {
        'src_ip': src_ip,
        'dst_ip': dst_ip,
        'src_asn': src_asn,
        'dst_asn': dst_asn,
        'path': res,
    }

def all_paths(m_id):
    """Collect every usable trimmed path of one RIPE Atlas measurement.

    Downloads the measurement's results, runs each record through
    ``read_path`` and keeps the records whose trimmed path is non-empty.

    Parameters
    ----------
    m_id
        Measurement id, interpolated into the results URL.

    Returns
    -------
    list of dict
        The ``read_path`` records that were not ``None`` and have at least
        one hop left in ``'path'``.
    """
    url = f"https://atlas.ripe.net/api/v2/measurements/{m_id}/results/?format=json"
    payload = json.loads(requests.get(url).text)
    # A lone JSON object is handled like a one-element result list.
    records = [payload] if isinstance(payload, dict) else payload
    parsed = (read_path(record) for record in records)
    return [entry for entry in parsed if entry is not None and len(entry['path']) > 0]

def exact_path_equals(ps):
    """Count how often each exact hop sequence occurs.

    Parameters
    ----------
    ps : iterable of sequences of str
        Paths, each a sequence of hop addresses.

    Yields
    ------
    (str, int)
        The path joined with ``'-'`` and the number of times it occurred,
        in ascending order of the joined string.
    """
    joined = sorted('-'.join(hops) for hops in ps)
    for path, run in itertools.groupby(joined):
        yield path, sum(1 for _ in run)

def group_paths(ps):
    """Group path records by (source AS, destination AS) and count paths.

    Only AS pairs observed by more than one record are reported; pairs
    with a single record are dropped.

    Parameters
    ----------
    ps : iterable of dict
        Records with ``'src_asn'``, ``'dst_asn'`` and ``'path'`` entries.
        The input is not modified.

    Yields
    ------
    dict
        ``{'aspair': '<src>-<dst>', 'path': <joined path>, 'pathcount': n}``
        for every distinct path of every reported AS pair, as counted by
        ``exact_path_equals``.
    """
    def _aspair(record):
        return (record['src_asn'], record['dst_asn'])

    def _sortable(asn):
        # An AS number is None when its lookup failed; None cannot be
        # ordered against ints, so such entries are sorted after all
        # resolved AS numbers instead of raising TypeError.
        return (asn is None, 0 if asn is None else asn)

    # sorted() rather than list.sort(): leaves the caller's list untouched
    # and accepts any iterable.
    ordered = sorted(ps, key=lambda record: tuple(_sortable(a) for a in _aspair(record)))
    for aspair, xs in itertools.groupby(ordered, key=_aspair):
        xs = list(xs)
        if len(xs) < 2:
            continue
        label = '-'.join(str(a) for a in aspair)
        for p, lp in exact_path_equals(x['path'] for x in xs):
            yield {'aspair': label, 'path': p, 'pathcount': lp}

import logging

# Suppress all log records up to and including CRITICAL for the crawl below.
logging.disable(logging.CRITICAL)

# For every hourly window: download the paths of each listed measurement,
# group them per AS pair, and keep one list of groups per window.
timerange_paths = []
for m in timerange_measurements:
    ps = [path for measurement in m['results'] for path in all_paths(measurement['id'])]
    path_groups = list(group_paths(ps))
    timerange_paths.append(path_groups)
print(timerange_paths)

[[{'aspair': '1140-8452', 'path': '217.170.9.122-213.254.223.17-213.200.118.206-46.33.85.190', 'pathcount': 2}, {'aspair': '1916-267138', 'path': '187.16.218.120-143.202.52.246', 'pathcount': 1}, {'aspair': '1916-267138', 'path': '200.219.139.246-186.219.25.2-45.6.128.91-143.202.52.246', 'pathcount': 1}, {'aspair': '8426-8452', 'path': '80.231.154.125-80.231.153.49-130.117.15.69-154.54.37.237-154.54.38.65-130.117.49.154-154.54.38.170-149.14.126.34', 'pathcount': 1}, {'aspair': '8426-8452', 'path': '80.231.154.125-80.231.153.49-130.117.15.69-154.54.61.33-154.54.61.17-130.117.49.154-154.54.38.170-149.14.126.34', 'pathcount': 1}, {'aspair': '15133-52631', 'path': '4.68.71.197-67.16.179.46-189.125.249.118', 'pathcount': 1}, {'aspair': '15133-52631', 'path': '64.215.101.205-67.16.147.161-67.16.166.58-67.16.178.169-204.246.245.158-67.16.179.46-189.125.249.118', 'pathcount': 1}, {'aspair': '15169-28226', 'path': '195.22.219.73-195.22.219.189', 'pathcount': 2}, {'aspair': '22548-28573', 'path'

In [None]:
def reorder_aspairs(path_groups):
    """Order path groups by the total path count of their AS pair.

    Consecutive records sharing an ``'aspair'`` are treated as one AS pair
    (the input is expected to arrive already grouped by ``'aspair'``).
    AS pairs are emitted in descending order of their summed
    ``'pathcount'``; pairs with equal totals keep their input order.

    Parameters
    ----------
    path_groups : iterable of dict
        Records with ``'aspair'`` and ``'pathcount'`` entries. The records
        themselves are not modified.

    Yields
    ------
    dict
        A copy of each record extended with ``'tot_pathcount'`` (sum of
        ``'pathcount'`` over its AS pair) and ``'path_idx'`` (position of
        the record within its AS pair, starting at 0).
    """
    as_order = []
    for _, paths in itertools.groupby(path_groups, key=lambda rec: rec['aspair']):
        paths = list(paths)
        as_order.append((sum(rec['pathcount'] for rec in paths), paths))
    as_order.sort(key=lambda entry: -entry[0])
    for tot, paths in as_order:
        for i, p in enumerate(paths):
            # Yield an extended copy: the input records may be shared with
            # other containers and re-running must not accumulate edits.
            yield {**p, 'tot_pathcount': tot, 'path_idx': i}
            
# NOTE(review): `path_groups` is whatever the crawl loop assigned last, i.e.
# the groups of the final hourly window only - confirm that plotting just
# that window (rather than all of `timerange_paths`) is intended.
path_groups = list(reorder_aspairs(path_groups))

In [None]:
# One row per record in `path_groups`; the frame is handed to the R cell
# below and displayed here as the cell's last expression.
df = pd.DataFrame.from_records(path_groups)
df

In [None]:
%%R -i df -w 900 -h 600

# Stacked columns: one column per AS pair, one segment per row of df,
# segment height given by that row's pathcount.
# (Cell options: -i passes the Python variable df into R; -w/-h set the
# plot width and height - see the rpy2 %%R magic documentation.)
library(ggplot2)
# Order the AS-pair factor levels by ascending tot_pathcount so the
# x axis follows that order.
lvls <- unique(df$aspair[order(df$tot_pathcount)])
df$ordered_aspair <- factor(df$aspair, levels = lvls)
# NOTE(review): path_idx is presumably numeric here, which ggplot2 maps to a
# continuous colour scale; factor(path_idx) would give discrete colours -
# confirm which is wanted.
ggplot(df, aes(x = ordered_aspair, y = pathcount, fill = path_idx)) + geom_col(position="stack")