In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

# Process Caida data

Please note that this notebook contains two steps: <br>
1) The exploration of the Caida file <br>
2) A verification that the graph respects the BGP rules <br>
<br>
The complete processing of the file in a Caida graph can be found in analysis.py in the root directory

In [2]:
data = pd.read_csv('20190301.as-rel2.txt',delimiter='\n', comment='#', header=None, encoding='ISO-8859-1')

In [3]:
data['provider_peer'], data['customer_peer'], data['relation_type'], data['source'] = data[0].str.split('|').str

In [4]:
data['relation_type']  = data['relation_type'].apply(lambda x: 'provider-customer' if x == '-1' else 'peer-peer')

In [5]:
links = data[['provider_peer','customer_peer','relation_type']]

In [6]:
ases = set(links['provider_peer'].values)
ases.update(links['customer_peer'].values)

In [7]:
links['link'] = list(zip(links['provider_peer'], links['customer_peer']))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [8]:
links = links[['link','relation_type']]
links_rel = {}
for index,row in links.iterrows():
    (s,t) = row['link']
    rel = row['relation_type']
    if rel == 'provider-customer':
        links_rel[tuple([s,t])] = rel
        links_rel[tuple([t,s])] = 'customer-provider'
    else: 
        if rel == 'peer-peer':
            links_rel[tuple([s,t])] = rel
            links_rel[tuple([t,s])] = 'peer-peer'
        else:
            print('Error')

In [9]:
as_graph = nx.Graph()
as_graph.add_nodes_from(ases)
for k,v in links_rel.items():
    as_graph.add_edge(k[0],k[1])

In [10]:
def shortest_path(s,t,G):
    actual_paths = []
    paths = nx.all_shortest_paths(G,s,t)
    for path in paths:
        actual_path = []
        for i in range(len(path)-1):
            l = tuple([path[i],path[i+1]])
            actual_path.append(links_rel[l])
        actual_paths.append(actual_path)
    return actual_paths

In [11]:
gate_as = ['14618','13335','37907','38895','60781','9371','16509','46606','26496','4766','45102','57127','17511','53667','19551']

In [12]:
def check_bgp_rules(path):
    up = False
    down = True
    for p in path:
        if p == 'customer-provider':
            up = True
            down = False
        if p == 'provider-customer':
            down = True
            up = False
        if down and up:
            return False
    return True

In [13]:
for s in gate_as:
    for t in gate_as:
        paths = shortest_path(s,t,as_graph)
        for path in paths:
            if not check_bgp_rules(path):
                print('Bad path')         