# Analysing the data from https://stat.ripe.net/data/asn-neighbours-history/data.json?resource=AS200020 
- ASNs that are neighbours of AS200020
- 89 days of observation (from 06-Jan till 06-Apr)

## Getting the json from RIPE 

In [1]:
import pandas as pd 
import urllib.request
import json

# RIPEstat "asn-neighbours-history" endpoint for AS200020.
# NOTE(review): the query passes no starttime/endtime, so the observation
# window is whatever the API defaults to at the moment the cell runs --
# confirm and pin the dates if the results must be reproducible.
url_ripe = "https://stat.ripe.net/data/asn-neighbours-history/data.json?resource=AS200020"

# Bound the request: without an explicit timeout urlopen can block forever
# on an unresponsive server and stall the whole notebook.
with urllib.request.urlopen(url_ripe, timeout=60) as response:
    ripe_payload = json.loads(response.read().decode())

# One row per top-level key of the JSON reply, with its value in column 0.
df = pd.DataFrame.from_dict(ripe_payload, orient='index')

df.tail()

Unnamed: 0,0
data_call_status,supported - connecting to ursa
process_time,36
build_version,2018.4.5.317
query_id,20180406135433-3127ae07-9372-4335-9841-fa83c83...
data,"{'resource': '200020', 'query_starttime': '201..."


## Filtering only the field 'data' (in the raw json)

In [2]:
# The 'data' row of the reply frame (column 0) holds the query result itself;
# expand that nested dict into its own frame.
ripe_data = df.loc['data', 0]
df_data = pd.DataFrame.from_dict(ripe_data)
df_data.head()

Unnamed: 0,earliest_time,latest_time,neighbours,query_endtime,query_starttime,resource
0,2000-08-01T00:00:00,2018-04-06T00:00:00,"{'neighbour': 174, 'timelines': [{'endtime': '...",2018-04-06T00:00:00,2018-01-06T00:00:00,200020
1,2000-08-01T00:00:00,2018-04-06T00:00:00,"{'neighbour': 1103, 'timelines': [{'endtime': ...",2018-04-06T00:00:00,2018-01-06T00:00:00,200020
2,2000-08-01T00:00:00,2018-04-06T00:00:00,"{'neighbour': 1126, 'timelines': [{'endtime': ...",2018-04-06T00:00:00,2018-01-06T00:00:00,200020
3,2000-08-01T00:00:00,2018-04-06T00:00:00,"{'neighbour': 1764, 'timelines': [{'endtime': ...",2018-04-06T00:00:00,2018-01-06T00:00:00,200020
4,2000-08-01T00:00:00,2018-04-06T00:00:00,"{'neighbour': 2018, 'timelines': [{'endtime': ...",2018-04-06T00:00:00,2018-01-06T00:00:00,200020


## Splitting the column 'neighbours'

In [3]:
# Unpack the nested 'neighbours' dicts into flat columns.
# NOTE(review): only the first entry of 'timelines' is read; if a neighbour
# can have several timeline entries, the later ones are ignored -- confirm
# that this is intended.
first_timeline = df_data['neighbours'].map(lambda entry: entry['timelines'][0])

df_data['as_neighbour'] = df_data['neighbours'].map(lambda entry: entry['neighbour'])
df_data['endtime'] = pd.to_datetime(first_timeline.map(lambda span: span['endtime']))
df_data['starttime'] = pd.to_datetime(first_timeline.map(lambda span: span['starttime']))

# Length of that first timeline entry.
df_data['delta_time'] = df_data['endtime'] - df_data['starttime']

## Sorting by how long an AS was observed as a neighbour ('delta_time')

In [4]:
# Shortest observation first; renumber the rows so the index follows the new order.
df_data = (
    df_data
    .sort_values('delta_time')
    .reset_index(drop=True)
)
df_data.head()

Unnamed: 0,earliest_time,latest_time,neighbours,query_endtime,query_starttime,resource,as_neighbour,endtime,starttime,delta_time
0,2000-08-01T00:00:00,2018-04-06T00:00:00,"{'neighbour': 34968, 'timelines': [{'endtime':...",2018-04-06T00:00:00,2018-01-06T00:00:00,200020,34968,2018-01-11 16:00:00,2018-01-11 16:00:00,0 days 00:00:00
1,2000-08-01T00:00:00,2018-04-06T00:00:00,"{'neighbour': 31615, 'timelines': [{'endtime':...",2018-04-06T00:00:00,2018-01-06T00:00:00,200020,31615,2018-03-14 00:00:00,2018-03-14 00:00:00,0 days 00:00:00
2,2000-08-01T00:00:00,2018-04-06T00:00:00,"{'neighbour': 48886, 'timelines': [{'endtime':...",2018-04-06T00:00:00,2018-01-06T00:00:00,200020,48886,2018-03-27 16:00:00,2018-03-27 16:00:00,0 days 00:00:00
3,2000-08-01T00:00:00,2018-04-06T00:00:00,"{'neighbour': 49453, 'timelines': [{'endtime':...",2018-04-06T00:00:00,2018-01-06T00:00:00,200020,49453,2018-03-27 16:00:00,2018-03-27 16:00:00,0 days 00:00:00
4,2000-08-01T00:00:00,2018-04-06T00:00:00,"{'neighbour': 42158, 'timelines': [{'endtime':...",2018-04-06T00:00:00,2018-01-06T00:00:00,200020,42158,2018-02-01 00:00:00,2018-01-30 16:00:00,1 days 08:00:00


## Enriching the ASNs (with Team Cymru)

In [5]:
from subprocess import Popen, PIPE
import os.path
import random
import time 

def teamcymru_asn_lookup(asn):
    """Look up an AS number in the Team Cymru whois service.

    Runs the ``whois`` client against ``whois.cymru.com`` with the query
    `` -v AS<asn>`` and parses the second line of its output (the first line
    is skipped; presumably it is the column header).

    Parameters
    ----------
    asn
        AS number without the ``AS`` prefix; it is rendered with ``str()``.

    Returns
    -------
    list of str
        The ``' | '``-separated fields of the second output line, with
        surrounding whitespace removed from each field.

    Raises
    ------
    IndexError
        If whois printed fewer than two lines (for example, the lookup
        failed). The message carries the exit status and stderr.
    OSError
        If the ``whois`` executable cannot be started.
    """
    # Pass an argument list rather than a shell string: the ASN originates
    # from downloaded data and must never be interpreted by a shell.
    # The leading space of the query is kept exactly as it was inside the
    # quotes of the original shell command.
    command = ['whois', '-h', 'whois.cymru.com', ' -v AS' + str(asn)]
    p = Popen(command, universal_newlines=True, stdout=PIPE, stderr=PIPE)
    # communicate() reads both pipes and waits for the process to exit,
    # so no separate wait() is needed.
    output, error = p.communicate()

    lines = output.split('\n')
    if len(lines) < 2:
        raise IndexError(
            'whois returned no record for AS{} (exit status {}): {}'.format(
                asn, p.returncode, error.strip()
            )
        )
    # Columns are padded for alignment; strip so values compare cleanly.
    return [field.strip() for field in lines[1].split(' | ')]

# One whois query per neighbour; each cell receives the list of parsed fields.
df_data['asn_info'] = df_data['as_neighbour'].map(teamcymru_asn_lookup)

## Splitting column 'asn_info'

In [6]:
# Position 1 of each whois record becomes the country, position 4 the AS name.
asn_records = df_data['asn_info']
df_data['asn_country'] = asn_records.map(lambda fields: fields[1])
df_data['asn_name'] = asn_records.map(lambda fields: fields[4])

## Keeping only the ASNs located in the Netherlands

In [7]:
# Rows whose whois country code is 'NL', reduced to the columns of interest.
is_dutch = df_data['asn_country'] == 'NL'
summary_columns = ['as_neighbour', 'delta_time', 'asn_country', 'asn_name']

df_only_nl_asn = df_data.loc[is_dutch, summary_columns].reset_index(drop=True)
df_only_nl_asn

Unnamed: 0,as_neighbour,delta_time,asn_country,asn_name
0,34968,0 days 00:00:00,NL,"IUNXI, NL"
1,31615,0 days 00:00:00,NL,"TMO-NL-AS, NL"
2,48886,0 days 00:00:00,NL,CSNET-AS +------------------------------------...
3,49453,0 days 00:00:00,NL,"GLOBALLAYER, NL"
4,42158,1 days 08:00:00,NL,"SENTIA-AS, NL"
5,48522,1 days 08:00:00,NL,"CGI-NEDERLAND-BV, NL"
6,48635,15 days 16:00:00,NL,"ASTRALUS, NL"
7,20495,17 days 00:00:00,NL,"WEDARE wd6.NET B.V, NL"
8,39591,21 days 08:00:00,NL,"GLOBAL-E, NL"
9,25151,23 days 08:00:00,NL,"CYSO-AS, NL"


## Keeping only NBIP neighbours that were observed for less than 89 days
I assume these are connected to NBIP just to receive protection!

In [8]:
# Neighbours observed for less than the cut-off are the candidates.
observation_cutoff = pd.to_timedelta('89 days')
seen_briefly = df_only_nl_asn['delta_time'] < observation_cutoff

potential_as_protected = df_only_nl_asn.loc[seen_briefly].reset_index(drop=True)
display(potential_as_protected)

Unnamed: 0,as_neighbour,delta_time,asn_country,asn_name
0,34968,0 days 00:00:00,NL,"IUNXI, NL"
1,31615,0 days 00:00:00,NL,"TMO-NL-AS, NL"
2,48886,0 days 00:00:00,NL,CSNET-AS +------------------------------------...
3,49453,0 days 00:00:00,NL,"GLOBALLAYER, NL"
4,42158,1 days 08:00:00,NL,"SENTIA-AS, NL"
5,48522,1 days 08:00:00,NL,"CGI-NEDERLAND-BV, NL"
6,48635,15 days 16:00:00,NL,"ASTRALUS, NL"
7,20495,17 days 00:00:00,NL,"WEDARE wd6.NET B.V, NL"
8,39591,21 days 08:00:00,NL,"GLOBAL-E, NL"
9,25151,23 days 08:00:00,NL,"CYSO-AS, NL"
