In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
from ipwhois import IPWhois

### Get topology from Ripple

In [2]:
url = 'https://data.ripple.com/v2/network/topology?verbose=True'
result = requests.get(url).json()

### Extracting nodes and links dataframes

In [3]:
nodes_raw = result['nodes']
links_raw = result['links']

nodes = pd.DataFrame.from_dict(nodes_raw)
links = pd.DataFrame.from_dict(links_raw)

In [4]:
countries = pd.read_csv('country.csv', delimiter=',')

In [5]:
country_dict = countries.set_index('ISO 3166 Country Code')[['Latitude','Longitude']].dropna().to_dict()

In [6]:
nodes['country_lat'] = nodes['country_code'].apply(lambda x: country_dict['Latitude'].get(x,np.nan))
nodes['country_lon'] = nodes['country_code'].apply(lambda x: country_dict['Longitude'].get(x,np.nan))

# Observations

## Geographic 

### What are the most important cities?

In [7]:
top_cities = nodes[['city','lat','long']].copy()
city_count = top_cities.groupby('city').size()

top_cities = top_cities.dropna().drop_duplicates()
top_cities = top_cities.reset_index().set_index('city')
top_cities['city_count'] = city_count
top_cities.sort_values(by='city_count', ascending=False).head(10)

Unnamed: 0_level_0,index,lat,long,city_count
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ann Arbor,80,42.2734,-83.7133,66
Cambridge,138,42.3646,-71.1028,52
Cambridge,608,42.38,-71.1329,52
Ottawa,73,45.3433,-75.8265,46
Tokyo,69,35.6845,139.7559,37
Houston,24,29.6997,-95.5858,36
Ashburn,22,39.018,-77.539,34
Boardman,1,45.8696,-119.688,29
Fairfield,135,41.1412,-73.2637,22
Singapore,124,1.2931,103.8558,21


### What are the most important countries?

In [8]:
nodes.groupby('country').size().sort_values(ascending=False).reset_index().head(10)

Unnamed: 0,country,0
0,United States,489
1,Germany,82
2,Canada,72
3,"Korea, Republic of",61
4,Japan,56
5,China,39
6,Ukraine,30
7,Singapore,21
8,United Kingdom,19
9,Netherlands,19


In [9]:
links['source_city'] = links['source'].map(nodes.set_index('node_public_key')['city'])
links['target_city'] = links['target'].map(nodes.set_index('node_public_key')['city'])
links['source_country'] = links['source'].map(nodes.set_index('node_public_key')['country'])
links['target_country'] = links['target'].map(nodes.set_index('node_public_key')['country'])

In [10]:
links_city = links.copy().dropna()

### Which cities are the most connected?

In [11]:
links_city.groupby(['source_city', 'target_city']).size().sort_values(ascending=False).reset_index().head(10)

Unnamed: 0,source_city,target_city,0
0,Boardman,Houston,233
1,Boardman,Boardman,221
2,Boardman,Ann Arbor,190
3,Boardman,Ashburn,183
4,Houston,Ann Arbor,127
5,Boardman,Ottawa,122
6,Boardman,Cambridge,122
7,Boardman,Tokyo,120
8,Houston,Ashburn,118
9,Houston,Houston,116


### Which cities send money?

In [12]:
links_city.groupby('source_city').size().sort_values(ascending=False).reset_index().head(10)

Unnamed: 0,source_city,0
0,Boardman,2130
1,Houston,1208
2,Ashburn,1075
3,Dallas,569
4,San Jose,271
5,Incheon,258
6,Ann Arbor,186
7,Chantilly,184
8,New York,78
9,Cambridge,73


### Which cities receive money?

In [13]:
links_city.groupby('target_city').size().sort_values(ascending=False).reset_index().head(10)

Unnamed: 0,target_city,0
0,Boardman,717
1,Houston,668
2,Ann Arbor,636
3,Ashburn,620
4,Cambridge,402
5,Dallas,358
6,Ottawa,323
7,Tokyo,274
8,San Jose,220
9,Fairfield,194


### Which countries are the most connected?

In [14]:
links_city.groupby(['source_country', 'target_country']).size().sort_values(ascending=False).reset_index().head(10)

Unnamed: 0,source_country,target_country,0
0,United States,United States,4550
1,United States,Canada,453
2,United States,Japan,252
3,United States,Germany,186
4,"Korea, Republic of",United States,166
5,United States,Singapore,151
6,United States,"Korea, Republic of",122
7,Canada,United States,107
8,United States,Ireland,99
9,United States,Netherlands,92


### Which countries send money?

In [15]:
links_city.groupby('source_country').size().sort_values(ascending=False).reset_index().head(10)

Unnamed: 0,source_country,0
0,United States,6201
1,"Korea, Republic of",258
2,Canada,132
3,Germany,62
4,Singapore,28
5,Netherlands,26
6,Ireland,21
7,Japan,17
8,Greece,17
9,United Kingdom,16


### Which countries receive money?

In [16]:
links_city.groupby('target_country').size().sort_values(ascending=False).reset_index().head(10)

Unnamed: 0,target_country,0
0,United States,4987
1,Canada,517
2,Japan,274
3,Germany,195
4,Singapore,166
5,"Korea, Republic of",131
6,Ireland,112
7,Netherlands,95
8,Brazil,68
9,Australia,41


## Get ASN from IP 

In [17]:
dict_asn_city = {} 
def get_as(x):
    if type(x) is str and 'ffff' in x:
        x = x[7:]
    try:
        obj = IPWhois(x)
        result = obj.lookup_whois()
        for elem in result['nets']:
            city = elem['city']
            if city != None:
                dict_asn_city[result['asn']] = city
        return  result['asn']
    except:
        print('ERROR with ' + str(x))
        return np.nan

In [18]:
nodes['asn'] = nodes['ip'].apply(lambda x: get_as(x))
dict_asn_city

ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with 202.32.183.77
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan
ERROR with nan


{'16509': 'Seoul',
 '36351': 'Dallas',
 '14618': 'Seattle',
 '20473': 'Elk Grove Village',
 '16509 38895': 'Tokyo',
 '45102': 'San Mateo',
 '14061': 'New York',
 '63949': 'Galloway',
 '16276': 'Montreal',
 '15169': 'Mountain View',
 '7922': 'Mt Laurel',
 '8075': 'Redmond',
 '7018': 'Redmond',
 '2711': 'Waynesville',
 '2901': 'Albuquerque',
 '19318': 'Secaucus',
 '36850': 'Chapel Hill',
 '6327': 'Calgary',
 '32244': 'Lansing',
 '33387': 'North Kansas City',
 '54540': 'Dallas',
 '59210': 'Tempe',
 '852': 'Edmonton',
 '4181': 'Madison',
 '156': 'BOSTON',
 '32915': 'Corvallis',
 '6256': 'Charlotte',
 '19437': 'Tempe',
 '22611': 'Los Angeles',
 '25': 'Berkeley',
 '8560': 'Chesterbrook',
 '16591': 'Mountain View',
 '135377': 'Los Angeles',
 '33363': 'Greenwood Village',
 '19108': 'Tyler',
 '6128': 'Hicksville',
 '17': 'West Lafayette',
 '30083': 'Saint Louis'}

In [19]:
nodes_as = nodes[['node_public_key','ip','asn', 'country_lat', 'country_lon']].copy()

In [20]:
nodes_as = nodes_as.set_index('node_public_key')

In [21]:
node_to_asn_dict = nodes_as.to_dict()['asn']

In [22]:
nodes_as[['asn','country_lat','country_lon']].to_csv('ripple-asn-nodes.csv', sep=',', header=None)

In [23]:
links_as = links[['source', 'target']].copy()
links_as['source'] = links_as['source'].apply(lambda x: node_to_asn_dict[x])
links_as['target'] = links_as['target'].apply(lambda x: node_to_asn_dict[x])

In [24]:
links_as.to_csv('ripple-asn-links.csv', sep=',', header=None)

In [27]:
dict_asn_city

{'16509': 'Seoul',
 '36351': 'Dallas',
 '14618': 'Seattle',
 '20473': 'Elk Grove Village',
 '16509 38895': 'Tokyo',
 '45102': 'San Mateo',
 '14061': 'New York',
 '63949': 'Galloway',
 '16276': 'Montreal',
 '15169': 'Mountain View',
 '7922': 'Mt Laurel',
 '8075': 'Redmond',
 '7018': 'Redmond',
 '2711': 'Waynesville',
 '2901': 'Albuquerque',
 '19318': 'Secaucus',
 '36850': 'Chapel Hill',
 '6327': 'Calgary',
 '32244': 'Lansing',
 '33387': 'North Kansas City',
 '54540': 'Dallas',
 '59210': 'Tempe',
 '852': 'Edmonton',
 '4181': 'Madison',
 '156': 'BOSTON',
 '32915': 'Corvallis',
 '6256': 'Charlotte',
 '19437': 'Tempe',
 '22611': 'Los Angeles',
 '25': 'Berkeley',
 '8560': 'Chesterbrook',
 '16591': 'Mountain View',
 '135377': 'Los Angeles',
 '33363': 'Greenwood Village',
 '19108': 'Tyler',
 '6128': 'Hicksville',
 '17': 'West Lafayette',
 '30083': 'Saint Louis'}