In [2]:
import pandas as pd
import numpy as np
import requests
import socket
from ipwhois import IPWhois
import json
import websocket
from websocket import create_connection
import matplotlib.pyplot as plt
import folium
import csv

# Recover gateways

In [3]:
url = 'https://data.ripple.com/v2/gateways'
result = requests.get(url).json()

## Recover domain and currencies using gateway account

In [4]:
def get_domain(account):
    domain = ''
    
    url = 'https://data.ripple.com/v2/gateways/' + account
    result = requests.get(url).json()
    if 'domain' in result:
        domain =  result['domain']
    else:
        domain = np.nan
        
    if account == 'razqQKzJRdB4UxFPWf5NEpEG3WMkmwgcXA':
        domain = 'wg.iripplechina.com'
    
    if account == 'rcoef87SYMJ58NAFx7fNM5frVknmvHsvJ':
        domain = 'bpgrefining.com'
    
    if account == 'rsP3mgGb2tcYUrxiLFiHJiQXhsziegtwBc':
        domain = 'coinex.co.nz'
    
    curr_set = set()
    if 'accounts' in result:
        for acc in result['accounts']:
            for c in(acc['currencies']):
                curr_set.add(c)
    return domain, curr_set

## Use previously recovered domain to get IP addresses

In [5]:
def get_ip(domain):
    try:
        return socket.gethostbyname_ex(domain)[2]
    except:
        return np.nan

### Bring everything together

In [6]:
gateway_dict = {}
for currency in result:
    for elem in result[currency]:
        name = elem['name']
        account = elem['account']
        domain, curr_set = get_domain(account)
        ip_list = get_ip(domain)
        gateway_dict[account] = {'name' : name, 'domain' : domain, 'currencies': curr_set, 'ip': ip_list}

In [7]:
gateways_df = pd.DataFrame.from_dict(gateway_dict,orient='index')

In [8]:
# RippleSingapore and DotPayco and JustCoin and PayRoutes are closed
# https://twitter.com/RippleSingapore/status/787552615005556736
# https://forum.ripple.com/viewtopic.php?f=3&t=15668
# https://www.ccn.com/norwegian-bitcoin-exchange-justcoin-exits-gracefully-after-being-dropped-by-bank
# https://www.xrpchat.com/topic/3607-a-list-of-bankrupt-gateways-lets-make/
# I highly suspect Ripula and Ripple exchange tokyo to be closed. They have no activity since 2014 (https://developers.ripple.com/data-api-v2-tool.html#get-account-transaction-history)

gateways_df

Unnamed: 0,name,domain,currencies,ip
r3ADD8kXSUKHd6zTCKfnKT3zV9EZHjzp1S,RippleUnion,rippleunion.com,{CAD},[23.20.239.12]
r94s8px6kSw1uZ1MV98dhSRTvc6VMPoPcN,TokyoJPY,tokyojpy.com,{JPY},"[104.28.13.46, 104.28.12.46]"
r9Dr5xwkeLegBeXq6ujinjSBLQzQ1zQGjH,Ripple Singapore,ripplesingapore.com,"{XAU, XAG, SGD, USD}",
r9ZFPSb1TFdnJwbTMYHvVwFK1bQPUCVNfJ,Ripple Exchange Tokyo,ripple-exchange.tokyo,{JPY},
rB3gZey7VWHYRqJHLoHDEJXJ2pEPNieKiS,Mr. Exchange,mr-ripple.com,"{STR, REP, LTC, BCC, ETC, DOG, JPY, ETH, ADA, ...","[52.202.40.211, 18.205.177.181]"
rBycsjqxD8RVZP5zrrndiVtJwht7Z457A8,Ripula,ripula.co.uk,{GBP},
rDAN8tzydyNfnNf2bfUQY6iR96UbpvNsze,Gatehub Fifth,gatehub.net,"{REP, BTC, ETC, ETH}","[104.31.64.177, 104.31.65.177]"
rG6FZ31hDHN1K5Dkbma3PSB5uVCuVVRzfn,Bitso,bitso.com,"{MXN, BTC}","[104.20.11.111, 104.20.12.111]"
rJHygWcTLVpSXkowott6kzgZU6viQSVYM1,Justcoin,justcoin.com,"{STR, NOK, LTC, EUR, BTC}",
rJRi8WW24gt9X85PHAxfWNPCizMMhqUQwg,Digital Gate Japan,ripple-market.jp,{JPY},[202.172.28.118]


## Convert IP addresses to ASN

In [9]:
def get_as(x):
    if type(x) is float:
        return np.nan, np.nan
    as_list = set()
    gateways_countries = set()
    for ip in x:
        try:
            obj = IPWhois(ip)
            result = obj.lookup_whois()
            as_list.add(result['asn'])
            gateways_countries.add(result['asn_country_code'])
        except:
            print('ERROR with ' + ip)
    return as_list, next(iter(gateways_countries))

In [10]:
gateways_df['asn'], gateways_df['countries'] = zip(*gateways_df['ip'].apply(lambda x: get_as(x)))

## Use ASN to get latitude/longitude

In [11]:
countries = pd.read_csv('country.csv', delimiter=',')
country_dict = countries.set_index('ISO 3166 Country Code')[['Latitude','Longitude']].dropna().to_dict()

In [12]:
gateways_df['latitude'] = gateways_df['countries'].apply(lambda x: country_dict['Latitude'].get(x),np.nan)
gateways_df['longitude'] = gateways_df['countries'].apply(lambda x: country_dict['Longitude'].get(x,np.nan))

In [13]:
gateways_df = gateways_df.dropna()
gateways_accounts = gateways_df.index

In [14]:
gateways_df.to_csv('gateways.csv', sep=',', encoding='utf-8')

## Is there any difference betweeen RouteViews and IP WhoIs?

In [15]:
ip_as = pd.read_csv('../Caida/RouteViews/routeviews-rv2-20190407-1200.pfx2as',delimiter='\t',header=None, names=['IP', 'length', 'ASN'])
ip_as = ip_as[['IP','ASN']]

In [16]:
ip_as_dict = ip_as.set_index('IP').to_dict()['ASN']

In [17]:
def replace_pos(ip, index):
    ip_list = ip.split('.')
    ip_list[index] = '0'
    return '.'.join(ip_list)

In [18]:
temp_gate = gateways_df[['ip','asn']].copy()
temp_gate['asn'] = temp_gate['asn'].apply(lambda x: next(iter(x)).replace(' ','_'))
gate_ip_as_dict = {}
for index, row in temp_gate.iterrows():
    for ip in row['ip']:
        gate_ip_as_dict[ip] = row['asn']

In [19]:
for ip in gate_ip_as_dict:
    if ip not in ip_as_dict:
        smaller = ip
        for i in range(3,-1,-1):
            smaller = replace_pos(smaller,i)
            if(smaller in ip_as_dict):
                if(ip_as_dict[smaller] != gate_ip_as_dict[ip]):
                    print('IP address: {} - IPWhois : {} - Routeviews {}'.format(
                        ip,gate_ip_as_dict[ip],ip_as_dict[smaller]))
                break
            if (i == 0):
                print('IP address not in the dataset: {}'.format(ip))
                print('IP address: {} - IPWhois : {} - Routeviews {}'.format(
                        ip,gate_ip_as_dict[ip],ip_as_dict['18.204.0.0']))

IP address not in the dataset: 18.205.177.181
IP address: 18.205.177.181 - IPWhois : 14618 - Routeviews 14618
IP address: 202.172.28.118 - IPWhois : 37907 - Routeviews 136518
IP address: 213.227.145.72 - IPWhois : 60781 - Routeviews 6739
IP address: 183.181.98.81 - IPWhois : 9371 - Routeviews 10013
IP address: 183.110.21.105 - IPWhois : 4766 - Routeviews 4134


We see that some IP address give different ASN if we use IPWhois or BGP routing. Is there any big difference?
1. 37907: DIGIROCK DigiRock, Inc., JP - 136518: WA-GOVERNMENT-AS-AP WA Government project, AU
2. 60781: LEASEWEB-NL-AMS-01 Netherlands, NL - 6739: ONO-AS Cableuropa - ONO, ES
3. 9371:  SAKURA-C SAKURA Internet Inc., JP - 10013: FBDC FreeBit Co.,Ltd., JP
4. 4766:  KIXS-AS-KR Korea Telecom, KR - 4134: CHINANET-BACKBONE No.31,Jin-rong Street, CN

As we can see, the difference is important. Even the country is different! Which one to choose?

# Compute paths

## Generate payload for API request

In [20]:
def gen_command(sender,receiver,receiver_currency):
    test_json = {
          "id": 2,
          "command": "ripple_path_find",
          "source_account": sender,
          "destination_account": receiver,
          "destination_amount": {
            "currency": receiver_currency,
            "value": "0.01",
            "issuer": receiver
          }
        }
    return test_json

## Convert API response in list of paths

In [21]:
def extract_paths(result):
    paths = []
    for p in result:
        path = []
        for acc in (p['paths_computed']):
            for c in acc:
                if 'account' in c:
                    path.append(c['account'])
                if 'issuer' in c:
                    path.append(c['issuer'])
        paths.append(path)
    return paths

## Bring everything together

In [22]:
def get_paths(sender,receiver,receiver_currency):
    to_send = gen_command(sender,receiver,receiver_currency)
    websocket.enableTrace(False)
    ws = create_connection('wss://s2.ripple.com:443')

    ws.send(json.dumps(to_send))
    result = ws.recv()
    return extract_paths(json.loads(result)['result']['alternatives'])

## Create edges

Create dictionary from gateway account to ASN

In [23]:
gate_to_as = gateways_df['asn'].dropna().apply(lambda x: next(iter(x))).to_dict()

Remove accounts in paths that are not gateways

In [24]:
def keep_gate(raw_links, gateways_accounts):
    gate_links = []
    for l in raw_links:
        only_gate = [elem for elem in l if elem in gateways_accounts]
        gate_links.append(only_gate)
    return gate_links

Extract edges between ASes

In [25]:
def links_to_as(gate_links, gate_to_as):
    links = set()
    for l in gate_links:
        for i in range(len(l)-1):
            source = gate_to_as[l[i]]
            dest = gate_to_as[l[i+1]]

            sources = source.split(' ')
            dests = dest.split(' ')
            for s in sources:
                for d in dests:
                    if(s != d):
                        original = (s,d)
                        reverse = (d,s)
                        if (original not in links) and (reverse not in links):
                            links.add(original)
    return links

Save computed links into a file

In [26]:
def save_path(links):
    with open('gateway_links.csv', 'a') as f:
        writer = csv.writer(f,lineterminator='\n')
        for tup in links:
            writer.writerow(tup)

In [36]:
def compute_paths(index_sender, sender, gateways_df):
    file = open("log_gateways.txt","a") 
    path_found = 0
    raw_links = []
    for index_receiver, receiver in gateways_df.iterrows():
        if not sender.equals(receiver):
            for c in receiver['currencies']:
                file.write('Sender : {} - Receiver : {} - Currency : {}\n'.format(index_sender,index_receiver,c))
                paths = get_paths(index_sender,index_receiver,c)
                count_in = set()
                count_out = set()
                file.write('# Paths found : {}\n'.format(len(paths)))
                if (len(paths) > 0):
                    path_found += len(paths)
                    for path in paths:
                        temp_path = [index_sender] + path + [index_receiver]
                        raw_links.append(temp_path)
                        if (len(temp_path) > 0):
                            for acc in temp_path:
                                if acc in gateways_accounts:
                                    count_in.add(acc)
                                else:
                                    count_out.add(acc)
                    
                    in_len = len(count_in)
                    out_len = len(count_out)
                    file.write('% of gateways : {:.0%} vs {:.0%} \n'.format(in_len/(in_len+out_len), out_len/(in_len+out_len)))              
    file.close()
    print('Sender : {} - # Paths : {}'.format(index_sender,path_found))
    return raw_links

In [31]:
 for index_sender, sender in gateways_df.iterrows():
    raw_links = compute_paths(index_sender,sender,gateways_df)
    gate_links = keep_gate(raw_links, gateways_accounts)
    links = links_to_as(gate_links, gate_to_as)
    save_path(links)

Sender : rLEsXccBGNR3UPuPu2hUXPjziKC3qKSBun - # Paths : 341
Sender : rMAz5ZnK73nyNUL4foAvaxdreczCkG3vA6 - # Paths : 158
Sender : rMwjYedjc7qqtKYVLiAccJSmCwih4LnE2q - # Paths : 326
Sender : rP5ShE8dGBH6hHtNvRESdMceen36XFBQmh - # Paths : 126
Sender : rPDXxSZcuVL3ZWoyU82bcde3zwvmShkRyF - # Paths : 296
Sender : rPxU6acYni7FcXzPCMeaPSwKcuS2GTtNVN - # Paths : 119
Sender : rUkMKjQitpgAM5WTGk79xpjT38DEJY283d - # Paths : 132
Sender : razqQKzJRdB4UxFPWf5NEpEG3WMkmwgcXA - # Paths : 282
Sender : rcA8X3TVMST1n3CJeAdGk1RdRCHii7N2h - # Paths : 111
Sender : rchGBxcD1A1C2tdxF6papQYZ8kjRKMYcL - # Paths : 95
Sender : rckzVpTnKpP4TJ1puQe827bV3X4oYtdTP - # Paths : 98
Sender : rcoef87SYMJ58NAFx7fNM5frVknmvHsvJ - # Paths : 100
Sender : rfNZPxoZ5Uaamdp339U9dCLWz2T73nZJZH - # Paths : 53
Sender : rfYv1TXnwgDDK4WQNbFALykYuEBnrR4pDX - # Paths : 216
Sender : rhub8VRN55s94qWKDv6jmDy1pUykJzF3wq - # Paths : 155
Sender : rnuF96W4SZoCJmbHYBFoJZpR8eCaxNvekK - # Paths : 238
Sender : rpDMez6pm6dBve2TJsmDpv7Yae6V5Pyvy2 - #

# Create Graph

In [32]:
links = set()
with open('gateway_links.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        links.add(tuple(row))

## Create nodes

In [33]:
def sanitize(x):
    elem = next(iter(x))
    if ' ' in elem:
        return '38895'
    return elem

In [34]:
gateways_df['lat-lon'] = list(zip(gateways_df.latitude, gateways_df.longitude))
ases = gateways_df[['asn','lat-lon']].copy()
ases['asn'] = ases['asn'].apply(lambda x: sanitize(x))
ases = ases.set_index('asn').to_dict()['lat-lon']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


## Use nodes ande edges to draw map

In [35]:
def generate_map(ases,edges):
    edges_position = set()
    for elem in edges:
        lat = ases[elem[0]]
        lon = ases[elem[1]]
        edges_position.add((lat,lon))
        
    # Make an empty map
    m = folium.Map(location=[20, 0], tiles="Mapbox Bright", zoom_start=2)

    # I can add marker one by one on the map
    for elem in ases.keys():
        folium.Marker(ases[elem], popup=elem).add_to(m)

    for elem in edges_position:
        folium.PolyLine(locations = elem, weight=1).add_to(m) 
    return m

In [36]:
generate_map(ases,links)

In [37]:
ases

{'14618': (38.0, -97.0),
 '13335': (38.0, -97.0),
 '37907': (36.0, 138.0),
 '38895': (38.0, -97.0),
 '60781': (52.5, 5.75),
 '9371': (36.0, 138.0),
 '16509': (38.0, -97.0),
 '46606': (38.0, -97.0),
 '26496': (38.0, -97.0),
 '4766': (37.0, 127.5),
 '45102': (38.0, -97.0),
 '57127': (46.0, 15.0),
 '17511': (36.0, 138.0),
 '53667': (38.0, -97.0),
 '19551': (38.0, -97.0)}

In [38]:
ases['13335'] = (40,-97)
ases['38895'] = (41,-99)
ases['16509'] = (42,-101)
ases['46606'] = (43,-103)
ases['26496'] = (44,-105)
ases['45102'] = (45,-107)
ases['53667'] = (46,-109)
ases['19551'] = (47,-111)

ases['17511'] = (39,140)
ases['9371'] = (41,142)

In [39]:
generate_map(ases,links)

In [40]:
gateways_df

Unnamed: 0,name,domain,currencies,ip,asn,countries,latitude,longitude,lat-lon
r3ADD8kXSUKHd6zTCKfnKT3zV9EZHjzp1S,RippleUnion,rippleunion.com,{CAD},[23.20.239.12],{14618},US,38.0,-97.0,"(38.0, -97.0)"
r94s8px6kSw1uZ1MV98dhSRTvc6VMPoPcN,TokyoJPY,tokyojpy.com,{JPY},"[104.28.13.46, 104.28.12.46]",{13335},US,38.0,-97.0,"(38.0, -97.0)"
rB3gZey7VWHYRqJHLoHDEJXJ2pEPNieKiS,Mr. Exchange,mr-ripple.com,"{LTC, ETC, DOG, ADA, REP, STR, ETH, BTC, BCC, ...","[52.202.40.211, 18.205.177.181]",{14618},US,38.0,-97.0,"(38.0, -97.0)"
rDAN8tzydyNfnNf2bfUQY6iR96UbpvNsze,Gatehub Fifth,gatehub.net,"{ETC, ETH, BTC, REP}","[104.31.65.177, 104.31.64.177]",{13335},US,38.0,-97.0,"(38.0, -97.0)"
rG6FZ31hDHN1K5Dkbma3PSB5uVCuVVRzfn,Bitso,bitso.com,"{MXN, BTC}","[104.20.12.111, 104.20.11.111]",{13335},US,38.0,-97.0,"(38.0, -97.0)"
rJRi8WW24gt9X85PHAxfWNPCizMMhqUQwg,Digital Gate Japan,ripple-market.jp,{JPY},[202.172.28.118],{37907},JP,36.0,138.0,"(36.0, 138.0)"
rKiCet8SdvWxPXnAgYarFUXMh1zCPz432Y,Ripple Fox,ripplefox.com,"{FMM, CNY, XLM, STR}",[13.112.127.16],{16509 38895},US,38.0,-97.0,"(38.0, -97.0)"
rKxKhXZCeSDsbkyB8DVgxpjy5AHubFkMFe,Rippex,rippex.net,"{BTC, BRL}","[104.24.96.2, 104.24.97.2]",{13335},US,38.0,-97.0,"(38.0, -97.0)"
rLEsXccBGNR3UPuPu2hUXPjziKC3qKSBun,The Rock Trading,therocktrading.com,"{USD, BTC, LTC, EUR}",[213.227.145.72],{60781},NL,52.5,5.75,"(52.5, 5.75)"
rMAz5ZnK73nyNUL4foAvaxdreczCkG3vA6,Ripple Trade Japan,rippletrade.jp,{JPY},[183.181.98.81],{9371},JP,36.0,138.0,"(36.0, 138.0)"
