# Gateways
## This notebook is used to get information about Ripple gateways 

### 0. Preliminaries
This part is about reading and cleaning the transactions data, importing the right libraries.

In [1]:
import socket
import urllib.request
import json
import pickle
import collections
import math
import operator
import ccy
import numpy as np

from tqdm import tqdm_notebook as tqdm

from matplotlib import pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format='retina'

from random import shuffle

We use the Ripple Data API

In [2]:
from ripple_api import RippleDataAPIClient,RippleRPCClient
api = RippleDataAPIClient('https://data.ripple.com')

Read the data, run only once 

In [3]:
data = []
with open('../data/transactions/more-txs-from-missing-parsed.json') as f:
    for line in tqdm(list(f)):
        data.append(json.loads(line))
with open('../data/transactions/final_txs_2017.json') as f:
    for line in tqdm(list(f)):
        data.append(json.loads(line))

HBox(children=(IntProgress(value=0, max=1599868), HTML(value='')))




HBox(children=(IntProgress(value=0, max=2021479), HTML(value='')))




Try to recover some senderCurrency field that are missing. Getting currency of link with sender

In [4]:
for index in tqdm(range(len(data))):
    txn = data[index]
    if txn['success'] == 'tesSUCCESS':
        if 'senderCurrency' not in txn.keys() and 'links' in txn.keys() :
            sender = txn['sender']
            for link in txn['links']:
                if link['node1'] == sender:
                    txn['senderCurrency'] = link['currency']            

HBox(children=(IntProgress(value=0, max=3621347), HTML(value='')))




Uniform the data because we have fields 'actualReceiverCurrency' and 'receiverCurrency' so put them all under 'receiverCurrency'.

In [5]:
for index in tqdm(range(len(data))):
    txn = data[index]
    if txn['success'] == 'tesSUCCESS':
        if 'actualReceiverCurrency' in txn.keys() :
            txn['receiverCurrency'] = txn['actualReceiverCurrency']
            del txn['actualReceiverCurrency']

HBox(children=(IntProgress(value=0, max=3621347), HTML(value='')))




Uniform the data because we have fields 'actualReceiverAmount' and 'receiverCurrency' so put them all under 'receiverAmount'.

In [6]:
for index in tqdm(range(len(data))):
    txn = data[index]
    if txn['success'] == 'tesSUCCESS':
        if 'actualReceiverAmount' in txn.keys():
            txn['receiverAmount'] = txn['actualReceiverAmount']
            del txn['actualReceiverAmount']

HBox(children=(IntProgress(value=0, max=3621347), HTML(value='')))




Recover the senderCurrency, if not done before from the links then it is usually XRP.

In [7]:
for index in tqdm(range(len(data))):
    txn = data[index]
    if txn['success'] == 'tesSUCCESS':
        if 'senderCurrency' not in txn.keys():
            txn['senderCurrency'] = txn['receiverCurrency']    

HBox(children=(IntProgress(value=0, max=3621347), HTML(value='')))




In [8]:
print(json.dumps(data[1],indent = 4))
print(json.dumps(data[7],indent = 4))

{
    "fee": 1e-05,
    "hash": "3F7A1B3A771D477B03104E86CE73790BB8C8E516BEE680BB4AD9893444D21CF3",
    "sender": "rHb9CJAWyB4rj91VRWn96DkukG4bwdtyTh",
    "success": "tesSUCCESS",
    "XRPAcc": [
        {
            "isIncr": false,
            "account": "rHb9CJAWyB4rj91VRWn96DkukG4bwdtyTh",
            "amount": 1.100000000064938e-05
        },
        {
            "isIncr": true,
            "account": "rbvFfuUysurzPHq5kgs53A16j5svbFxgv",
            "amount": 1.0000001111620804e-06
        }
    ],
    "flags": "",
    "receiver": "rbvFfuUysurzPHq5kgs53A16j5svbFxgv",
    "date": "2013-06-29T13:10:20+00:00",
    "actualIssuerReceiver": "--",
    "receiverCurrency": "XRP",
    "receiverAmount": "0.000001",
    "senderCurrency": "XRP"
}
{
    "fee": 1.2e-05,
    "issuerReceiver": "--",
    "sender": "rQ9ArM8TdHmeQHvd969nQPP1TZosBhzdXg",
    "success": "tesSUCCESS",
    "paths": [
        [
            {
                "account": "ruazs5h1qEsqpke88pcqnaseXdm6od2xc"
            },


### 1. Gateways
We now use the api to get the known gateways. The we compoute the location of the gateways, and write to memory.

In [9]:
all_gateways = api.get_all_gateways()

In [10]:
print("There are",len(list(dict_gateways.values())),"known gateways.")

NameError: name 'dict_gateways' is not defined

Map gateway wallet addresses to the gateway name

In [None]:
gateways_address_to_name = dict() 
for currency,gateway_list in tqdm(all_gateways.items()):
    for gateway in gateway_list:
        gateway_name = gateway['name']
        gateway_account = gateway['account']
        gateways_address_to_name[gateway_account] = gateway_name

In [None]:
get_gateways = []
for address in tqdm(gateways_address_to_name.keys()):
    answer = api.get_gateway(address) #get information about each gateway
    if 'status' in answer.keys(): #error
        print(gateways_address_to_name[address])
    else:
        get_gateways.append(answer)

We also add the hot wallets to the dictionnary

In [None]:
for gateway in tqdm(get_gateways):
    gateway_name = gateway['name']
    for hotwallet in gateway['hotwallets']:
        gateways_address_to_name[hotwallet] = gateway_name

In [None]:
gateways_address_to_name

In [None]:
with open('../data/gateways/gateways_address_to_name.pickle', 'wb') as f:
    pickle.dump(gateways_address_to_name, f, protocol=pickle.HIGHEST_PROTOCOL) #save to disk

In [None]:
for gateway in tqdm(get_gateways):
    if 'domain' not in gateway.keys():
        print(get_gateways.index(gateway),gateway['name'])

In [None]:
get_gateways[27]['domain'] = 'bpgrefining.com' #add domain manually

In [None]:
# This method return the IP adrresses associated to a domain
def get_ip_list(domain):
    try:
        return socket.gethostbyname_ex(domain)[2] 
    except:
        return []

In [None]:
# This method return the country associated to a IP address
    with urllib.request.urlopen("https://geoip-db.com/jsonp/"+ip) as url:
        answer = url.read().decode()[9:-1] #remove first 9 char and the last of answer to be read to convert to dict
        answer = json.loads(answer)
        return (answer['country_name'])

In [None]:
gateway_to_country = {}
for gateway in tqdm(get_gateways):
        ip_list = get_ip_list(gateway['domain'])
        gateway_to_country[gateway['name']] = set()
        for ip in ip_list:
            gateway_to_country[gateway['name']].add(get_country_name(ip))    

For each gateway we have associated a set of countries. We now replace the set by strings.

In [None]:
#replace the set with a string 
#either '?' if empty 
#or the actual country in the set
for gateway,coutries in gateway_to_country.items():
    if coutries == set():
        gateway_to_country[gateway] = '?'
    if len(coutries) == 1:
        gateway_to_country[gateway] = coutries.pop()

List of gateways with their fiscal localisation according to https://wipple.devnull.network/research/gateways.html

In [None]:
gateways_names_to_fiscal_countries = { 
    'Bitstamp' : 'Britain (UK)',
    'Coinex' : 'Hong Kong',
    'Bitso' : 'Mexico',
    'Rippex' : 'Brazil?',
    'SnapSwap' : 'Luxembourg',
    'Gatehub Fifth' : 'Britain (UK)',
    'Gatehub' : 'Britain (UK)',
    'Mr. Exchange' : 'Japan',
    'The Rock Trading' : 'Italy',
    'Dividend Rippler' : '?',
    'Justcoin' : 'Norway',
    'Pax Moneta' : 'Korea (South)',
    'Payroutes' : 'Israel?',
    'Ripple LatAm' : '?',
    'WisePass' : 'Britain (UK)',
    'rippleCN' : '?',
    'Ripula' : 'Britain (UK)',
    'Ripple Exchange Tokyo' : '?',
    'TokyoJPY' : 'Japan',
    'Digital Gate Japan' : '?',
    'Ripple Trade Japan' : '?',
    'Ripple Singapore' : 'Singapore',
    'Lake BTC' : 'China',
    'Ripple Fox' : 'China',
    'RippleChina' : '?',
    'DotPayco' : '?',
    'RippleUnion' : 'Canada',
    'BPG' : 'Slovenia',
    'GBI' : 'United States',
    'EXRP' : 'Korea (South)',
}

We know the fiscal and the servers location for each gateway. We just merged them in a same dictionnary

In [None]:
for gateway_1,server_country in gateway_to_country.items():
    for gateway_2,fiscal_country in gateways_names_to_fiscal_countries.items():
        if gateway_1 == gateway_2:
            gateway_to_country[gateway_1] = fiscal_country + ', ' + server_country

In the `gateway_to_country` dictionnary, for a gateway the first coresponding country is the fiscal location and the second country is the servers location.

In [None]:
gateway_to_country

In [None]:
with open('../data/gateways/gateway_to_country.pickle', 'wb') as f:
    pickle.dump(gateway_to_country, f, protocol=pickle.HIGHEST_PROTOCOL) #save to disk

### 2. Currency and gateways
We now want to look for each transaction, if there is a gateway that is on the transaction path as a first or last hop. If that is the case we also want to know what is the currency of this first or last link. <br>
We then plot the currencies used for each gateway and conversly the gateways used for each currencies.

In [None]:
gateway_to_nb_txns_per_currency = collections.defaultdict(dict)

# add to the dictionnary one to the count of currency_link as hoptype for the gateway_name.
def add_hop(dictionnary,gateway_name,currency_link,hoptype):
    dictionnary[gateway_name] = dictionnary.get(gateway_name,{})
    dictionnary[gateway_name][hoptype] = dictionnary[gateway_name].get(hoptype,{})
    dictionnary[gateway_name][hoptype][currency_link] = dictionnary[gateway_name][hoptype].get(currency_link,0) + 1
    
# Iterate through all transactions  
for txn in tqdm(data):
    if(txn['success'] == 'tesSUCCESS'):
        if 'links' in txn.keys():
            sender = txn['sender']
            receiver = txn['receiver']
            for link in txn['links']:
                currency_link = link['currency']
                #first hop
                if sender == link['node1'] and sender == link['holder'] and link['isIncr'] == False:
                    if link['node2'] in gateways_address_to_name.keys():
                        gateway_name = gateways_address_to_name[link['node2']]
                        add_hop(gateway_to_nb_txns_per_currency,gateway_name,currency_link,'FirstHop')
                elif sender == link['node1'] and link['holder'] == link['node2'] and link['isIncr'] == True: 
                    if link['node2'] in gateways_address_to_name.keys():
                        gateway_name = gateways_address_to_name[link['node2']]
                        add_hop(gateway_to_nb_txns_per_currency,gateway_name,currency_link,'FirstHop')
                elif sender == link['node2'] and sender == link['holder'] and link['isIncr'] == False:
                    if link['node1'] in gateways_address_to_name.keys():        
                        gateway_name = gateways_address_to_name[link['node1']]
                        add_hop(gateway_to_nb_txns_per_currency,gateway_name,currency_link,'FirstHop')
                elif sender == link['node2'] and link['holder'] == link['node1'] and link['isIncr'] == True:
                    if link['node1'] in gateways_address_to_name.keys():        
                        gateway_name = gateways_address_to_name[link['node1']]
                        add_hop(gateway_to_nb_txns_per_currency,gateway_name,currency_link,'FirstHop')
                        
                #lasthop
                elif receiver == link['node1'] and receiver == link['holder'] and link['isIncr'] == True:
                    if link['node2'] in gateways_address_to_name.keys():
                        gateway_name = gateways_address_to_name[link['node2']]
                        add_hop(gateway_to_nb_txns_per_currency,gateway_name,currency_link,'LastHop')
                elif receiver == link['node1'] and link['holder'] == link['node2'] and link['isIncr'] == False: 
                    if link['node2'] in gateways_address_to_name.keys():
                        gateway_name = gateways_address_to_name[link['node2']]
                        add_hop(gateway_to_nb_txns_per_currency,gateway_name,currency_link,'LastHop')
                elif receiver == link['node2'] and receiver == link['holder'] and link['isIncr'] == True:
                    if link['node1'] in gateways_address_to_name.keys():
                        gateway_name = gateways_address_to_name[link['node1']]
                        add_hop(gateway_to_nb_txns_per_currency,gateway_name,currency_link,'LastHop')
                elif receiver == link['node2'] and link['holder'] == link['node1'] and link['isIncr'] == False:
                    if link['node1'] in gateways_address_to_name.keys():
                        gateway_name = gateways_address_to_name[link['node1']]
                        add_hop(gateway_to_nb_txns_per_currency,gateway_name,currency_link,'LastHop')
                        
for gateway,hops in gateway_to_nb_txns_per_currency.items():
    gateway_to_nb_txns_per_currency[gateway] = collections.OrderedDict(sorted(hops.items())) #make same FirstHop appears as first on the next graphs

In [None]:
currency_to_nb_txns_per_gateways = collections.defaultdict(dict)

# add to the dictionnary one to the count of gateway_name as hoptype for the currency_link.
def add_hop_2(dictionnary,gateway_name,currency_link,hoptype):
    dictionnary[currency_link] = dictionnary.get(currency_link,{})
    dictionnary[currency_link][hoptype] = dictionnary[currency_link].get(hoptype,{})
    dictionnary[currency_link][hoptype][gateway_name] = dictionnary[currency_link][hoptype].get(gateway_name,0) + 1

# Iterate through all transactions        
for txn in tqdm(data):
    if(txn['success'] == 'tesSUCCESS'):
        if 'links' in txn.keys():
            sender = txn['sender']
            receiver = txn['receiver']
            for link in txn['links']:
                currency_link = link['currency']
               #first hop
                if sender == link['node1'] and sender == link['holder'] and link['isIncr'] == False:
                    if link['node2'] in gateways_address_to_name.keys():
                        gateway_name = gateways_address_to_name[link['node2']]
                        add_hop_2(currency_to_nb_txns_per_gateways,gateway_name,currency_link,'FirstHop')
                elif sender == link['node1'] and link['holder'] == link['node2'] and link['isIncr'] == True: 
                    if link['node2'] in gateways_address_to_name.keys():
                        gateway_name = gateways_address_to_name[link['node2']]
                        add_hop_2(currency_to_nb_txns_per_gateways,gateway_name,currency_link,'FirstHop')
                elif sender == link['node2'] and sender == link['holder'] and link['isIncr'] == False:
                    if link['node1'] in gateways_address_to_name.keys():        
                        gateway_name = gateways_address_to_name[link['node1']]
                        add_hop_2(currency_to_nb_txns_per_gateways,gateway_name,currency_link,'FirstHop')
                elif sender == link['node2'] and link['holder'] == link['node1'] and link['isIncr'] == True:
                    if link['node1'] in gateways_address_to_name.keys():        
                        gateway_name = gateways_address_to_name[link['node1']]
                        add_hop_2(currency_to_nb_txns_per_gateways,gateway_name,currency_link,'FirstHop')
                
                #lasthop
                elif receiver == link['node1'] and receiver == link['holder'] and link['isIncr'] == True:
                    if link['node2'] in gateways_address_to_name.keys():
                        gateway_name = gateways_address_to_name[link['node2']]
                        add_hop_2(currency_to_nb_txns_per_gateways,gateway_name,currency_link,'LastHop')
                elif receiver == link['node1'] and link['holder'] == link['node2'] and link['isIncr'] == False: 
                    if link['node2'] in gateways_address_to_name.keys():
                        gateway_name = gateways_address_to_name[link['node2']]
                        add_hop_2(currency_to_nb_txns_per_gateways,gateway_name,currency_link,'LastHop')
                elif receiver == link['node2'] and receiver == link['holder'] and link['isIncr'] == True:
                    if link['node1'] in gateways_address_to_name.keys():
                        gateway_name = gateways_address_to_name[link['node1']]
                        add_hop_2(currency_to_nb_txns_per_gateways,gateway_name,currency_link,'LastHop')
                elif receiver == link['node2'] and link['holder'] == link['node1'] and link['isIncr'] == False:
                    if link['node1'] in gateways_address_to_name.keys():
                        gateway_name = gateways_address_to_name[link['node1']]
                        add_hop_2(currency_to_nb_txns_per_gateways,gateway_name,currency_link,'LastHop')
                   
for currency,hops in currency_to_nb_txns_per_gateways.items():
    currency_to_nb_txns_per_gateways[currency] = collections.OrderedDict(sorted(hops.items())) #make same FirstHop appears as first on the next graphs

In [None]:
plt.figure(figsize=(15,20)) 
number_of_gateways = len(gateway_to_nb_txns_per_currency.keys())
number_of_rows = math.ceil(number_of_gateways / 3)

#index for each subplot
index = 0 

#get one colors for each currency
number_of_currencies = len(currency_to_nb_txns_per_gateways.keys()) + 1 #+1 for the 'others'
colormap = plt.cm.rainbow
colors = [colormap(i) for i in np.linspace(0, 1,number_of_currencies)] #get number_of_currencies colors
shuffle(colors) #randomize the picked colors

#assign a number to each color
currency_to_colors = {}
index_color = 0
for currency in currency_to_nb_txns_per_gateways.keys():
    currency_to_colors[currency] = colors[index_color]
    index_color+=1

#now we plot
for gateway,hops in gateway_to_nb_txns_per_currency.items():
    index+=1
    plt.subplot(number_of_rows, 3, index)
    pos=0
    for hop,counts in hops.items():
        sorted_counts = collections.OrderedDict(sorted(counts.items(),key=operator.itemgetter(1),reverse = False))
        total = sum(sorted_counts.values())
        summ = 0 #where to start from (to stack the bar)
        littles = 0
        plotted_littles = False
    
        for currency,count in sorted_counts.items():
            bars = count/total*100
            
            #if under 5% we add to the littles category
            if bars < 5: 
                   littles+= bars
            else:
                if(not plotted_littles and littles != 0):
                    color = colors[-1] #last color
                    ploted_littles = True
                    plt.bar(pos,littles,width = 10,color = color)
                    plt.text(pos,littles*0.5,'others',horizontalalignment='center',verticalalignment='center')
                color = currency_to_colors[currency]
                plt.bar(pos,bars,bottom = summ,width = 10,color = color)
                
                #add country to currency string if it is not a cryptocurrency
                try:
                    currency+= (' ('+ccy.country(ccy.currency(currency).default_country)+')')
                except:
                    currency+= ('')
                
                plt.text(pos,summ+bars*0.5,currency,horizontalalignment='center',verticalalignment='center')

            summ+= bars
            
        pos+= 12.5
        
    plt.title(gateway + ' (' + gateway_to_country[gateway]+')')
    plt.xticks([0,12.5],list(hops.keys()))
    
plt.tight_layout()
plt.savefig('../visualisations/First_last_hop_currencies_per_gateways.png',dpi = 255)
plt.show()

In [None]:
plt.figure(figsize=(15,30))
number_of_currency = len(currency_to_nb_txns_per_gateways.keys())
number_of_rows = math.ceil(number_of_currency / 3)

#index for each subplot
index = 0

#get one colors for each currency
number_of_gateway = len(gateway_to_nb_txns_per_currency.keys()) + 1 #+1 for the others
colormap = plt.cm.rainbow
colors = [colormap(i) for i in np.linspace(0, 1,number_of_gateway)] #get number_of_currencies colors
shuffle(colors) #randomize the picked colors

#assign a number to each color
gateway_to_colors = {}
index_color = 0
for gateway in gateway_to_nb_txns_per_currency.keys():
    gateway_to_colors[gateway] = colors[index_color]
    index_color+=1

#now we plot
for currency,hops in currency_to_nb_txns_per_gateways.items():
    index+=1
    plt.subplot(number_of_rows, 3, index)
    pos=0
    for hop,counts in hops.items():
        sorted_counts = collections.OrderedDict(sorted(counts.items(),key=operator.itemgetter(1),reverse = False))
        total = sum(sorted_counts.values())
        summ = 0 #where to start from (to stack the bar)
        littles = 0
        plotted_littles = False
    
        for gateway,count in sorted_counts.items():
            bars = count/total*100
            
            #if under 5% we add to the littles category
            if bars < 5:
                   littles+= bars 
            else:
                if(not plotted_littles and littles != 0):
                    color = colors[-1] #last color
                    ploted_littles = True
                    plt.bar(pos,littles,width = 10,color = color)
                    plt.tfext(pos,littles*0.5,'littles',horizontalalignment='center',verticalalignment='center')
                color = gateway_to_colors[gateway]
                plt.bar(pos,bars,bottom = summ,width = 10,color = color)
                plt.text(pos,summ+bars*0.5,gateway + '\n(' + gateway_to_country[gateway]+')',horizontalalignment='center',verticalalignment='center')

            summ+= bars
            
        pos+= 12.5
        
    #add country to currency string if it is not a cryptocurrency
    try:
        currency+= (' ('+ccy.country(ccy.currency(currency).default_country)+')')
    except :
        currency+= ('')
        
    plt.title(currency)
    plt.xticks([0,12.5],list(hops.keys()))
    
plt.tight_layout()
plt.savefig('../visualisations/First_last_hop_gateways_per_currency.png',dpi = 255)
plt.show()