In [1]:
import json
import socket
import time

import numpy as np
import pandas as pd
import requests

In [2]:
# Load the raw dump as a single-column frame: with '\n' as the delimiter and
# no header row, each line of the file ends up as one entry of column 0.
# NOTE(review): recent pandas releases appear to reject '\n' as a delimiter --
# confirm against the pandas version used to run this notebook.
data = pd.read_csv('txs-parsed-jan-2013-aug-2017/final_txs_2017.txt', delimiter='\n', header=None)

The file as a whole is not valid JSON: it contains one object per line, `{...} {...} {...} {...}`, instead of a single array `[{...},{...},{...},{...}]`. Let's wrap the objects into a valid JSON array.

In [3]:
# Every entry of column 0 is the text of one JSON object; wrap them all in a
# single JSON array. str.join builds the result in one pass (repeated `+=`
# re-copies the growing string) and produces a valid '[]' when there are no
# rows, where slicing off the trailing comma used to leave a lone ']'.
json_list = '[' + ','.join(data[0]) + ']'

Then, we create a DataFrame from the data

In [4]:
# Decode the JSON array into a list of records, then build one row per record.
final_json = json.loads(json_list)
df = pd.DataFrame(final_json)

In [5]:
df.columns, len(df.columns)

(Index(['XRPAcc', 'actualIssuerReceiver', 'actualReceiverAmount',
        'actualReceiverCurrency', 'date', 'fee', 'flags', 'hash',
        'issuerReceiver', 'issuerSender', 'links', 'offers', 'paths',
        'receiver', 'receiverAmount', 'receiverCurrency', 'sender',
        'senderAmount', 'senderCurrency', 'success'],
       dtype='object'), 20)

## Basics

### Sender

In [6]:
df[['sender','senderAmount', 'senderCurrency']].head(10)

Unnamed: 0,sender,senderAmount,senderCurrency
0,rwNvFwF1jVKhJJo6bnNWqCeqjRVVM4pwmZ,,
1,rBbVbvuxc6HDUzm1eVmjZyY9zWzW23snMa,,
2,rLJj2tDG3hXhKmhijNchGzXHf6baa8HVw2,,
3,rogue5HnPRSszD9CWGSUz8UGHMVwSSKF6,0.10072363,CNY
4,ra7mnD89mz1sKNBDHo9NJcEJy9RMxMwdJX,,
5,ra7mnD89mz1sKNBDHo9NJcEJy9RMxMwdJX,,
6,ra7mnD89mz1sKNBDHo9NJcEJy9RMxMwdJX,,
7,rDYbrHsuaHHWrYEycWGf8d6E3UCy8sDQYB,,
8,rBz47fbhq449Xjqz6WzwXozHe212YJ3QUu,3562.068,CNY
9,rBz47fbhq449Xjqz6WzwXozHe212YJ3QUu,2242.806,CNY


sender : Account number of the sender

senderAmount : Amount of money sent. It contains the money + transaction fee + exchange fee

senderCurrency : Currency used by the sender

### Receiver

In [7]:
df[['receiver', 'actualReceiverAmount', 'receiverAmount', 'actualReceiverCurrency', 'receiverCurrency']].head(10)

Unnamed: 0,receiver,actualReceiverAmount,receiverAmount,actualReceiverCurrency,receiverCurrency
0,rJj4Y1Vk2ZeFCGLnxN1R8wNvE6Q1ZYzd2A,480.0,,XRP,
1,rUTonwTH88DLjaq1pNHVjzJwEtLTVVEB35,4.9,,XRP,
2,rh2GPYezFhVWaNJU89fQNechgu7R7DKzrt,20480.0,,XRP,
3,rogue5HnPRSszD9CWGSUz8UGHMVwSSKF6,0.342302,0.342302,XRP,XRP
4,rp2diYfVtpbgEMyaoWnuaWgFCAkqCAEg28,39000.0,,XRP,
5,rp2diYfVtpbgEMyaoWnuaWgFCAkqCAEg28,50000.0,,XRP,
6,rp2diYfVtpbgEMyaoWnuaWgFCAkqCAEg28,100070.0,,XRP,
7,rE8y6WyEEbToUp7oiPjzdAik4YkVezLPxV,166635.0,,XRP,
8,rJUdngAfcZmratmZtDCpUGkqK9D2vGYrQt,3526.8,3526.8,CNY,CNY
9,rJUdngAfcZmratmZtDCpUGkqK9D2vGYrQt,2220.6,2220.6,CNY,CNY


receiver : Account number of the receiver

receiverAmount : Amount of money received

receiverCurrency : Currency used by the receiver

### Issuer

In [8]:
df[['issuerSender', 'actualIssuerReceiver', 'issuerReceiver']].head(10)

Unnamed: 0,issuerSender,actualIssuerReceiver,issuerReceiver
0,,--,
1,,--,
2,,--,
3,rKiCet8SdvWxPXnAgYarFUXMh1zCPz432Y,--,--
4,,--,
5,,--,
6,,--,
7,,--,
8,--,rJUdngAfcZmratmZtDCpUGkqK9D2vGYrQt,--
9,--,rJUdngAfcZmratmZtDCpUGkqK9D2vGYrQt,--


issuerSender : Financial institution of the sender

issuerReceiver : Financial institution of the receiver

### Transaction properties

In [9]:
df[['fee','date','hash', 'offers', 'links','success']].head(10)

Unnamed: 0,fee,date,hash,offers,links,success
0,1.5e-05,2017-08-28T14:30:01+00:00,FEE8EE1CBCF388D647D9451E867F9D6A3A7644C8C9994F...,,,tesSUCCESS
1,1.2e-05,2017-04-18T10:06:00+00:00,F67A2A2B691E45488F93D095EBC9BA8B60FADC201C9596...,,,tesSUCCESS
2,1.5e-05,2017-08-24T08:56:01+00:00,EA9E2424E9BED5C01AF2BD295306905A2C1A2E839F9C67...,,,tesSUCCESS
3,0.01,2017-04-29T10:58:32+00:00,05624C6AD74ECEBAD7C872CDA7716B802C2E84B267DEC1...,"[{'takerPays': '971.048447 XRP', 'takerGets': ...","[{'isIncr': True, 'currency': 'CNY', 'amount':...",tesSUCCESS
4,1.5e-05,2017-06-27T11:17:02+00:00,C7C3007914E8B15890CA5B8F547AA7D5C7F5E28E4FA161...,,,tesSUCCESS
5,1.5e-05,2017-07-01T11:38:20+00:00,595E6544939C0479746B4648E86B05DBAFE87166968B11...,,,tesSUCCESS
6,1.5e-05,2017-07-01T11:44:01+00:00,524AE7E669B0B9F2AE0A1EDDB4407725172F5FB308B477...,,,tesSUCCESS
7,1.5e-05,2017-06-09T14:12:12+00:00,4A739E9FBE09D19A8E6F935325DC98C1CBF2512B37886C...,,,tesSUCCESS
8,1.2e-05,2017-06-14T11:36:32+00:00,89B86B846D4C9B157B8186C316834593648BDE04F328B0...,,"[{'isIncr': False, 'currency': 'CNY', 'amount'...",tesSUCCESS
9,1.2e-05,2017-06-29T14:24:22+00:00,40EDDACAA577B3B08B1DF613F034E527782D3C045B27A1...,,"[{'isIncr': False, 'currency': 'CNY', 'amount'...",tesSUCCESS


In [10]:
df['success'].drop_duplicates()

0                   tesSUCCESS
852602     tecUNFUNDED_PAYMENT
852834             tecPATH_DRY
853112         tecPATH_PARTIAL
853993     tecNO_DST_INSUF_XRP
854125       tecDST_TAG_NEEDED
1136781              tecNO_DST
Name: success, dtype: object

In [11]:
df['XRPAcc'][0]

[{'isIncr': False,
  'account': 'rwNvFwF1jVKhJJo6bnNWqCeqjRVVM4pwmZ',
  'amount': 480.000015},
 {'isIncr': True,
  'account': 'rJj4Y1Vk2ZeFCGLnxN1R8wNvE6Q1ZYzd2A',
  'amount': 480.0}]

fee : Cost of the transaction

date : Date of the transaction

hash : Hash of the transaction

offers : Exchange fee used in the transaction

links : Show exact path of the transaction

success : result of the transaction:
    * tesSUCCESS : successful transactions
    * others : unsuccessful, for more details see https://developers.ripple.com/transaction-results.html
   
XRPAcc : Summary of the transaction (like an acknowledgement?)

### Transaction options

In [12]:
df[['flags', 'paths']].head(10)

Unnamed: 0,flags,paths
0,[tfFullyCanonicalSig],
1,[tfFullyCanonicalSig],
2,[tfFullyCanonicalSig],
3,"[tfNoDirectRipple, tfPartialPayment, tfLimitQu...","[[{'currency': 'CNY', 'issuer': 'razqQKzJRdB4U..."
4,[tfFullyCanonicalSig],
5,[tfFullyCanonicalSig],
6,[tfFullyCanonicalSig],
7,[tfFullyCanonicalSig],
8,[tfFullyCanonicalSig],[[{'account': 'rKiCet8SdvWxPXnAgYarFUXMh1zCPz4...
9,[tfFullyCanonicalSig],[[{'account': 'rKiCet8SdvWxPXnAgYarFUXMh1zCPz4...


In [13]:
# Distinct flags in order of first appearance: dict keys preserve insertion
# order and silently drop repeats.
flags_list = list(dict.fromkeys(
    flag
    for row_flags in df['flags']
    for flag in row_flags
))
flags_list

['tfFullyCanonicalSig',
 'tfNoDirectRipple',
 'tfPartialPayment',
 'tfLimitQuality']

flags : Options for the transactions: 
* tfFullyCanonicalSig : Require a fully canonical signature
* tfNoDirectRipple : Use the path found in the paths field
* tfPartialPayment : Enable if the amount of money received can be decreased (for example if too much exchange fee)
* tfLimitQuality : Use path with a limited input/output ratio

paths : Specified path used for the transaction

## Questions

### What is the difference between receiver and actualReceiver?

In [14]:
df[['actualReceiverAmount', 'receiverAmount', 'actualReceiverCurrency', 'receiverCurrency']].head(10)

Unnamed: 0,actualReceiverAmount,receiverAmount,actualReceiverCurrency,receiverCurrency
0,480.0,,XRP,
1,4.9,,XRP,
2,20480.0,,XRP,
3,0.342302,0.342302,XRP,XRP
4,39000.0,,XRP,
5,50000.0,,XRP,
6,100070.0,,XRP,
7,166635.0,,XRP,
8,3526.8,3526.8,CNY,CNY
9,2220.6,2220.6,CNY,CNY


It looks like they represent the same information

In [15]:
# Keep only the rows where both amounts are present, then compute the share of
# those rows whose two amounts disagree.
temp = df[['actualReceiverAmount', 'receiverAmount']].dropna()
amounts_differ = temp['actualReceiverAmount'] != temp['receiverAmount']
int(amounts_differ.sum()) / len(temp)

0.183357129359397

About 18% of the rows where both amounts are present differ. So what does the difference represent?

### What is the difference between the actualIssuerReceiver and the issuerReceiver?

In [16]:
df[['actualIssuerReceiver', 'issuerReceiver']].head(10)

Unnamed: 0,actualIssuerReceiver,issuerReceiver
0,--,
1,--,
2,--,
3,--,--
4,--,
5,--,
6,--,
7,--,
8,rJUdngAfcZmratmZtDCpUGkqK9D2vGYrQt,--
9,rJUdngAfcZmratmZtDCpUGkqK9D2vGYrQt,--


In [17]:
# Treat the '--' placeholder as missing, drop every row where either issuer is
# missing, and check whether the two remaining columns match exactly.
issuer_columns = df[['actualIssuerReceiver', 'issuerReceiver']]
temp = issuer_columns.replace('--', np.nan).dropna()
temp['actualIssuerReceiver'].equals(temp['issuerReceiver'])

True

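There is no difference among the rows where both fields are set (rows with a missing value or the `--` placeholder in either field were dropped before the comparison).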

### About XRPAcc, is it exactly a summary of the transactions?

In [18]:
df['XRPAcc'].head(10)

0    [{'isIncr': False, 'account': 'rwNvFwF1jVKhJJo...
1    [{'isIncr': False, 'account': 'rBbVbvuxc6HDUzm...
2    [{'isIncr': False, 'account': 'rLJj2tDG3hXhKmh...
3    [{'isIncr': False, 'account': 'rmPbyAqWuvmtPHY...
4    [{'isIncr': True, 'account': 'rp2diYfVtpbgEMya...
5    [{'isIncr': True, 'account': 'rp2diYfVtpbgEMya...
6    [{'isIncr': True, 'account': 'rp2diYfVtpbgEMya...
7    [{'isIncr': False, 'account': 'rDYbrHsuaHHWrYE...
8    [{'isIncr': False, 'account': 'rBz47fbhq449Xjq...
9    [{'isIncr': False, 'account': 'rBz47fbhq449Xjq...
Name: XRPAcc, dtype: object

In [19]:
# Assumption: the entry with isIncr = False is the sender and the entry with
# isIncr = True is the receiver.
def parse_xrpacc(elem):
    """Extract (sender account, receiver account) from one XRPAcc value.

    Only values with exactly two entries are interpreted; the role of each
    entry is decided from the 'isIncr' flag of the first one. Any other
    length yields (nan, nan).
    """
    if len(elem) != 2:
        return np.nan, np.nan
    first, second = elem[0], elem[1]
    if first['isIncr']:
        # The first entry gained money, so the second one is taken as the sender.
        debited, credited = second, first
    else:
        debited, credited = first, second
    return debited['account'], credited['account']

In [20]:
# Store the (sender, receiver) pair parsed from each XRPAcc value.
df['acc'] = df['XRPAcc'].apply(parse_xrpacc)

In [21]:
# Split the parsed pairs into one column per role.
df['acc-sender'] = df['acc'].map(lambda pair: pair[0])
df['acc-receiver'] = df['acc'].map(lambda pair: pair[1])

In [22]:
df[['sender', 'acc-sender', 'receiver', 'acc-receiver']].dropna().head(10)

Unnamed: 0,sender,acc-sender,receiver,acc-receiver
0,rwNvFwF1jVKhJJo6bnNWqCeqjRVVM4pwmZ,rwNvFwF1jVKhJJo6bnNWqCeqjRVVM4pwmZ,rJj4Y1Vk2ZeFCGLnxN1R8wNvE6Q1ZYzd2A,rJj4Y1Vk2ZeFCGLnxN1R8wNvE6Q1ZYzd2A
1,rBbVbvuxc6HDUzm1eVmjZyY9zWzW23snMa,rBbVbvuxc6HDUzm1eVmjZyY9zWzW23snMa,rUTonwTH88DLjaq1pNHVjzJwEtLTVVEB35,rUTonwTH88DLjaq1pNHVjzJwEtLTVVEB35
2,rLJj2tDG3hXhKmhijNchGzXHf6baa8HVw2,rLJj2tDG3hXhKmhijNchGzXHf6baa8HVw2,rh2GPYezFhVWaNJU89fQNechgu7R7DKzrt,rh2GPYezFhVWaNJU89fQNechgu7R7DKzrt
3,rogue5HnPRSszD9CWGSUz8UGHMVwSSKF6,rmPbyAqWuvmtPHYykJMNgCxeiFye8Ge7H,rogue5HnPRSszD9CWGSUz8UGHMVwSSKF6,rogue5HnPRSszD9CWGSUz8UGHMVwSSKF6
4,ra7mnD89mz1sKNBDHo9NJcEJy9RMxMwdJX,ra7mnD89mz1sKNBDHo9NJcEJy9RMxMwdJX,rp2diYfVtpbgEMyaoWnuaWgFCAkqCAEg28,rp2diYfVtpbgEMyaoWnuaWgFCAkqCAEg28
5,ra7mnD89mz1sKNBDHo9NJcEJy9RMxMwdJX,ra7mnD89mz1sKNBDHo9NJcEJy9RMxMwdJX,rp2diYfVtpbgEMyaoWnuaWgFCAkqCAEg28,rp2diYfVtpbgEMyaoWnuaWgFCAkqCAEg28
6,ra7mnD89mz1sKNBDHo9NJcEJy9RMxMwdJX,ra7mnD89mz1sKNBDHo9NJcEJy9RMxMwdJX,rp2diYfVtpbgEMyaoWnuaWgFCAkqCAEg28,rp2diYfVtpbgEMyaoWnuaWgFCAkqCAEg28
7,rDYbrHsuaHHWrYEycWGf8d6E3UCy8sDQYB,rDYbrHsuaHHWrYEycWGf8d6E3UCy8sDQYB,rE8y6WyEEbToUp7oiPjzdAik4YkVezLPxV,rE8y6WyEEbToUp7oiPjzdAik4YkVezLPxV
13,rfzzj6YrYvirR4XoVAp6KY5X2THwVsCENP,rfzzj6YrYvirR4XoVAp6KY5X2THwVsCENP,rPVMhWBsfF9iMXYj3aAzJVkPDTFNSyWdKy,rPVMhWBsfF9iMXYj3aAzJVkPDTFNSyWdKy
14,rfzzj6YrYvirR4XoVAp6KY5X2THwVsCENP,rfzzj6YrYvirR4XoVAp6KY5X2THwVsCENP,rPVMhWBsfF9iMXYj3aAzJVkPDTFNSyWdKy,rPVMhWBsfF9iMXYj3aAzJVkPDTFNSyWdKy


In [23]:
# Among the rows where all four accounts are present, compute the share whose
# declared sender (resp. receiver) differs from the one parsed out of XRPAcc.
temp = df[['sender', 'acc-sender', 'receiver', 'acc-receiver']].dropna()
n_complete = len(temp)
sender_mismatches = int((temp['sender'] != temp['acc-sender']).sum())
receiver_mismatches = int((temp['receiver'] != temp['acc-receiver']).sum())
sender_mismatches / n_complete, receiver_mismatches / n_complete

(0.056082478704402444, 0.05436688690666855)

We see that the sender in the sender field is generally the same as the sender in the XRPAcc field (they differ in about 5.6% of the rows), and the same holds for the receiver (about 5.4%).

## Are gateways present in the account path?

In [24]:
# Download the gateway listing and decode the JSON response body; the next
# cell iterates over it per currency.
url = 'https://data.ripple.com/v2/gateways'
result = requests.get(url).json()

In [25]:
# `result` maps each currency to a list of gateway entries; flatten it into a
# set of (name, account) pairs, then project the names and the accounts.
gateways_set = {
    (gateway['name'], gateway['account'])
    for gateways_for_currency in result.values()
    for gateway in gateways_for_currency
}
gateways_accounts = {account for _, account in gateways_set}
gateways_names = {name for name, _ in gateways_set}
gateways_names

{'BPG',
 'BTC 2 Ripple',
 'Bitso',
 'Bitstamp',
 'Coinex',
 'Digital Gate Japan',
 'Dividend Rippler',
 'DotPayco',
 'EXRP',
 'GBI',
 'Gatehub',
 'Gatehub Fifth',
 'Justcoin',
 'Lake BTC',
 'Mr. Exchange',
 'Pax Moneta',
 'Payroutes',
 'Rippex',
 'Ripple Exchange Tokyo',
 'Ripple Fox',
 'Ripple LatAm',
 'Ripple Singapore',
 'Ripple Trade Japan',
 'RippleChina',
 'RippleUnion',
 'Ripula',
 'SnapSwap',
 'The Rock Trading',
 'TokyoJPY',
 'WisePass',
 'rippleCN'}

In [26]:
len(gateways_accounts)

34

In [27]:
def how_many_gateways(column):
    """Print the percentage of gateway accounts found, and not found, in df[column].

    Reads the module-level `df` and `gateways_accounts`; prints two lines and
    returns nothing.
    """
    seen_accounts = set(df[column].values)
    total = len(gateways_accounts)
    present = sum(1 for gateway in gateways_accounts if gateway in seen_accounts)
    absent = total - present
    print('Percentage of gateways in {} : {}'.format(column, present * 100 / total))
    print('Percentage of gateways not in {} : {}'.format(column, absent * 100 / total))

In [28]:
# Report gateway coverage for every column that holds an account address.
l = ['sender', 'receiver', 'issuerSender', 'issuerReceiver', 'actualIssuerReceiver']
for col in l:
    how_many_gateways(col)

Percentage of gateways in sender : 35.294117647058826
Percentage of gateways not in sender : 64.70588235294117
Percentage of gateways in receiver : 88.23529411764706
Percentage of gateways not in receiver : 11.764705882352942
Percentage of gateways in issuerSender : 94.11764705882354
Percentage of gateways not in issuerSender : 5.882352941176471
Percentage of gateways in issuerReceiver : 97.05882352941177
Percentage of gateways not in issuerReceiver : 2.9411764705882355
Percentage of gateways in actualIssuerReceiver : 100.0
Percentage of gateways not in actualIssuerReceiver : 0.0


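As we can see, all the gateway accounts are present in the `actualIssuerReceiver` field, and almost all of them (94% and 97%) in the `issuerSender` and `issuerReceiver` fields.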

In [29]:
# Gather every value that appears in any of the account-bearing columns.
transactions_accounts = set()
for account_column in ['issuerSender', 'issuerReceiver', 'sender', 'receiver', 'actualIssuerReceiver']:
    transactions_accounts.update(df[account_column].values)

## Can we build a topology using the links between parent and account?

In [32]:
# Time a small sample of per-account lookups to estimate whether fetching the
# parent of every account over HTTP is feasible.
N_REQUESTS = 10  # number of answered lookups to time; also used in the report below

start_time = time.perf_counter()
counter = 0
for acc in transactions_accounts:
    # Missing values in the account columns are NaN (a float); only string
    # values can be appended to the URL.
    if not isinstance(acc, str):
        continue
    url = 'https://data.ripple.com/v2/accounts/' + acc
    # Use a dedicated name so the gateway listing held in `result` is not overwritten.
    account_info = requests.get(url).json()
    if 'account_data' not in account_info:
        continue
    # .get avoids a KeyError if an account record carries no 'parent' entry.
    parent = account_info['account_data'].get('parent')
    if parent in gateways_accounts:
        print('ok', acc, parent)
    counter += 1
    if counter >= N_REQUESTS:
        break
time_elapsed = time.perf_counter() - start_time

# Report the number of lookups actually counted, so the message cannot drift
# away from the loop bound.
print('Time to do {} requests : {} seconds'.format(counter, time_elapsed))

Time to do 100 requests : 34.05596208572388 seconds


In [33]:
len(transactions_accounts)

220186

No, we are limited by the HTTP request time: one request per account is too slow for this many accounts.

## Can we build the topology by using getAccounts() ?

In [34]:
def get_acc_parent(iterations):
    """Query the accounts endpoint `iterations` times and collect (account, parent) pairs.

    Parameters
    ----------
    iterations : int
        Number of HTTP requests to issue.

    Returns
    -------
    set of tuple
        Distinct (account, parent) pairs found in the successful responses.
    """
    # NOTE(review): the exact same URL is requested on every iteration, so each
    # response presumably contains the same page of accounts; the API may offer
    # a pagination parameter for fetching further pages -- confirm in its docs.
    url = 'https://data.ripple.com/v2/accounts/?descending=True'
    acc_par_set = set()
    for _ in range(iterations):
        result = requests.get(url).json()
        # Check the status before touching the payload: the previous version
        # indexed result['accounts'] first, which raises KeyError when an
        # unsuccessful response has no 'accounts' entry.
        if result.get('result') != 'success':
            continue
        for acc in result.get('accounts', []):
            acc_par_set.add((acc['account'], acc['parent']))
    return acc_par_set

In [35]:
len(get_acc_parent(10))

200

No, because we always get the same subset

In [36]:
# Issue the same request twice and compare the decoded responses to check
# whether repeated calls return identical content.
url = 'https://data.ripple.com/v2/accounts/?descending=True'
result1 = requests.get(url).json()
result2 = requests.get(url).json()
result1 == result2

True

## Is 'parent' a gateway?

In [37]:
# Distinct parents among the accounts returned by the request above.
parent_set = {entry['parent'] for entry in result1['accounts']}

In [38]:
# A parent counts as "in" when its address is one of the gateway accounts.
total_parents = len(parent_set)
counter_in = sum(1 for parent in parent_set if parent in gateways_accounts)
counter_out = total_parents - counter_in
print('Percentage of parent that are gateways : {}'.format(counter_in * 100 / total_parents))
print('Percentage of parent that are not gateways : {}'.format(counter_out * 100 / total_parents))

Percentage of parent that are gateways : 0.0
Percentage of parent that are not gateways : 100.0


In this subset of 200 accounts, no parent is a gateway.

## Can we build the topology by considering the links and paths field in the transaction dataset?

In [39]:
# Build the node set and the set of undirected links from the `links` column.
nodes = set()
links = set()
for elem in df['links'].dropna():
    for link in elem:
        endpoints = (link['node1'], link['node2'])
        nodes.update(endpoints)
        # Store a link once regardless of direction: skip it when either
        # orientation has already been recorded.
        if endpoints not in links and endpoints[::-1] not in links:
            links.add(endpoints)

In [40]:
len(nodes), len(links)

(44807, 62739)

Yes, but

1) How can we be sure that the topology is complete?

2) How can we get the AS of an account?

In [41]:
gateways_set

{('BPG', 'rcoef87SYMJ58NAFx7fNM5frVknmvHsvJ'),
 ('BTC 2 Ripple', 'rMwjYedjc7qqtKYVLiAccJSmCwih4LnE2q'),
 ('Bitso', 'rG6FZ31hDHN1K5Dkbma3PSB5uVCuVVRzfn'),
 ('Bitstamp', 'rvYAfWj5gh67oV6fW32ZzP3Aw4Eubs59B'),
 ('Coinex', 'rsP3mgGb2tcYUrxiLFiHJiQXhsziegtwBc'),
 ('Digital Gate Japan', 'rJRi8WW24gt9X85PHAxfWNPCizMMhqUQwg'),
 ('Dividend Rippler', 'rfYv1TXnwgDDK4WQNbFALykYuEBnrR4pDX'),
 ('DotPayco', 'rM8199qFwspxiWNZRChZdZbGN5WrCepVP1'),
 ('EXRP', 'rPxU6acYni7FcXzPCMeaPSwKcuS2GTtNVN'),
 ('GBI', 'rrh7rf1gV2pXAoqA8oYbpHd8TKv5ZQeo67'),
 ('Gatehub', 'rhub8VRN55s94qWKDv6jmDy1pUykJzF3wq'),
 ('Gatehub Fifth', 'rDAN8tzydyNfnNf2bfUQY6iR96UbpvNsze'),
 ('Gatehub Fifth', 'rcA8X3TVMST1n3CJeAdGk1RdRCHii7N2h'),
 ('Gatehub Fifth', 'rchGBxcD1A1C2tdxF6papQYZ8kjRKMYcL'),
 ('Gatehub Fifth', 'rckzVpTnKpP4TJ1puQe827bV3X4oYtdTP'),
 ('Justcoin', 'rJHygWcTLVpSXkowott6kzgZU6viQSVYM1'),
 ('Lake BTC', 'rpDMez6pm6dBve2TJsmDpv7Yae6V5Pyvy2'),
 ('Mr. Exchange', 'rB3gZey7VWHYRqJHLoHDEJXJ2pEPNieKiS'),
 ('Pax Moneta', 'rUkMKjQi

## ripple.txt

In [49]:
# Look up each gateway by name and collect the domain it advertises, reporting
# the gateways whose record has no 'domain' entry.
domain_set = set()
for n in gateways_names:
    url = 'https://data.ripple.com/v2/gateways/' + n
    result = requests.get(url).json()
    if 'domain' not in result:
        print('No domain for {}'.format(n))
        continue
    domain_set.add(result['domain'])

No domain for BPG


ripple-exchange.tokyo -> No website </br>

ripple-market.jp -> No ripple.txt on website </br>

bitso.com -> No website</br>

ripplesingapore.com -> No website</br>

wisepass.com -> No website</br>

goldbullioninternational.com -> No ripple.txt on website</br>

coinexgateway.com -> No website</br>

justcoin.com -> No website</br>

No domain for BPG</br>

btc2ripple.com -> Security issue </br>

gatehub.net -> OK</br>

dividendrippler.com -> Forbidden error</br>

ripplelatam.com -> No ripple.txt ... Come on! Should I really continue?</br>

rippex.net</br>

gatehub.net</br>

therocktrading.com</br>

bitstamp.net</br>

xrpchina.net</br>

snapswap.us</br>

ripula.co.uk</br>

lakebtc.com</br>

mr-ripple.com</br>

tokyojpy.com</br>

ripplechina.net</br>

payroutes.com</br>

ripplecn.com</br>

paxmoneta.com</br>

rippletrade.jp</br>

rippleunion.com</br>

ripplefox.com</br>

exrp.co.kr</br>

In [58]:
# Resolve every gateway domain to its IP addresses.
for d in domain_set:
    try:
        add = socket.gethostbyname_ex(d)
    except (OSError, UnicodeError):
        # Only resolution failures are reported as "No IP": socket.gaierror and
        # socket.herror are OSError subclasses, and UnicodeError covers host
        # names that cannot be encoded. A bare `except:` also hid programming
        # errors such as a missing `socket` import.
        print('No IP for {}'.format(d))
    else:
        print('{} - {}'.format(d, add))

bitstamp.net - ('bitstamp.net', [], ['107.154.249.133', '45.60.110.133'])
No IP for ripula.co.uk
lakebtc.com - ('lakebtc.com', [], ['104.244.76.176'])
ripplefox.com - ('ripplefox.com', [], ['13.112.127.16'])
ripplecn.com - ('ripplecn.com', [], ['18.220.246.233'])
btc2ripple.com - ('btc2ripple.com', [], ['13.32.176.84', '13.32.176.233', '13.32.176.64', '13.32.176.56'])
ripplelatam.com - ('ripplelatam.com', [], ['192.185.4.55'])
rippex.net - ('rippex.net', [], ['104.24.96.2', '104.24.97.2'])
gatehub.net - ('gatehub.net', [], ['104.31.64.177', '104.31.65.177'])
exrp.co.kr - ('exrp.co.kr', [], ['50.63.202.56'])
wisepass.com - ('wisepass.com', [], ['104.20.244.28', '104.20.245.28'])
ripple-market.jp - ('ripple-market.jp', [], ['202.172.28.118'])
rippleunion.com - ('hdredirect-lb5-1afb6e2973825a56.elb.us-east-1.amazonaws.com', ['rippleunion.com'], ['23.20.239.12'])
rippletrade.jp - ('rippletrade.jp', [], ['183.181.98.81'])
mr-ripple.com - ('mr-ripple.com', [], ['18.205.177.181', '52.202.40.2

In [56]:
print(domain_set)

{'bitstamp.net', 'ripula.co.uk', 'lakebtc.com', 'ripplefox.com', 'ripplecn.com', 'btc2ripple.com', 'ripplelatam.com', 'rippex.net', 'gatehub.net', 'exrp.co.kr', 'wisepass.com', 'ripple-market.jp', 'rippleunion.com', 'rippletrade.jp', 'mr-ripple.com', 'coinexgateway.com', 'goldbullioninternational.com', 'payroutes.com', 'xrpchina.net', 'ripple-exchange.tokyo', 'justcoin.com', 'snapswap.us', 'ripplesingapore.com', 'dividendrippler.com', 'therocktrading.com', 'tokyojpy.com', 'bitso.com', 'paxmoneta.com', 'ripplechina.net'}


In [43]:
# Map each gateway account to its name. gateways_set holds (name, account)
# pairs, so going through dict(gateways_set) first keyed the data by name and
# kept a single account for gateways that own several (e.g. 'Gatehub Fifth');
# building the mapping straight from the pairs keeps every account.
gateway_dict = {account: name for name, account in gateways_set}

In [44]:
def top_issuer(col):
    """Count the rows of `df` per value of `col`, largest count first.

    The index of the returned Series is relabelled through `gateway_dict`
    (account -> gateway name); values with no entry there show up as NaN.
    """
    counts_by_issuer = df.groupby(col).size()
    ranked = counts_by_issuer.sort_values(ascending=False)
    ranked.index = ranked.index.map(gateway_dict)
    return ranked

In [45]:
top_issuer('issuerSender').head(15)

issuerSender
NaN                 637860
NaN                  27752
Ripple Fox           25205
NaN                  25020
Bitstamp             15028
Gatehub              11088
NaN                   8179
RippleChina           8071
Mr. Exchange          7676
TokyoJPY              4977
NaN                   3164
NaN                   2292
NaN                   1814
Gatehub Fifth         1177
The Rock Trading       747
dtype: int64

In [46]:
top_issuer('issuerReceiver').head(15)

issuerReceiver
NaN                 295680
TokyoJPY            115985
Mr. Exchange        112348
Bitstamp            104224
Ripple Fox           35808
NaN                  27723
NaN                  23721
Gatehub              21874
Payroutes            12580
NaN                  11874
RippleChina           6483
BTC 2 Ripple          3083
NaN                   2292
NaN                   2016
The Rock Trading      2000
dtype: int64

In [48]:
df.columns

Index(['XRPAcc', 'actualIssuerReceiver', 'actualReceiverAmount',
       'actualReceiverCurrency', 'date', 'fee', 'flags', 'hash',
       'issuerReceiver', 'issuerSender', 'links', 'offers', 'paths',
       'receiver', 'receiverAmount', 'receiverCurrency', 'sender',
       'senderAmount', 'senderCurrency', 'success', 'acc', 'acc-sender',
       'acc-receiver'],
      dtype='object')