In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import math
import folium
import csv
import json
import datetime
from ripple_helper import get_gateways_topology, get_gateways_df

# Processing transactions data

### Process transactions

In [2]:
# The dump is read as JSON Lines: one JSON-encoded transaction per line
# (the previous version wrapped the lines into one big JSON array, so each
# line is expected to be a complete JSON value).
# Parsing line by line avoids routing JSON through the CSV parser with '\n'
# as a "delimiter" and avoids building one giant string by concatenation.
TRANSACTIONS_PATH = 'txs-parsed-jan-2013-aug-2017/final_txs_2017.txt'

with open(TRANSACTIONS_PATH, encoding='utf-8') as txs_file:
    # Blank lines are skipped, as read_csv did by default.
    final_json = [json.loads(line) for line in txs_file if line.strip()]

# One row per transaction, one column per JSON key.
transactions = pd.DataFrame.from_dict(final_json)

### Get exchange rates

In [4]:
# Exchange-rate table; later cells read its 'date', 'from' and 'exchange' columns.
rates = pd.read_csv('exchange_rate.csv', sep=',')

### Process gateways

In [5]:
# Gateways table, indexed by the CSV's unnamed first column.
gateways_df = pd.read_csv('gateways.csv', sep=',')
gateways_df = gateways_df.set_index('Unnamed: 0')

# Drop the first two and last two characters of every 'asn' value
# (presumably a list-style wrapper such as "['...']" -- TODO confirm).
gateways_df['asn'] = gateways_df['asn'].map(lambda raw_asn: raw_asn[2:-2])

# The index values are used below as the set of known gateway accounts.
gateways_accounts = gateways_df.index

In [6]:
# For each account column, report which fraction of the filled-in values
# (anything other than the placeholders '--' and 'nan') is a known gateway.
cols = ['actualIssuerReceiver','issuerReceiver','issuerSender','receiver','sender']
for c in cols:
    column = transactions[c]
    filled = column[(column != '--') & (column != 'nan')]
    count = int(filled.isin(gateways_accounts).sum())
    print('Column: {} - Percentage of gateways: {}'.format(c, count/len(filled)))

Column: actualIssuerReceiver - Percentage of gateways: 0.2413851587544084
Column: issuerReceiver - Percentage of gateways: 0.26485471367175434
Column: issuerSender - Percentage of gateways: 0.09547136892453775
Column: receiver - Percentage of gateways: 0.024353950745963724
Column: sender - Percentage of gateways: 8.854902771683505e-05


In [7]:
# Map every gateway account (the index of gateways_df) to its 'asn' value.
account_to_asn = gateways_df['asn'].to_dict()
# Hard-coded override for one account.
# NOTE(review): the reason for this override is not visible here -- confirm.
account_to_asn.update({'rKiCet8SdvWxPXnAgYarFUXMh1zCPz432Y': '38895'})

In [8]:
def sanitize(x):
    """Collapse the combined value '16509 38895' to '38895'.

    Any other value is returned unchanged.
    """
    return '38895' if x == '16509 38895' else x

In [9]:
# Attach a (latitude, longitude) tuple to every gateway row.
# Note: this adds a 'lat-lon' column to gateways_df itself.
gateways_df['lat-lon'] = list(zip(gateways_df['latitude'], gateways_df['longitude']))

# Build a mapping from sanitized 'asn' value to its 'lat-lon' tuple.
ases = gateways_df[['asn', 'lat-lon']].copy()
ases['asn'] = ases['asn'].apply(sanitize)
ases = ases.set_index('asn')['lat-lon'].to_dict()

In [10]:
# Read gateway links as (first column, second column, 1) tuples.
# NOTE(review): the trailing 1 looks like an initial edge weight -- confirm.
links = []
# newline='' is what the csv module expects for files passed to csv.reader.
with open('gateway_links.csv', newline='') as csv_file:
    for row in csv.reader(csv_file, delimiter=','):
        if not row:
            # csv.reader yields [] for blank lines; skip them rather than
            # failing with IndexError on row[0].
            continue
        links.append((row[0], row[1], 1))

In [11]:
def resolve_sender_receiver(path, gateways=None):
    """Return the first and last gateway accounts found on a payment path.

    Parameters
    ----------
    path : iterable
        Ordered accounts of a payment path, from source to destination.
    gateways : container, optional
        Collection of gateway accounts used for the membership test.
        Defaults to the notebook-level ``gateways_accounts``.

    Returns
    -------
    tuple or None
        ``(sender, receiver)`` where ``sender`` is the first gateway on the
        path and ``receiver`` is the last one. ``None`` when the path has no
        gateway, or when the first and last gateway are the same account.
    """
    if gateways is None:
        gateways = gateways_accounts

    gateways_on_path = [node for node in path if node in gateways]
    if not gateways_on_path:
        return None

    # The previous backwards scan had no `break`, so it always ended on the
    # FIRST gateway of the path; the receiver must be the LAST one.
    sender, receiver = gateways_on_path[0], gateways_on_path[-1]
    if sender != receiver:
        return (sender, receiver)
    return None

In [12]:
# Seconds between the Unix epoch (1970-01-01T00:00:00Z) and
# 2000-01-01T00:00:00Z, the origin the numeric transaction dates are offset by.
RIPPLE_EPOCH_OFFSET = 946684800


def convert_date(d):
    """Convert a numeric date (seconds since 2000-01-01 UTC) to 'YYYY-MM'.

    The conversion is done in UTC, so the result does not depend on the time
    zone of the machine running the notebook. A naive ``fromtimestamp`` call
    uses local time and can shift timestamps near a month boundary into the
    neighbouring month.

    Parameters
    ----------
    d : int or float
        Seconds elapsed since 2000-01-01T00:00:00Z.

    Returns
    -------
    str
        Year and month of the timestamp, formatted as 'YYYY-MM'.
    """
    moment = datetime.datetime.fromtimestamp(
        d + RIPPLE_EPOCH_OFFSET, tz=datetime.timezone.utc
    )
    # isoformat() starts with 'YYYY-MM-DD...'; keep only the year and month.
    return moment.isoformat()[:7]

In [13]:
# Empty result table; one row per gateway-to-gateway transfer is added later.
PROCESSED_COLUMNS = ['sender', 'receiver', 'amount']
transactions_processed = pd.DataFrame(columns=PROCESSED_COLUMNS)

In [16]:
# Aggregate gateway-to-gateway transfers.
#
# For every (sender column, receiver column) pair, keep the transactions whose
# two accounts are both known gateways, map the accounts through
# account_to_asn, convert non-XRP amounts with the monthly exchange-rate table
# and collect one (sender, receiver, amount) record per usable transaction.
#
# Rows are collected in a list and the DataFrame is built once at the end:
# DataFrame.append copied the whole frame on every call and no longer exists
# in pandas >= 2.0. Rebuilding the result from scratch also makes this cell
# safe to re-run.
#
# NOTE(review): a transaction matching several column pairs is recorded once
# per matching pair, as before -- confirm this multiple counting is intended.
sender_cols = ['issuerSender', 'sender']
receiver_cols = ['issuerReceiver','receiver','actualIssuerReceiver']


def _month_key(date):
    """Return the 'YYYY-MM' key of a transaction date, or None if unsupported.

    Strings are assumed to start with 'YYYY-MM'; integers are passed to
    convert_date. Any other type (e.g. a float NaN) yields None.
    """
    if isinstance(date, str):
        return date[:7]
    if isinstance(date, (int, np.integer)) and not isinstance(date, bool):
        return convert_date(int(date))
    return None


# (month, currency) -> exchange rate. setdefault keeps the first matching row,
# which is what taking `.values[0]` of the filtered rates frame returned.
rate_lookup = {}
for rate_month, rate_currency, rate_value in zip(rates['date'], rates['from'], rates['exchange']):
    rate_lookup.setdefault((rate_month, rate_currency), rate_value)

records = []
count_valid = 0
count_invalid = 0
for s in sender_cols:
    for r in receiver_cols:
        mask = transactions[s].isin(gateways_accounts) & transactions[r].isin(gateways_accounts)
        valid_transactions = transactions.loc[
            mask, [s, r, 'actualReceiverAmount', 'actualReceiverCurrency', 'date']
        ]
        for sender_account, receiver_account, raw_amount, curr, date in zip(
            valid_transactions[s],
            valid_transactions[r],
            valid_transactions['actualReceiverAmount'],
            valid_transactions['actualReceiverCurrency'],
            valid_transactions['date'],
        ):
            source = account_to_asn[sender_account]
            dest = account_to_asn[receiver_account]
            amount = float(raw_amount)

            # Transfers within one AS and rows without an amount are ignored
            # and counted neither as valid nor as invalid.
            if source == dest or math.isnan(amount):
                continue

            if curr != 'XRP':
                # Non-XRP amounts need a rate for the transaction's month.
                rate_key = (_month_key(date), curr)
                if rate_key[0] is None or rate_key not in rate_lookup:
                    count_invalid += 1
                    continue
                amount = amount * rate_lookup[rate_key]

            count_valid += 1
            records.append({'sender': source, 'receiver': dest, 'amount': amount})

transactions_processed = pd.DataFrame(records, columns=['sender', 'receiver', 'amount'])

# A disabled experiment used to follow here: it added each amount to the
# 'weight' of every edge on the shortest path between source and dest in a
# `gateways_graph` (nx.shortest_path), printing a message when no path existed.

# Disabled experiment, kept as a bare string literal so it is never executed.
# Being the last expression of its cell, the string itself is echoed as the
# cell output. The code inside would walk the 'paths' of the transactions NOT
# selected by `mask`, rebuild each path's account list and record a transfer
# between the gateways returned by resolve_sender_receiver.
# NOTE(review): it relies on `mask`, `s` and `r`, none of which it defines
# itself, and it rescales `amount` inside the per-path loop -- review both
# before re-enabling.
'''
invalid_transactions = transactions[~mask][[s, r,'actualReceiverAmount', 'actualReceiverCurrency', 'paths', 'date']]
for index, row in invalid_transactions.iterrows():
    if type(row['paths']) is not float:
        source = row[s]
        dest = row[r]
        amount = float(row['actualReceiverAmount'])
        curr = row['actualReceiverCurrency']
        paths = row['paths']
        if (source != dest and not math.isnan(amount)):
            for p in paths:
                full_path = [source]
                for node in p:
                    if 'issuer' in node:
                        full_path.append(node['issuer'])
                    if 'account' in node:
                        full_path.append(node['account'])
                full_path.append(dest)
                reduced = resolve_sender_receiver(full_path)
                if reduced != None:
                    sender = account_to_asn[reduced[0]]
                    receiver = account_to_asn[reduced[1]]
                    if sender != receiver:

                        if (curr != 'XRP':
                            d = row['date'][:7]
                            exchange = rates[(rates['date'] == d) & (rates['from'] == curr)]['exchange'].values[0]
                            amount = amount*exchange
                        print('HEEEEEEEEYYYYY')
                        transactions_processed = transactions_processed.append({'sender' : sender , 'receiver' : receiver, 'amount' : amount} , ignore_index=True)
'''

"\ninvalid_transactions = transactions[~mask][[s, r,'actualReceiverAmount', 'actualReceiverCurrency', 'paths', 'date']]\nfor index, row in invalid_transactions.iterrows():\n    if type(row['paths']) is not float:\n        source = row[s]\n        dest = row[r]\n        amount = float(row['actualReceiverAmount'])\n        curr = row['actualReceiverCurrency']\n        paths = row['paths']\n        if (source != dest and not math.isnan(amount)):\n            for p in paths:\n                full_path = [source]\n                for node in p:\n                    if 'issuer' in node:\n                        full_path.append(node['issuer'])\n                    if 'account' in node:\n                        full_path.append(node['account'])\n                full_path.append(dest)\n                reduced = resolve_sender_receiver(full_path)\n                if reduced != None:\n                    sender = account_to_asn[reduced[0]]\n                    receiver = account_to_asn[reduced[1

In [17]:
# Recorded transfers vs. transfers dropped for lack of an exchange rate.
(count_valid, count_invalid)

(22994, 5198)

In [18]:
# Number of raw transactions vs. number of processed rows.
(len(transactions), len(transactions_processed))

(2021479, 22994)

In [19]:
# Persist the processed transfers (the index is written as the first column).
transactions_processed.to_csv(path_or_buf='transactions_processed.csv')