In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.options.display.float_format = '{:,.2f}'.format

In [3]:
import qgrid

qgrid.set_grid_option('maxVisibleRows', 10)
qgrid.set_grid_option('editable', False)

In [4]:
df = pd.read_csv('tether_transactions_522647.csv')
valid = df[df.is_valid == 1]

In [5]:
valid.tx_type.value_counts()

Simple Send               1490224
Crowdsale Purchase             97
Grant Property Tokens          80
Revoke Property Tokens          1
Name: tx_type, dtype: int64

In [6]:
len(valid)

1490402

In [7]:
valid.pivot_table(index='tx_type', values=['amount'], aggfunc=sum)

Unnamed: 0_level_0,amount
tx_type,Unnamed: 1_level_1
Crowdsale Purchase,113213.79
Grant Property Tokens,2550000000.0
Revoke Property Tokens,30000000.0
Simple Send,56683546721.74


In [8]:
simple = valid[valid.tx_type == 'Simple Send']
crowdsale = valid[valid.tx_type == 'Crowdsale Purchase']
grant = valid[valid.tx_type == 'Grant Property Tokens']
revoke = valid[valid.tx_type == 'Revoke Property Tokens']

burnt = valid[valid.reference_address.isnull()]

In [9]:
burnt.amount.sum()

30099925.6114425

In [10]:
def get_trans(address, df=valid):
    return df[df.sending_address == address], df[df.reference_address == address]

In [11]:
def compute_summary(df=valid):
    # Compute sent summary
    sent = df.pivot_table(index='sending_address',
                          values=['amount', 'block_time', 'is_valid'], 
                          aggfunc={'amount': np.sum, 'block_time': ['first', 'last'], 'is_valid': 'count'}).reset_index()
    # Renaming
    sent = sent.rename(columns={'sending_address': 'address', 
                                'amount': 'amount_sent', 
                                'block_time': '',
                                'is_valid': 'sent_count'})
    sent = sent.rename(columns={'first': 'sent_first', 'last': 'sent_last', 'sum': '', 'count': ''}, level=1)
    sent.columns = sent.columns.map(''.join)
    
    # Compute receive summary
    recv = df.pivot_table(index='reference_address',
                          values=['amount', 'block_time', 'is_valid'], 
                          aggfunc={'amount': np.sum, 'block_time': ['first', 'last'], 'is_valid': 'count'}).reset_index()
    # Renaming
    recv = recv.rename(columns={'reference_address': 'address', 
                                'amount': 'amount_recv', 
                                'block_time': '',
                                'is_valid': 'recv_count'}).reset_index()
    recv = recv.rename(columns={'first': 'recv_first', 'last': 'recv_last', 'sum': '', 'count': ''}, level=1)
    recv.columns = recv.columns.map(''.join)

    out = pd.merge(sent, recv, how='outer', on='address').fillna(0)
    out['balance'] = out['amount_recv'] - out['amount_sent']
    
    out['balance'] = out['balance'].astype(int)
    out['balance'] = out['balance'].astype(float)
    out['sent_count'] = out['sent_count'].astype(int)
    out['recv_count'] = out['recv_count'].astype(int)
    
    # Attach available labels to addresses
    addresses = pd.read_csv('address.csv').iloc[::-1].drop_duplicates('address').iloc[::-1]
    out = pd.merge(out, addresses, how='left', on='address')
    
    return out[['address', 'sent_count', 'recv_count', 'amount_sent', 'amount_recv', 'balance', 
                'recv_first', 'recv_last', 'sent_first', 'sent_last', 'id', 'exchange']].set_index('address')


In [12]:
def print_detail(address, df=valid):
    sent, recv = get_trans(address, df)
    
    print("Total sent: {:,.2f} ({} transactions to {} unique recepients)".format(sent.amount.sum(), len(sent), sent.reference_address.nunique()))
    print("Total received: {:,.2f} ({} transactions from {} unique senders)".format(recv.amount.sum(), len(recv), recv.sending_address.nunique()))
    print("Balance: {:,.2f} tokens".format(recv.amount.sum() - sent.amount.sum()))

In [13]:
%time summary = compute_summary()

CPU times: user 1min, sys: 1.1 s, total: 1min 1s
Wall time: 1min 2s


In [14]:
grant.reference_address.value_counts()

3BbDtxBSjgfTRxaBUgR2JACWRukLKtZdiQ    38
1NTMakcgVwQpMdGxRQnFKyb3G1FAJysSfz    33
1Nf3oM2pmoKx7M5oNUhyKYr3GLecineMHX     5
132j6EiUWNamSSjiYEQBhsbufsnBh4a28U     2
377UotoWsGk7K2Sx2QCnSVRBhCEZLtQtE4     2
Name: reference_address, dtype: int64

In [18]:
print_detail('16tg2RJuEPtZooy18Wxn2me2RhUdC94N7r')

Total sent: 0.00 (0 transactions to 0 unique recepients)
Total received: 30,950,000.00 (3 transactionsf from 1 unique senders)
Balance: 30,950,000.00 tokens


In [19]:
def get_sent(addresses, df=valid, cols=['sending_address', 'reference_address', 'amount']):
    return df[df.sending_address.isin(addresses)][cols]

In [20]:
def get_sent_all(address, df=valid, depth=1):
    sent = get_sent([address], df=df)

    for i in range(depth-1):
        sent = pd.concat([sent, get_sent(list(sent.reference_address), df=df)], axis=0)
        
    return sent

In [21]:
import networkx as nx
import matplotlib.pyplot as plt

from networkx.readwrite import json_graph
from bokeh.util.browser import view

import json, math

In [22]:
def plot_transactions(address, summary, df=valid, depth=1, count_nodes=240):
    g = get_sent_all(address, df, depth) \
            .groupby(['sending_address','reference_address']).agg(np.sum).reset_index()
    g = g.sort_values('amount', ascending=False)[:count_nodes]
    g['amount'] = g['amount'].astype(int)
    g = g.rename(columns={'amount': 'value'})
    
    G = nx.from_pandas_edgelist(g, 'sending_address', 'reference_address', ['value'], create_using=nx.DiGraph())
    pos = nx.kamada_kawai_layout(G)
    
    data = json_graph.node_link_data(G, {'link': 'edges', 'source': 'from', 'target': 'to'})
    
    for node in data['nodes']: 
        info = summary.loc[node['id']]
        size = int(info.balance)
        node['label'] = node['id'][:6] + '...'
        node['title'] = "Balance: {:,d}<br/>Total Sent: {:,d}<br/>Total Recv: {:,d}<br/>Number of sent tx: {:,d}<br/>Number of recv tx: {:,d}".format(size, int(info.amount_sent), int(info.amount_recv), int(info.sent_count), int(info.recv_count), )
        node['size'] = 2 if size < 4 else int(math.log(size, 2))
        node['x'] = pos[node['id']][0]
        node['y'] = pos[node['id']][1]
        
        if (not pd.isnull(info.id)):
            node['label'] = info.id
        
        if (node['id'] == address):
            node['color'] = { 'background':'#ccffcc', 'border': 'green' }
            node['size'] = 10 if size < 10 else size
        elif (not pd.isnull(info.exchange)):
            node['title'] = node['title'] + '<br/><br/>Info: ' + info.id
            node['group'] = 'Exchange'
        else:
            node['group'] = 'Normal'
            

    with open('notebook/data-notebook.json', 'w') as outfile:
        json.dump(data, outfile)
        
    view('http://0.0.0.0:8000/notebook')

In [23]:
print_detail('3BbDtxBSjgfTRxaBUgR2JACWRukLKtZdiQ')

Total sent: 700,137,097.47 (638 transactions to 284 unique recepients)
Total received: 700,137,124.94 (398 transactionsf from 187 unique senders)
Balance: 27.47 tokens


In [24]:
plot_transactions('3BbDtxBSjgfTRxaBUgR2JACWRukLKtZdiQ', summary, count_nodes=240, depth=4)

In [25]:
print("{:,d}".format(int(summary.iloc[1].amount_sent)))

20


In [26]:
widget = qgrid.show_grid(summary.reset_index(), grid_options={'forceFitColumns': False})
widget

In [117]:
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, NumeralTickFormatter
from bokeh.palettes import Spectral6
from bokeh.plotting import figure
from bokeh.transform import factor_cmap

output_notebook()

In [227]:
def plot_hist(address, df=valid):
    sent, recv = get_trans(address, df)
    
    sent['block_time'] = pd.to_datetime(sent['block_time']).dt.date
    recv['block_time'] = pd.to_datetime(recv['block_time']).dt.date
    
    sent = sent[['block_time', 'amount']].groupby(['block_time']).agg('sum').reset_index()
    recv = recv[['block_time', 'amount']].groupby(['block_time']).agg('sum').reset_index()
    
    sent.loc[:,'label'] = 'sent'
    recv.loc[:,'label'] = 'recv'
    
    trans = pd.concat([sent, recv]).sort_values('block_time', ascending=True)

    dates = np.array(list(trans['block_time']), dtype=np.datetime64)
    source = ColumnDataSource(data=dict(date=dates, 
                                        amount=trans['amount'],
                                        label=trans['label']))

    p = figure(plot_height=300, 
               plot_width=800,
               tools="xpan,xzoom_in,xzoom_out,box_zoom,reset",
               toolbar_location='left',
               x_axis_type="datetime",
               x_range=(dates[-1]-np.timedelta64(180, 'D'), dates[-1]))

    p.yaxis.formatter = NumeralTickFormatter(format="0,000")

    p.add_tools(HoverTool(
        tooltips=[
            ( 'date',   '@date{%F}'            ),
            ( 'tokens',  '@amount{0,000}' ), # use @{ } for field names with spaces
            ( 'type',  '@label' ), # use @{ } for field names with spaces
        ],

        formatters={
            'date': 'datetime', # use 'datetime' formatter for 'date' field
            'tokens': 'printf',   # use 'printf' formatter for 'adj close' field
        },

        # display a tooltip whenever the cursor is vertically in line with a glyph
        mode='vline'
    ))
    
    w = 1/2*24*60*60*1000 # halfday in ms

    p.vbar(x='date',top='amount', width=w, source=source, legend='label',
           line_width=0.1, line_color='white', fill_color=factor_cmap('label', palette=['#fc8d59', '#3288bd'], factors=['sent', 'recv']))

    show(p)
    
plot_hist('1FoWyxwPXuj4C6abqwhjDWdz6D4PZgYRjA')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
