In [1]:
"""
Run this to start the script. It contains all the packages and user defined functions needed
"""

import csv
import blocksci
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker
import collections
import pandas as pd
import numpy as np
import datetime
from collections import Counter
%matplotlib notebook

# Some utility functions 
def get_balances(cluster=None, heights=None):
    """
    Method to compute the balances of a cluster for a given list of block heights.
    """
    return [(int(height), cluster.balance(int(height))) for height in heights]


def get_lifetime(address):
    """
    Method to get the lifetime of a Bitcoin Address which is the difference between the last 
    time the address released a payment and the first time it received a payment. Returns a tuple
    containing the address value, the first / last block time, first / last block height, and the 
    lifetime.
    """
    address_value = address.address_string
    first_block_time = address.first_tx.block_time
    first_block_height = address.first_tx.block_height
    try:
        last_time_block = address.out_txes()[-1].block_time
    except:
        last_time_block = None
    try:
        last_block_height = address.out_txes()[-1].block_height
    except:
        last_block_height = None
    try:
        life_time = address.out_txes()[-1].block_time - address.first_tx.block_time
    except:
        life_time = None
    return (address_value,
            first_block_time,
            first_block_height,
            last_time_block,
            last_block_height,
            life_time)


def get_lifetimes(addresses):
    """
    Method returning a list of tuples containing the lifetime given a list of Bitcoin Addresses. 
    """
    return [get_lifetime(address) for address in addresses]


def get_payments_df(txes, chain):
    """
    Method to compute the amount of received or spent payments in dollars given a list of 
    input or output transactions. It returns a DataFrame indexed by time with payments in 
    USD and the block height as columns. 
    """
    payments = [(tx.block, tx.block_height, tx.input_value, tx.hash, tx.ins, tx.outs) if isinstance(tx, blocksci.Tx)
                else (tx.block, tx.tx.block_height, tx.value, tx.tx.hash, tx.tx.ins, tx.tx.outs) for tx in txes]
    df = pd.DataFrame(payments, columns=["block", "height", "payment", "hash", "inputs", "outs"])
    return get_dollars_df(chain, df, "payment")

def get_dollars_df(chain, df=None, column_name=None):
    df.index = df["height"]
    converter = blocksci.CurrencyConverter()
    df = chain.heights_to_dates(df)
    df["usd"] = df.apply(lambda x: converter.satoshi_to_currency(x[column_name], chain[x["height"]].time), axis=1)
    df.index.name = "date"
    return df


def is_it_in_cluster(cluster=None, seed=None):
    """
    Method to get the size of a cluster. 
    """
    addresses = [address for address in cluster.addresses]
    return seed in addresses


def get_size(cluster=None, limit=10000):
    """
    Method to get the size of a cluster. This method does not count the number of addresses above 
    a certain limit (default: 10000).
    """    
    addresses = []
    iteration = 0
    for address in cluster.addresses:
        if iteration < limit:
            addresses.append(address)
            iteration += 1
        else:
            print("Too many addresses!")
            break
    return len(addresses)


#---------------------------------------------------------------
#Hidde additions

def from_list_strings_to_address(list_of_strings, chain):
    working = []
    for str_address in list_of_strings:
        try:
            block_address = chain.address_from_string(str_address)
            if not block_address == None:
                working.append(block_address)
        except RuntimeError:
            continue
    return working


def get_donors_counter(individual_list, chain):
    transactions = []
    for address in individual_list:
        transactions.append(get_payments_df(address.out_txes(), chain))
    
    transaction_df = pd.concat(transactions)
    donor_list = [tx.address for row in transaction_df['inputs'] for tx in row]
    donor_counter = Counter(donor_list)
    return donor_counter

def get_overlapping(transaction_list_0, transaction_list_1):
    list_0 = set([tx.address for row in transaction_list_0['inputs'] for tx in row])
    list_1 = set([tx.address for row in transaction_list_1['inputs'] for tx in row])
    
    overlapping = []
    overlapping_count = 0
    for address in list_0:
        if address in list_1:
            overlapping.append(address)
            overlapping_count += 1

    return overlapping, overlapping_count


def get_donations(address):
    """
    sometime a transaction is to multiple accounts. We retriece only the relevant amount
    """
    donations_tx = []
    for txs in address.out_txes():
        for tx in txs.outputs:
            if tx.address == address:
                donations_tx.append(tx)
    return donations_tx

def get_cash_out(address):
    """
    sometime a transaction is to multiple accounts. We retriece only the relevant amount
    """
    cash_out_tx = []
    for txs in address.in_txes():
        for tx in txs.inputs:
            if tx.address == address:
                cash_out_tx.append(tx)
    return cash_out_tx

In [4]:
# Start the chain
bitcoin_chain = blocksci.Blockchain("/home/ubuntu/bitcoin")


converter = blocksci.CurrencyConverter()

print(bitcoin_chain.blocks[-1].time)
print(bitcoin_chain.blocks[-1].height)


2018-06-15 07:01:06
527528


In [3]:
from collections import defaultdict
"""
We will check what addresses are correct first
"""

working_addresses = defaultdict(list)
chain_addresses = defaultdict(list)


with open('Blockchain_addresses.txt') as f:
    reader = csv.reader(f, delimiter="\t")
    d = list(reader)
    N_addresses_start = len(d)
    for address in d:
        try:
            pot = bitcoin_chain.address_from_string(address[1])
            if not pot == None:    
                working_addresses[address[0]].append(address[1])
                chain_addresses[address[0]].append(pot)
        except RuntimeError:
            continue    

            
            
N_working = len([item for sublist in list(working_addresses.values()) for item in sublist]) 

print("After checking all addresses we are left with {} addresses out of {}".format(N_working,
                                                                                    N_addresses_start))

After checking all addresses we are left with 140 addresses out of 299


In [4]:
from copy import copy
"""
Now we will check if these accounts have any transactions in them, if not we drop them 
"""

tx_working_addresses = copy(working_addresses)
tx_chain_addresses   = copy(chain_addresses)


for key in chain_addresses:
    for i, address in enumerate(chain_addresses[key]):
        N_in  = len(address.in_txes())
        N_out = len(address.out_txes())
        if N_in + N_out == 0:
            del tx_chain_addresses[key][i]
            del tx_working_addresses[key][i]

tx_chain_addresses   = dict([(k,v) for k,v in tx_chain_addresses.items() if len(v)>0])            
tx_working_addresses = dict([(k,v) for k,v in tx_working_addresses.items() if len(v)>0])   


N_2 = len([item for sublist in list(tx_working_addresses.values()) for item in sublist])


print("After checking all addresses for transactions we are left with {} addresses out of {}".format(N_2,
                                                                                              N_working))


"""
Duplicate addresses are also removed here
"""

for key in tx_chain_addresses:
     tx_chain_addresses[key] = list(dict.fromkeys(tx_chain_addresses[key]))
        
for key in tx_working_addresses:
     tx_working_addresses[key] = list(dict.fromkeys(tx_working_addresses[key]))
        
        
N_3 = len([item for sublist in list(tx_working_addresses.values()) for item in sublist])

print("After dropping duplicates we are left with {} addresses out of {}".format(N_3,N_2))


After checking all addresses for transactions we are left with 139 addresses out of 140
After dropping duplicates we are left with 71 addresses out of 139


In [47]:
"""
Now we will get some basic statistics per owner of each account
"""
stats_per_owner = defaultdict(dict)
all_donations   = defaultdict(list)
all_cash_out    = defaultdict(list)

testing = ['tolvajkergetok']

#tx_chain_addresses
for key in tx_chain_addresses:
    total_donations_usd = 0
    total_cash_out_usd = 0
    
    total_donations_btc = 0
    total_cash_out_btc = 0
    
    N_donations = 0
    unique_donors = set()
    
    current_balance_usd = 0
    current_balance_btc = 0 
    
    for address in tx_chain_addresses[key]:
        out_txes = address.out_txes()
        in_txes  = address.in_txes()
        
        if out_txes:
            donation_tx = get_donations(address)            
            donations = get_payments_df(donation_tx, bitcoin_chain)   

            total_donations_usd += donations.usd.sum()          
            total_donations_btc += donations.payment.sum()
            all_donations['Date'].extend(list(donations.index))
            all_donations['USD amount'].extend(list(donations.usd))
            all_donations['BTC amount'].extend(list(donations.payment / 1e8))
            all_donations['Channel'].extend([key] * donations.shape[0])
            all_donations['Donor'].extend(donations.inputs.apply(lambda x: x.address.all))
            
            N_donations += donations.shape[0]
            potential_new_donors = {i.address for row in donations.inputs for i in row}
            unique_donors = unique_donors | potential_new_donors

            latest_time = donations.index[-1]
            df = pd.DataFrame({'Balance':address.balance()}, index = [latest_time])
            balance = converter.satoshi_to_currency_df(df, bitcoin_chain)
            current_balance_usd += balance.Balance.iloc[0]
            current_balance_btc += address.balance()
            
        if in_txes:
            cash_out_tx = get_cash_out(address)
            cash_out = get_payments_df(cash_out_tx, bitcoin_chain)
            
            total_cash_out_usd  += cash_out.usd.sum()
            total_cash_out_btc  += cash_out.payment.sum()
            
            all_cash_out['Date'].extend(list(cash_out.index))
            all_cash_out['USD amount'].extend(list(cash_out.usd))
            all_cash_out['BTC amount'].extend(list(cash_out.payment/ 1e8))
            all_cash_out['Channel'].extend([key] * cash_out.shape[0])
            all_cash_out['Recipient'].extend(cash_out.outs.apply(lambda x: x.address.all))
            
    stats_per_owner[key]['Donations USD'] = total_donations_usd
    stats_per_owner[key]['Cash out USD'] = total_cash_out_usd
    
    stats_per_owner[key]['Donations BTC'] = total_donations_btc / 1e8
    stats_per_owner[key]['Cash out BTC'] = total_cash_out_btc / 1e8
    
    stats_per_owner[key]['N donations'] = N_donations
    stats_per_owner[key]['N unique donors'] = len(unique_donors)
      
    stats_per_owner[key]['Current balance USD'] = current_balance_usd
    stats_per_owner[key]['Current balance BTC'] = current_balance_btc / 1e8


In [48]:
df_stats_per_owner = pd.DataFrame(stats_per_owner).T
df_all_donations   = pd.DataFrame(all_donations)
df_all_cash_out    = pd.DataFrame(all_cash_out)

df_all_donations = df_all_donations.set_index('Date')
df_all_cash_out  = df_all_cash_out.set_index('Date')

df_stats_per_owner.to_csv('stats_per_owner.csv', sep =';')
df_all_donations.to_csv('all_donations.csv', sep =';')
df_all_cash_out.to_csv('all_cash_out.csv', sep =';')

In [46]:
correct = []

[12, 321, 5,1,3251] / 10

TypeError: unsupported operand type(s) for /: 'list' and 'int'