In [None]:
import blocksci
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib notebook

In [None]:
# Some utility functions 
def get_balances(cluster=None, heights=None):
    """
    Compute the balance of a cluster at each of the given block heights.

    Every entry of ``heights`` is coerced with ``int()`` before it is handed
    to ``cluster.balance`` and before it is stored in the result.

    Returns a list of ``(height, balance)`` tuples, one per entry of
    ``heights`` and in the same order.
    """
    balances = []
    for raw_height in heights:
        block_height = int(raw_height)
        balances.append((block_height, cluster.balance(block_height)))
    return balances


def get_lifetime(address):
    """
    Summarise the lifetime of a Bitcoin address.

    The lifetime is the difference between the block time of the last
    transaction returned by ``address.out_txes()`` and the block time of
    ``address.first_tx``.

    NOTE(review): the original description calls the end point "the last time
    the address released a payment"; confirm that ``out_txes()`` (rather than
    an input-side accessor) is the list that matches that intent.

    Returns a tuple
    ``(address_string, first_block_time, first_block_height,
    last_block_time, last_block_height, lifetime)``.
    When the last transaction cannot be determined (for example because
    ``out_txes()`` is empty) the last three fields are all ``None``.
    """
    address_value = address.address_string
    first_tx = address.first_tx
    first_block_time = first_tx.block_time
    first_block_height = first_tx.block_height

    try:
        # Look the last transaction up once; the lookup can be expensive and
        # all three derived fields depend on the same transaction.
        last_tx = address.out_txes()[-1]
        last_time_block = last_tx.block_time
        last_block_height = last_tx.block_height
        life_time = last_time_block - first_block_time
    except Exception:
        # Best effort, as before, but without a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still stop a long-running loop.
        last_time_block = last_block_height = life_time = None

    return (address_value,
            first_block_time,
            first_block_height,
            last_time_block,
            last_block_height,
            life_time)


def get_lifetimes(addresses):
    """
    Apply ``get_lifetime`` to every address in ``addresses``.

    Returns a list with one lifetime tuple per address, in input order.
    """
    lifetimes = []
    for address in addresses:
        lifetimes.append(get_lifetime(address))
    return lifetimes


def get_payments_df(txes):
    """
    Build a USD-denominated payments table from a collection of transactions.

    Each transaction contributes one row made of its ``block_height`` and its
    ``input_value``. The rows are then passed to ``get_dollars_df``, which
    adds the ``usd`` column and indexes the frame by date.

    NOTE(review): ``input_value`` is used for every transaction, whether the
    caller passes input or output transactions -- confirm that this is the
    intended amount in both cases.
    """
    rows = []
    for tx in txes:
        rows.append((tx.block_height, tx.input_value))
    payments_df = pd.DataFrame(rows, columns=["height", "payment"])
    return get_dollars_df(payments_df, "payment")


def get_dollars_df(df=None, column_name=None):
    """
    Add a ``usd`` column to a frame of satoshi amounts and index it by date.

    df: DataFrame with a ``height`` column (block heights) and a column named
        ``column_name`` holding the amounts to convert.
    column_name: name of the column whose values are passed to
        ``CurrencyConverter.satoshi_to_currency``.

    Returns a new DataFrame: the result of ``chain.heights_to_dates`` applied
    to the height-indexed frame, plus a ``usd`` column, with the index named
    ``date``. The frame passed in is left untouched.

    Relies on the notebook-level ``chain`` object, which must be defined
    before this function is called.
    """
    # set_index returns a new frame (the "height" column is kept), so the
    # caller's frame keeps its own index instead of being overwritten in place.
    by_height = df.set_index("height", drop=False)
    converter = blocksci.CurrencyConverter()
    dated = chain.heights_to_dates(by_height)
    # Built as a plain list rather than a row-wise apply: this also works for
    # an empty frame, and heights are coerced with int() (as get_balances
    # does) so they never reach the chain lookup as floats.
    dated["usd"] = [
        converter.satoshi_to_currency(amount, chain[int(height)].time)
        for amount, height in zip(dated[column_name], dated["height"])
    ]
    dated.index.name = "date"
    return dated


def is_it_in_cluster(cluster=None, seed=None):
    """
    Tell whether ``seed`` is one of the addresses of ``cluster``.

    The addresses are scanned lazily and the scan stops at the first match,
    so no intermediate list of the whole cluster is built.

    Returns True if an address of the cluster is (or compares equal to)
    ``seed``, False otherwise.
    """
    # Same identity-or-equality test that list membership performs.
    return any(address is seed or address == seed for address in cluster.addresses)


def get_size(cluster=None, limit=10000):
    """
    Count the addresses of a cluster, stopping once ``limit`` is reached.

    When the cluster holds more than ``limit`` addresses (default: 10000) the
    message "Too many addresses!" is printed and ``limit`` is returned;
    otherwise the exact number of addresses is returned.
    """
    counted = 0
    for _ in cluster.addresses:
        if counted >= limit:
            print("Too many addresses!")
            break
        counted += 1
    return counted

In [None]:
# Open the parsed blockchain. The path must point to the data directory that
# blocksci_parser wrote its output to.
chain = blocksci.Blockchain("/home/ubuntu/bitcoin")

In [None]:
# Current workaround to disable the change address heuristic
# (https://citp.github.io/BlockSci/reference/heuristics/change.html).
# Subtracting the legacy heuristic from itself presumably yields a heuristic
# that never flags any output as change -- TODO confirm against the linked docs.
no_change_heuristic = blocksci.heuristics.change.legacy() - blocksci.heuristics.change.legacy() 

In [None]:
# Use the ClusterManager to cluster the blockchain using ONLY the multi-input
# heuristic (the change heuristic passed in is the disabled one, no_change_heuristic).
# NOTE(review): the trailing positional True looks like an overwrite flag, which
# would rebuild the clustering on every run -- confirm against the
# create_clustering signature.
cm_no_change = blocksci.cluster.ClusterManager.create_clustering("/home/ubuntu/bitcoin/clusters/nochange", chain, no_change_heuristic, True)

In [None]:
# Seed addresses used as entry points for the cluster lookup
# (presumably known CryptoLocker addresses, going by the variable name).
cryptolocker = ["18iEz617DoDp8CNQUyyrjCcC7XCGDf5SVb", "1KP72fBmh3XBRfuJDMn53APaqM6iMRspCh"]

In [None]:
# Resolve the first seed address on the chain
address_1 = chain.address_from_string(cryptolocker[0])
# Get the cluster containing that address
cluster_no_change = cm_no_change.cluster_with_address(address_1)
# Count the cluster's addresses (get_size stops counting at its default limit
# of 10000) and show the count next to the seed address
n_addreses = get_size(cluster_no_change)
print(n_addreses, address_1)       

In [None]:
# Get the transactions of the cluster.
# NOTE(review): this was originally described as "input payments into the
# cluster"; txes() presumably returns every transaction involving the cluster,
# not only incoming ones -- confirm.
payments = cluster_no_change.txes()

In [None]:
# Construct a date-indexed DataFrame of payments with their value in USD
# (one row per transaction, see get_payments_df)
df_payments = get_payments_df(payments)

In [None]:
# Sum up all the payments to get the total transaction volume in USD
df_payments["usd"].sum()

In [None]:
# Block heights at which these payments were made, in ascending order
heights = df_payments["height"].sort_values().values

In [None]:
# Compute the cluster balance at each block height in `heights`
# (a list of (height, balance) tuples, see get_balances)
balances = get_balances(cluster=cluster_no_change, heights=heights)

In [None]:
# Turn the (height, balance) pairs into a date-indexed frame with a USD column
df = get_dollars_df(
    pd.DataFrame(balances, columns=["height", "balance"]),
    "balance",
)

In [None]:
# Optional export of the balance series (disabled; uncomment to write the CSV):
# df.to_csv("/home/ubuntu/cluster-data/cryptolocker_balance_cluster_0.csv")

In [None]:
# Plot the cluster's USD balance over time on a dedicated figure, so that
# re-running the cell (or running it after another plot) never draws onto an
# already existing axes.
balance_fig, balance_ax = plt.subplots()
df["usd"].plot(ax=balance_ax)
balance_ax.set_title('Cluster balance over time')
balance_ax.set_xlabel('Date')
balance_ax.set_ylabel('Balance in US Dollars');