In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import csv
from datetime import datetime
import os

from pathlib import Path
from pprint import pprint

import pandas as pd
from pandas import DataFrame

import json

import graphsense
from graphsense.api import addresses_api, blocks_api, entities_api, general_api, txs_api, bulk_api

## Load host and API key from config


In [3]:
# Read the GraphSense host and API key from the local config file.
# The context manager closes the file even when JSON parsing fails.
with open('config.json') as f:
    config = json.load(f)

## Configure GraphSense client


In [4]:
# Currency passed to every API call in this notebook.
CURRENCY = 'btc'

# Client configuration built from the values loaded from config.json.
configuration = graphsense.Configuration(
    host=config['graphsense']['host'],
    api_key={'api_key': config['graphsense']['api_key']},
)

## Fetch address details

In [5]:
def fetch_address_details(address):
    """Fetch the details of a single address from the GraphSense API.

    Parameters
    ----------
    address : str
        The address to look up in the currency given by ``CURRENCY``.

    Returns
    -------
    The object returned by ``AddressesApi.get_address``, or ``None`` when
    the API call raises ``graphsense.ApiException`` (the error is printed).
    """
    with graphsense.ApiClient(configuration) as api_client:
        try:
            api_instance = addresses_api.AddressesApi(api_client)
            return api_instance.get_address(CURRENCY, address)

        except graphsense.ApiException as e:
            # Name the API that was actually called (the message used to say "Bulk Api").
            print("Exception when calling Addresses Api: %s\n" % e)
            return None


# Part 1: Find paths to exchanges

First we try to find paths to any known exchange entities. Luckily the GraphSense REST API provides us with an endpoint specifically made for this task: `search_entity_neighbors`.

In [6]:
# The seed address is the starting point of the searches in this notebook.
seed_address = '1BswBQiQrUMk3efFdTFZfnsgodB7xjrcGc'

# Fetch its details via fetch_address_details; the bare last expression
# displays the result as the cell output.
seed_address_details = fetch_address_details(seed_address)
seed_address_details

{'address': '1BswBQiQrUMk3efFdTFZfnsgodB7xjrcGc',
 'balance': {'fiat_values': [{'code': 'eur', 'value': 0.0},
                             {'code': 'usd', 'value': 0.0}],
             'value': 0},
 'currency': 'btc',
 'entity': 435755628,
 'first_tx': {'height': 547167,
              'timestamp': 1540406274,
              'tx_hash': 'e2606176793515d476ac63f63ac0ec8cc75af46ca4c5a466695122ad50462364'},
 'in_degree': 1,
 'last_tx': {'height': 576808,
             'timestamp': 1558296707,
             'tx_hash': 'b09aa1531c41dac74df2bf383955ea767954f6086a0be116efb9d5394d096408'},
 'no_incoming_txs': 1,
 'no_outgoing_txs': 1,
 'out_degree': 1,
 'status': 'clean',
 'total_received': {'fiat_values': [{'code': 'eur', 'value': 0.35},
                                    {'code': 'usd', 'value': 0.4}],
                    'value': 6156},
 'total_spent': {'fiat_values': [{'code': 'eur', 'value': 0.45},
                                 {'code': 'usd', 'value': 0.5}],
                 'value': 6156}

In [7]:
# Take the `entity` field of the seed address details; it is the id that is
# passed to the entity neighbor search further down.
seed_entity = seed_address_details.entity
seed_entity

435755628

Next we pass the entity to `search_entity_neighbors`. This endpoint takes the following parameters:

* `direction`: Whether we want to search incoming or outgoing neighbors
* `key`: The property of the search entity we want to check, here `'category'`.
* `value`: A list of values of the property we want the search entity to match, here `['exchange']`.
* `depth`: The maximum number of hops to traverse in the transaction graph.
* `breadth`: How many neighbors per hop we want to search deeper.
* `skip_num_addresses`: Don't follow neighbors of entities which have more addresses than this.

In [15]:
# Start from an empty result so that later cells never see an undefined
# `paths` (or a stale one left over from an out-of-order run) if the
# request fails.
paths = []

with graphsense.ApiClient(configuration) as api_client:
    try:
        api_instance = entities_api.EntitiesApi(api_client)
        # Search outgoing neighbors of the seed entity for entities whose
        # category is 'exchange'.
        paths = api_instance.search_entity_neighbors(
            CURRENCY, seed_entity,
            direction='out', key='category', value=['exchange'],
            depth=4, breadth=20, skip_num_addresses=20)
    except graphsense.ApiException as e:
        print("Exception when calling Entities Api: %s\n" % e)

As a result we get a recursive data structure with entities as nodes and paths to matching entities. We can iterate through it like this:

In [9]:
# Print the seed entity without a trailing newline so that the first path
# printed afterwards continues on the same line.
print(seed_entity, end='')

def print_path(path):
    """Recursively print one node of the search result and everything below it.

    Each node is written as `` -> <entity id>`` without a newline. A node
    without further sub-paths ends the line with the label of its best
    address tag in parentheses.
    """
    entity = path.neighbor.entity
    print(" -> " + str(entity.entity), end='')

    if path.paths:
        # Inner node: descend into every sub-path.
        for sub_path in path.paths:
            print_path(sub_path)
        return

    # Leaf node: finish the line with the tag label.
    print(" (" + entity.best_address_tag.label + ")")
    
# Print every top-level path of the entity search result.
for path in paths:
    print_path(path)

435755628 -> 512947546 -> 513098613 -> 513170593 -> 134568894 (paxful deposit wallet)
 -> 512947546 -> 513098613 -> 513170593 -> 487558635 (localbitcoin hot wallet)
 -> 512947546 -> 513098613 -> 513170594 -> 4635851 (coinbase.com)


# Part 2: Find paths between two addresses

In this example we try to find paths between two given addresses, the `seed_address` and the `target_address`. In order to achieve this we implement a "Breadth-First Search" (BFS). Starting from the `seed_address`, this algorithm retrieves the outgoing address neighbors layer by layer and checks for addresses matching our `target_address`.

In [10]:
# The address we want to reach from `seed_address`; it is passed to bfs() below.
target_address = '33qpZENhgeX3huCzVGDD5NNKr9rK5NcMD9'

In [11]:
# The Breadth-First Search algorithm:

def bfs(seed_address,
        target_addresses = None,
        target_entities = None,
        max_depth = 3, max_outdegree = 10, verbose = False):
    """Breadth-first search for paths from ``seed_address`` to given targets.

    Neighbors are retrieved with ``get_addr_neighbors`` one path at a time.
    A path is recorded as a match as soon as its newest address is in
    ``target_addresses`` or its entity is in ``target_entities``.

    Parameters
    ----------
    seed_address : str
        Address the search starts from.
    target_addresses : iterable, optional
        Addresses that terminate a path with a match. Defaults to none.
    target_entities : iterable, optional
        Entity ids that terminate a path with a match. Defaults to none.
    max_depth : int
        A path that has reached this many addresses is not extended further.
        Direct neighbors of the seed are always inspected for a match.
    max_outdegree : int
        Neighbors whose ``out_degree`` exceeds this value are not followed.
    verbose : bool
        If true, print the reason whenever a path is abandoned.

    Returns
    -------
    list of list
        Every matching path, each as a list of addresses starting with
        ``seed_address``.
    """
    # Local import keeps this cell runnable without touching the import cell.
    from collections import deque

    # None instead of mutable default arguments; sets give O(1) membership tests.
    address_targets = set(target_addresses or ())
    entity_targets = set(target_entities or ())

    # collect matching paths
    matching_paths = []

    # record entities that have already been queued for expansion
    visited_entities = set()

    # FIFO queue of paths, seeded with the path that contains only the seed
    queue = deque([[seed_address]])

    # count number of requests
    no_requests = 0

    while queue:

        # get first path from the queue
        path = queue.popleft()
        print(f"No requests: {no_requests}, " +
              f"Queue size: {len(queue)}, " +
              f"path length: {len(path)}, " +
              f"seed address {seed_address}", end='\r')

        # get the last node from the path
        addr = path[-1]

        # retrieve address neighbors
        df_neighbors = get_addr_neighbors(addr)
        no_requests = no_requests + 1

        # A failed lookup may yield None; skip this path instead of crashing.
        if df_neighbors is None:
            continue

        for _, neighbor in df_neighbors.iterrows():

            new_path = list(path)
            new_path.append(neighbor['address'])

            # found path (address match)
            if neighbor['address'] in address_targets:
                print(new_path, end=' ')
                print("MATCH | address")
                matching_paths.append(new_path)
                continue

            # found path (entity match)
            if neighbor['entity'] in entity_targets:
                print(new_path, end=' ')
                print("MATCH | entity")
                matching_paths.append(new_path)
                continue

            # stop if max depth is reached (>= so that max_depth <= 1 is
            # also bounded; identical to == for max_depth >= 2)
            if len(new_path) >= max_depth:
                if verbose:
                    print(new_path, end=' ')
                    print("STOP | max depth")
                continue

            # stop if entity has already been visited
            if neighbor['entity'] in visited_entities:
                if verbose:
                    print(new_path, end=' ')
                    print("STOP | same entity")
                continue

            # stop if address out_degree exceeds threshold
            if neighbor['out_degree'] > max_outdegree:
                if verbose:
                    print(new_path, end=' ')
                    print("STOP | max outdegree")
                continue

            queue.append(new_path)
            visited_entities.add(neighbor['entity'])

    return matching_paths

In [12]:
# utility function to retrieve neighbors of an address:

def get_addr_neighbors(address):
    """Retrieve the outgoing neighbors of ``address`` through the bulk CSV API.

    Only the first result page is requested (``num_pages=1``).

    Parameters
    ----------
    address : str
        Address whose outgoing neighbors are requested.

    Returns
    -------
    pandas.DataFrame
        Columns ``address``, ``entity`` and ``out_degree``, one row per
        neighbor. The frame is empty when the API reports no neighbors or
        when the request raises ``graphsense.ApiException`` (the error is
        printed).
    """
    neighbor_columns = ['address', 'entity', 'out_degree']

    with graphsense.ApiClient(configuration) as api_client:
        try:
            api_instance = bulk_api.BulkApi(api_client)
            print(f"get_addr_neighbors of {address}")
            operation = "list_address_neighbors"
            body = {'address': [address], 'direction': 'out'}

            # _preload_content=False hands the raw response to pandas,
            # which reads it as a file-like object.
            df_address_neighbors = pd.read_csv(
                api_instance.bulk_csv(CURRENCY, operation, body=body,
                                      num_pages=1, _preload_content=False))

        except graphsense.ApiException as e:
            print("Exception when calling Bulk Api: %s\n" % e)
            # Return an empty frame so callers can iterate without a None check.
            return pd.DataFrame(columns=neighbor_columns)

    # Drop rows that only report a failed or empty lookup. The marker columns
    # are checked for presence first so a response without them does not
    # raise KeyError.
    keep = pd.Series(True, index=df_address_neighbors.index)
    if '_error' in df_address_neighbors.columns:
        keep &= df_address_neighbors['_error'] != 'not found'
    if '_info' in df_address_neighbors.columns:
        keep &= df_address_neighbors['_info'] != 'no data'
    df_address_neighbors = df_address_neighbors.loc[keep].reset_index(drop=True)

    if df_address_neighbors.empty:
        # Build a fresh empty frame: assigning three names to the original
        # columns raises a length-mismatch ValueError whenever the CSV has a
        # different number of columns.
        return pd.DataFrame(columns=neighbor_columns)

    df_address_neighbors = df_address_neighbors.rename(
        columns={'address_address': 'address',
                 'address_entity': 'entity',
                 'address_out_degree': 'out_degree'})

    return df_address_neighbors[neighbor_columns]

In [13]:
# Run the breadth-first search from the seed address towards the target address.
# Keyword arguments spell out what the numeric limits mean.
paths = bfs(
    seed_address,
    target_addresses=[target_address],
    target_entities=[],
    max_depth=7,
    max_outdegree=20,
    verbose=False,
)

get_addr_neighbors of 1BswBQiQrUMk3efFdTFZfnsgodB7xjrcGcess 1BswBQiQrUMk3efFdTFZfnsgodB7xjrcGc
get_addr_neighbors of bc1qhlsngxttkqa2pl6ahjjn8h4qhgut9ajetrddq7BQiQrUMk3efFdTFZfnsgodB7xjrcGc
get_addr_neighbors of 1CPXbR6axkHXxNhWrtouRcu13cN77ccZDZess 1BswBQiQrUMk3efFdTFZfnsgodB7xjrcGc
get_addr_neighbors of bc1qj2a6a7xqnf5pkc3spjnhm224rem4mxvphu8eeaBQiQrUMk3efFdTFZfnsgodB7xjrcGc
get_addr_neighbors of 3Bjo93bKcNk5nL1vNPr4hp7cWfGd77Hhv7ess 1BswBQiQrUMk3efFdTFZfnsgodB7xjrcGc
get_addr_neighbors of 34SBebUu7FjqHAGP9LiPjjtZbs5cFzisHGess 1BswBQiQrUMk3efFdTFZfnsgodB7xjrcGc
get_addr_neighbors of bc1qru49trsw24yqkrc4yh7kgu8u5crta06zlhdvrrBQiQrUMk3efFdTFZfnsgodB7xjrcGc
get_addr_neighbors of bc1qkwuutl6lk9q9j38qhnrzk35rqqnjhmazgugy0vBQiQrUMk3efFdTFZfnsgodB7xjrcGc
get_addr_neighbors of 39HSKGYcb3VguLbd7f8TBd6Nd3oAKeKcjJess 1BswBQiQrUMk3efFdTFZfnsgodB7xjrcGc
get_addr_neighbors of 17yZaZjE4jQzygSbcgPEKMZxqagWNk7rBKess 1BswBQiQrUMk3efFdTFZfnsgodB7xjrcGc
get_addr_neighbors of bc1qa0lejnyttw5zahr9ytlagd0y

In [14]:
# Each path is a list of addresses; print it as one space-separated line.
for path in paths:
    print('Found path: ' + ' '.join(path))

Found path: 1BswBQiQrUMk3efFdTFZfnsgodB7xjrcGc bc1qhlsngxttkqa2pl6ahjjn8h4qhgut9ajetrddq7 bc1qkwuutl6lk9q9j38qhnrzk35rqqnjhmazgugy0v bc1q78yrdssz4tj0tr7xx5sku5vxjr7wj3gud43ahv 33qpZENhgeX3huCzVGDD5NNKr9rK5NcMD9


To visualize a path, copy and paste it into the search box of Iknaio's dashboard at https://app.ikna.io.