In [54]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [55]:
import csv
from datetime import datetime
import os

from pathlib import Path
from pprint import pprint

import pandas as pd
from pandas import DataFrame

import json

import graphsense
from graphsense.api import addresses_api, blocks_api, entities_api, general_api, txs_api, bulk_api

## Establish GS Connection

In [56]:
f = open('config.json')
config = json.load(f)
f.close()

In [57]:
configuration = graphsense.Configuration(
    host = config['graphsense']['host'],
    api_key = {'api_key': config['graphsense']['api_key']})

CURRENCY = 'eth'

## Fetch address details

In [58]:
def fetch_address_details(address):
    with graphsense.ApiClient(configuration) as api_client:
        try:
            api_instance = addresses_api.AddressesApi(api_client)

            address_details = api_instance.get_address(CURRENCY, address)
            return address_details

        except graphsense.ApiException as e:
            print("Exception when calling Bulk Api: %s\n" % e)


# Part 1: Find paths to exchanges

First we try to find paths to any known exchange entities. Luckily the GraphSense REST API provides us with an endpoint specifically made for this task: `search_entity_neighbors`.

In [59]:
# First we set our seed address
seed_address = '0xDAFEA492D9c6733ae3d56b7Ed1ADB60692c98Bc5'

# and fetch address details
seed_address_details = fetch_address_details(seed_address)
seed_address_details

{'address': '0xdafea492d9c6733ae3d56b7ed1adb60692c98bc5',
 'balance': {'fiat_values': [{'code': 'eur', 'value': 1876.08},
                             {'code': 'usd', 'value': 2024.85}],
             'value': 1164353663862197020},
 'currency': 'eth',
 'entity': 230036266,
 'first_tx': {'height': 15569271,
              'timestamp': 1663611287,
              'tx_hash': 'ecf2cf3de196141f67b2207242987714c1a230d4b77797e05709e1b6aafb3912'},
 'in_degree': 31,
 'is_contract': False,
 'last_tx': {'height': 17041745,
             'timestamp': 1681430147,
             'tx_hash': 'c8478e88f104f09cba72800cedc5f11bd612fa6716ef3d8d49a83c262cc2a536'},
 'no_incoming_txs': 87,
 'no_outgoing_txs': 307346,
 'out_degree': 4220,
 'status': 'dirty',
 'token_balances': {'usdt': {'fiat_values': [{'code': 'eur', 'value': 0.0},
                                             {'code': 'usd', 'value': 0.0}],
                             'value': 0}},
 'total_received': {'fiat_values': [{'code': 'eur', 'value': 167.7

In [60]:
# We get the entity of our seed address:
seed_entity = seed_address_details.entity
seed_entity

230036266

Next we pass the entity to `search_entity_neighbors`. This endpoint takes the following parameters:

* `direction`: Whether we want to search incoming or outgoing neighbors
* `key`: The property of the search entity we want to check, here `'category'`.
* `value`: A list of values of the property we want the search entity to match, here `['exchange']`.
* `depth`: How many hops we want to traverse the transaction graph.
* `breadth`: How many neighbors per hop we want to search deeper.
* `skip_num_addresses`: Don't follow neighbors of entities which have more addresses than this.

In [61]:
with graphsense.ApiClient(configuration) as api_client:
    try:
        api_instance = entities_api.EntitiesApi(api_client)
        paths = api_instance.search_entity_neighbors(CURRENCY, seed_entity,
                                                     direction='out', key='category', value=['exchange'],
                                                     depth=4, breadth=20, skip_num_addresses=20)
    except graphsense.ApiException as e:
            print("Exception when calling Entities Api: %s\n" % e)        

As a result we get a recursive data structure with entities as nodes and paths to matching entities. We can iterate through it like this:

In [62]:
print(seed_entity, end='')

def print_path(path):
    print(" -> " + str(path.neighbor.entity.entity), end='')
    if not path.paths:
        print(" (" + path.neighbor.entity.best_address_tag.label + ")")
        return
    for path in path.paths:
        print_path(path)
    
for path in paths:
    print_path(path)

230036266 -> 8009 -> 133000950 ()
 -> 135155 -> 120500089 ()
 -> 135155 -> 154748397 ()
 -> 137060 -> 27407253 ()
 -> 151375 -> 33268876 ()
 -> 153683 -> 20190853 ()
 -> 173309 -> 20190853 ()
 -> 66 -> 139558190 -> 24301750 (binance)
 -> 66 -> 139558190 -> 170897662 ()
 -> 66 -> 157551485 -> 24301750 (binance)
 -> 66 -> 157551485 -> 170897662 ()
 -> 66 -> 158553255 -> 24301750 (binance)
 -> 66 -> 158553255 -> 170897662 ()
 -> 66 -> 161912794 -> 24301750 (binance)
 -> 66 -> 161912794 -> 170897662 ()
 -> 66 -> 164915493 -> 24301750 (binance)
 -> 66 -> 164915493 -> 170897662 ()
 -> 1495 -> 164887081 -> 24301750 (binance)
 -> 6836 -> 108719 -> 9844 ()
 -> 9154 -> 63300464 -> 9844 ()
 -> 9154 -> 63300464 -> 11020 ()
 -> 113797 -> 100495671 -> 21226897 (huobi reserve wallets ht)
 -> 113797 -> 212124914 -> 96572478 ()
 -> 113797 -> 212124914 -> 109888545 (okx erc20 reserves)
 -> 113797 -> 218262852 -> 96572478 ()
 -> 113797 -> 218262852 -> 109888545 (okx erc20 reserves)
 -> 135155 -> 134220 -

# Part2: Find paths between two addresses

In this example we try to find paths between two given addresses, the `seed_address` and the `target_address`. In order to achieve this we implement a "Breadth-First Search" (BFS). Starting from the `seed_address`, this algorithm retrieves address neighbors layer by layer and checks for addresses matching our `target_address`.

In [63]:
# This is our target address:
target_address = '0xDe12C3d2257fc9bB1c1A00d409f292eecD55fFaF'

In [64]:
# The Breadth-First Search algorithm:

def bfs(seed_address,
        target_addresses = [],
        target_entities = [],
        max_depth = 3, max_outdegree = 10, verbose = False):
    
    # collect matching paths
    matching_paths = []
    
    # record visited addresses and entities
    visited_entities = []
    
    # maintain a queue of paths
    queue = []
    
    # push the first path into the queue
    queue.append([seed_address])
    
    # count number of requests
    no_requests = 0
    
    while(queue):

        # get first path from the queue
        path = queue.pop(0)
        print(f"No requests: {no_requests}, " +
              f"Queue size: {len(queue)}, " +
              f"path length: {len(path)}, " +
              f"seed address {seed_address}", end='\r')
        
        # get the last node from the path
        addr = path[-1]

        # retrieve address neighbors
        df_neighbors = get_addr_neighbors(addr)
        no_requests = no_requests + 1

        # continue with neighbors out_degree < max_outdegree
        for index, neighbor in df_neighbors.iterrows():

            new_path = list(path)
            new_path.append(neighbor['address'])

            # found path (address match)
            if(neighbor['address'] in target_addresses):
                print(new_path, end=' ')
                print("MATCH | address")
                matching_paths.append(new_path)
                continue

            # found path (entity match)
            if(neighbor['entity'] in target_entities):
                print(new_path, end=' ')
                print("MATCH | entity")
                matching_paths.append(new_path)
                continue
                                
            # stop if max depth is reached
            if len(new_path) == max_depth:
                if verbose:
                    print(new_path, end=' ') 
                    print("STOP | max depth")
                continue
            
            # stop if entity has already been visited
            if(neighbor['entity'] in visited_entities):
                if verbose:
                    print(new_path, end=' ') 
                    print("STOP | same entity")
                continue

            # stop if address out_degree exceeds threshold
            if(neighbor['out_degree'] > max_outdegree):
                if verbose:
                    print(new_path, end=' ') 
                    print("STOP | max outdegree")
                continue

            queue.append(new_path)
            visited_entities.append(neighbor['entity'])
                
        if len(queue) == 0:
            return matching_paths

In [65]:
# utility function to retrieve neighbors of an address:

def get_addr_neighbors(address):
    
    with graphsense.ApiClient(configuration) as api_client:
        try:
            api_instance = bulk_api.BulkApi(api_client)
            print(f"get_addr_neighbors of {address}")
            operation = "list_address_neighbors"
            body = {'address': [address], 'direction': 'out'}

            df_address_neighbors = pd.read_csv(api_instance.bulk_csv(CURRENCY, operation, body=body,
                                                                   num_pages=1, _preload_content=False))
            
            df_address_neighbors = df_address_neighbors \
                .loc[(df_address_neighbors['_error'] != 'not found') &
                     (df_address_neighbors['_info'] != 'no data')].reset_index(drop=True)
            if df_address_neighbors.empty:
                df_address_neighbors.columns = ['address', 'entity', 'out_degree']
                return df_address_neighbors

            
            df_address_neighbors = df_address_neighbors \
                .rename(columns={'address_address': 'address', 
                                 'address_entity': 'entity',
                                 'address_out_degree': 'out_degree'}) \

            return df_address_neighbors[['address', 'entity', 'out_degree']]
            
        except graphsense.ApiException as e:
            print("Exception when calling Bulk Api: %s\n" % e)

In [66]:
# Find paths!

paths = bfs(seed_address,
            [target_address],
            [],
            7, 20, False)

get_addr_neighbors of 0xDAFEA492D9c6733ae3d56b7Ed1ADB60692c98Bc5FEA492D9c6733ae3d56b7Ed1ADB60692c98Bc5
get_addr_neighbors of 0x1f3da68fe87eaf43a829ab6d7ec5a6e009b204fbAFEA492D9c6733ae3d56b7Ed1ADB60692c98Bc5
get_addr_neighbors of 0x44e2fdc679e6bee01e93ef4a3ab1bcce012abc7cAFEA492D9c6733ae3d56b7Ed1ADB60692c98Bc5
get_addr_neighbors of 0x65ee20b06d9ad589a7e7ce04b9f5f795f402aeceDAFEA492D9c6733ae3d56b7Ed1ADB60692c98Bc5
get_addr_neighbors of 0xcfe2caaf3cec97061d0939748739bffe684ae91fDAFEA492D9c6733ae3d56b7Ed1ADB60692c98Bc5
get_addr_neighbors of 0xec73833de4b810bb027810fc8f69f544e83c12d1DAFEA492D9c6733ae3d56b7Ed1ADB60692c98Bc5
get_addr_neighbors of nane: 124, path length: 2, seed address 0xDAFEA492D9c6733ae3d56b7Ed1ADB60692c98Bc5


KeyError: "None of [Index(['address', 'entity', 'out_degree'], dtype='object')] are in the [columns]"

In [None]:
for path in paths:
    print('Found path: ' + ' '.join(path))

To visualize a path, copy and paste it into the search box of Iknaio's dashboard at https://app.ikna.io.