# UTXO Ledgers: Exercise 4 - Inspecting a Bitcoin address

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import json
from pprint import pprint

import graphsense
from graphsense.api import addresses_api, bulk_api, entities_api, general_api

## Notebook Setup

This notebook assumes that you already followed the instructions outlined in `README.md` and installed the [GraphSense Python API](https://github.com/graphsense/graphsense-python) library locally.

First, we must setup our notebook and establish a connection to some hosted GraphSense instance.

We connect to a GraphSense instance hosted by [Iknaio](https://www.ikna.io/) and enter our API key in the provided `config.json` configuration file. An API key will be provided during the tutorial. If you would like to get an API key later, drop an email to contact@iknaio.com

## Load host and API key from config

In [3]:
f = open('config.json')
config = json.load(f)
f.close()
config

{'graphsense': {'host': 'https://api.ikna.io/',
  'api_key': 'PyTlTKIuo9CrTLR6VjlJRB5Tr22prmmI'}}

## Configure GraphSense client

Now we configure the GraphSense Python library

In [4]:
configuration = graphsense.Configuration(
    host = config['graphsense']['host'],
    api_key = {'api_key': config['graphsense']['api_key']})

We can test if our libary is working by retrieving summary statistics on supported ledgers.

In [5]:
with graphsense.ApiClient(configuration) as api_client:
    api_instance = general_api.GeneralApi(api_client)
    api_response = api_instance.get_statistics()
    pprint(api_response)

{'currencies': [{'name': 'btc',
                 'no_address_relations': 6130165038,
                 'no_addresses': 965532532,
                 'no_blocks': 732355,
                 'no_entities': 441173417,
                 'no_labels': 1069,
                 'no_tagged_addresses': 156062908,
                 'no_txs': 726448123,
                 'timestamp': 1650239875},
                {'name': 'bch',
                 'no_address_relations': 2177670078,
                 'no_addresses': 329163172,
                 'no_blocks': 734148,
                 'no_entities': 149786318,
                 'no_labels': 9,
                 'no_tagged_addresses': 307782,
                 'no_txs': 355677567,
                 'timestamp': 1648942357},
                {'name': 'ltc',
                 'no_address_relations': 1204027150,
                 'no_addresses': 131228194,
                 'no_blocks': 2238818,
                 'no_entities': 56664129,
                 'no_labels': 23,
      

# Address API

We start by retrieving some summary statistics on the Internet Archive's donation address. The response basically corresponds to the data shown in the property box of the GraphSense dashboard.

In [6]:
with graphsense.ApiClient(configuration) as api_client:
    api_instance = addresses_api.AddressesApi(api_client)

    currency = "btc"
    address = '1Archive1n2C579dMsAu3iC6tWzuQJz8dN'
    include_tags = True

    try:
        resp_addr = api_instance.get_address(currency, address, include_tags=include_tags)
        pprint(resp_addr)
    except graphsense.ApiException as e:
        print("Exception when calling AddressesApi->get_address: %s\n" % e)

{'address': '1Archive1n2C579dMsAu3iC6tWzuQJz8dN',
 'balance': {'fiat_values': [{'code': 'eur', 'value': 44854.36},
                             {'code': 'usd', 'value': 48496.07}],
             'value': 120219817},
 'entity': 2647117,
 'first_tx': {'height': 256314,
              'timestamp': 1378415426,
              'tx_hash': '04d92601677d62a985310b61a301e74870fa942c8be0648e16b1db23b996a8cd'},
 'in_degree': 6342,
 'last_tx': {'height': 732319,
             'timestamp': 1650222879,
             'tx_hash': '384ed3216b755e315a6097be19453dec02fbccac9195c50b35f21c35bf2af375'},
 'no_incoming_txs': 4805,
 'no_outgoing_txs': 274,
 'out_degree': 293,
 'tags': [{'active': True,
           'address': '1Archive1n2C579dMsAu3iC6tWzuQJz8dN',
           'category': 'organization',
           'currency': 'BTC',
           'is_cluster_definer': True,
           'is_public': True,
           'label': 'internet archive',
           'lastmod': 1636675200,
           'source': 'https://archive.org/donate

In [7]:
print(f"Address {resp_addr.address} received {resp_addr.total_received.fiat_values[0].value} EUR " + 
      f"from {resp_addr.in_degree} addresses.")

Address 1Archive1n2C579dMsAu3iC6tWzuQJz8dN received 2464600.5 EUR from 6342 addresses.


# Entities API

Next, we inspect the corresponding entity, which clusters other addresses that are likely conrolled by the owner of that address. The entity ID is contained in the address response.

In [8]:
with graphsense.ApiClient(configuration) as api_client:
    api_instance = entities_api.EntitiesApi(api_client)

    currency = "btc" 
    entity = resp_addr.entity 
    include_tags = True 

    try:
        resp_entity = api_instance.get_entity(currency, entity, include_tags=include_tags)
        pprint(resp_entity)
    except graphsense.ApiException as e:
        print("Exception when calling EntitiesApi->get_entity: %s\n" % e)

{'balance': {'fiat_values': [{'code': 'eur', 'value': 44854.36},
                             {'code': 'usd', 'value': 48496.07}],
             'value': 120219817},
 'entity': 2647117,
 'first_tx': {'height': 156529,
              'timestamp': 1323298692,
              'tx_hash': 'dc035c562acc3230cec8c870293c1119d62e60b13932565231dbe5c407ff7508'},
 'in_degree': 5138,
 'last_tx': {'height': 732319,
             'timestamp': 1650222879,
             'tx_hash': '384ed3216b755e315a6097be19453dec02fbccac9195c50b35f21c35bf2af375'},
 'no_addresses': 120,
 'no_incoming_txs': 5683,
 'no_outgoing_txs': 287,
 'out_degree': 174,
 'root_address': '17gN64BPHtxi4mEM3qWrxdwhieUvRq8R2r',
 'tags': {'address_tags': [{'active': True,
                            'address': '1Archive1n2C579dMsAu3iC6tWzuQJz8dN',
                            'category': 'organization',
                            'currency': 'BTC',
                            'is_cluster_definer': True,
                            'is_public':

In [9]:
print(f"Entity {resp_entity.entity} likely controls {resp_entity.no_addresses} addresses, " +
      f"which received {resp_entity.total_received.fiat_values[0].value} EUR " + 
      f"from {resp_entity.in_degree} entities.")

Entity 2647117 likely controls 120 addresses, which received 2496009.75 EUR from 5138 entities.


# Entity Neighbors

Finally, we retrieve all neighbor entities receiving funds from our focus entity. Since it is more covenient to work with dataframes, we use the GraphSense Bulk API interface to retrieve a dataframe of receiving neighbor nodes.

In [10]:
import pandas as pd

data = {'entity': [resp_addr.entity]}

entityDF = pd.DataFrame.from_dict(data)

In [11]:
with graphsense.ApiClient(configuration) as api_client:
    api_instance = bulk_api.BulkApi(api_client)

    CURRENCY = "btc"
    operation = "list_entity_neighbors"
    body = {'entity': entityDF['entity'].to_list(),
            'direction': 'out',
            'include_labels': True}

    try:
        respDF = pd.read_csv(
                    api_instance.bulk_csv(CURRENCY, operation, body=body,
                                          num_pages=2,
                                          _preload_content=False))
        pprint(respDF)
    except graphsense.ApiException as e:
        print("Exception when calling BulkApi->bulk_csv: %s\n" % e)

     _error  _info  _request_entity   balance_eur   balance_usd  \
0       NaN    NaN          2647117  5.080910e+08  5.493428e+08   
1       NaN    NaN          2647117  6.571453e+04  7.104988e+04   
2       NaN    NaN          2647117  0.000000e+00  0.000000e+00   
3       NaN    NaN          2647117  1.011325e+07  1.093434e+07   
4       NaN    NaN          2647117  0.000000e+00  0.000000e+00   
..      ...    ...              ...           ...           ...   
169     NaN    NaN          2647117  3.440400e+02  3.719800e+02   
170     NaN    NaN          2647117  1.548500e+02  1.674200e+02   
171     NaN    NaN          2647117  0.000000e+00  0.000000e+00   
172     NaN    NaN          2647117  0.000000e+00  0.000000e+00   
173     NaN    NaN          2647117  0.000000e+00  0.000000e+00   

     balance_value         id      labels  labels_count  no_txs node_type  \
0    1361798644690     109577   epay.info             1       2    entity   
1        176129799    1084055  bitpay.com

We filter the result and see the reltaions between known entity IDs. We also see two known receiving entities.

In [12]:
resultDF = respDF[['id', '_request_entity', 'labels', 'no_txs']] \
    .rename(columns={'id': 'src', '_request_entity': 'dst', 'labels': 'dst_label'})
resultDF

Unnamed: 0,src,dst,dst_label,no_txs
0,109577,2647117,epay.info,2
1,1084055,2647117,bitpay.com,1
2,2818641,2647117,,1
3,4635850,2647117,,76
4,8361735,2647117,,3
...,...,...,...,...
169,763066736,2647117,,1
170,763066737,2647117,,1
171,834908304,2647117,,1
172,849802069,2647117,,1
