# Exercise 5 - Automatically find Exchange links for DWM Query

In this demo we will see how to combine DWM and Iknaio to automatically find connections to exchanges, given a set of crypto addresses mentioned in some darkweb context.

## Preparations

First, we install the graphsense-python package and define an API key. An API key for the [GraphSense](https://graphsense.github.io/) instance hosted by [Iknaio](https://www.ikna.io/) can be requested by sending an email to [contact@iknaio.com](mailto:contact@iknaio.com).

In [33]:
!pip install graphsense-python seaborn

import graphsense
from graphsense.api import bulk_api, general_api

import json
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt



In [34]:
# Read the local settings file; its "graphsense" section holds the API key.
with open('../config.json') as config_file:
    config = json.load(config_file)

# Client configuration for the GraphSense REST API hosted by Iknaio.
graphsense_api_key = config['graphsense']['api_key']
configuration = graphsense.Configuration(
    host="https://api.ikna.io/",
    api_key={'api_key': graphsense_api_key},
)

GraphSense supports Bitcoin-like UTXO and Ethereum-like Account-Model ledgers. Iknaio currently hosts BTC, LTC, BCH, ZEC, and ETH.

We are investigating Bitcoin transactions, therefore we set the default currency to Bitcoin **BTC**.

In [35]:
# Currency code passed to the GraphSense bulk calls and used in the QuickLock URL below.
CURRENCY = 'btc'

We can test whether our client works by checking what data the GraphSense endpoint provides.

In [36]:
# Smoke test: request the statistics from the general endpoint.
with graphsense.ApiClient(configuration) as api_client:
    api_instance = general_api.GeneralApi(api_client)
    api_response = api_instance.get_statistics()

# The response has already been fetched, so it can be shown after the
# client context has been left.
display(api_response)

{'currencies': [{'name': 'btc',
                 'no_address_relations': 10702661835,
                 'no_addresses': 1362717773,
                 'no_blocks': 879053,
                 'no_entities': 619014618,
                 'no_labels': 28866,
                 'no_tagged_addresses': 313255850,
                 'no_txs': 1143063996,
                 'timestamp': 1736752998},
                {'name': 'bch',
                 'no_address_relations': 2815733259,
                 'no_addresses': 357464141,
                 'no_blocks': 880818,
                 'no_entities': 166550051,
                 'no_labels': 436,
                 'no_tagged_addresses': 15707850,
                 'no_txs': 405436925,
                 'timestamp': 1736738682},
                {'name': 'ltc',
                 'no_address_relations': 2218283863,
                 'no_addresses': 321713100,
                 'no_blocks': 2826418,
                 'no_entities': 164551776,
                 'no_labels': 5

# 1. Load Starting Addresses from DWM


In [37]:
# Location of the seed address list.
addresses_csv_url = 'https://raw.githubusercontent.com/iknaio/iknaio-api-tutorial/main/data/sextortion_addresses.csv'

# Read without a header row and name the single column explicitly.
addresses = pd.read_csv(addresses_csv_url, header=None, names=["address"])
addresses

Unnamed: 0,address
0,1JwRp2J8bQcoG8XTUbxQZaEj9QB4RB6zEa
1,1EZS92K4xJbymDLwG4F7PNF5idPE62e9XY
2,16B4HuSAJ4WRdCq7dzA5b4ASh6QQ7ytZWB
3,1EdX5vtBiHGmkqbJc7VRSuVMx9Kpgh53Tp
4,3Ch7RPfwkJ3wHhiBfA4CNc8SagGdjbZwVs
...,...
240,1BC1pvPUQF9QHg73ha4AEAhaoEvg6HmTbS
241,13QKq8RsvbJnLRbi5ZcVX1ziYW83tqvp1q
242,1DiEqE5R1Ktu7QCLUuJN31PNtpoBU41x2E
243,1NWybUp8ZJXKyDg2DR5MaePspforMPYbM3


# Q1. How many of the addresses are used?

Instead of querying each address individually, we just pass the dataframe of known sextortion payment addresses.

In [38]:
# Columns shown in the per-address summary table.
address_summary_columns = [
    "address",
    "balance_eur",
    "total_received_eur",
    "total_spent_eur",
    "in_degree",
    "out_degree",
    "no_incoming_txs",
    "no_outgoing_txs",
    "first_tx_height",
    "last_tx_height",
]

# All seed addresses go into a single bulk request.
# Documentation about available bulk operations can be found
# here https://api.ikna.io/#/bulk/bulk_csv
address_request_body = {'address': addresses['address'].to_list()}

with graphsense.ApiClient(configuration) as clnt:
    blkapi = bulk_api.BulkApi(clnt)
    rcsv = blkapi.bulk_csv(
        CURRENCY,
        operation="get_address",
        body=address_request_body,
        num_pages=1,
        _preload_content=False,
    )
    # The response is requested with _preload_content=False and handed to
    # pandas for CSV parsing while the client is still open.
    respAddrDF = pd.read_csv(rcsv)

respAddrDF[address_summary_columns]

Unnamed: 0,address,balance_eur,total_received_eur,total_spent_eur,in_degree,out_degree,no_incoming_txs,no_outgoing_txs,first_tx_height,last_tx_height
0,14JLSAk9TKR6hVFswC6oRPoeAcydSecLFX,1.0,3898.19,3916.81,13,10,11,5,559043,574559
1,196FGBPW7JzPNCNjRZLtvpn3s6Ugppgdt1,0.0,904.93,1043.82,2,1,1,1,561952,566904
2,1CnQxRTP5WeGmwaSriYdefDEHyjWpY7tjw,0.0,772.30,798.33,3,4,3,3,556871,557764
3,1FCGhC7ncVgfepxzaAzq8Qdq2ypjvfYHhF,0.0,661.63,661.63,1,2,1,1,562961,563038
4,1e8o68StxCFLr6wdwKBrBqMQZc1VbFVMk,0.0,909.67,909.67,3,2,3,2,555912,557770
...,...,...,...,...,...,...,...,...,...,...
240,19rvCcYfSwPUSvJJKNyTyRFi5vxt6zaqJC,0.0,4437.93,4004.67,15,1,9,1,557341,558630
241,1JRCbCH9E3iLhSXPTqtkgfAsJNT2xD74C5,0.0,1496.23,1442.25,86,2,8,2,552351,552832
242,1PjxMdF22GCcTKSTaky7ema3F5V4kjRxam,0.0,746.67,720.09,4,3,4,2,559627,562563
243,17viZFKw1Xn8WQcpC6GwLqjzLTcE7qBJ93,0.0,1419.97,1419.97,3,3,3,3,554410,555462


In [39]:
# Number of returned rows and the sum of their USD amounts received.
address_count = len(respAddrDF)
total_received_usd = sum(respAddrDF['total_received_usd'])
print(f"{address_count} addresses received {total_received_usd:.2f} USD")

245 addresses received 886359.19 USD


# Q2: Are there direct links to exchanges?

In [64]:
import requests
import time
from tqdm import tqdm

# Sent to QuickLock as `search_time_seconds`.
seconds = 40
# Single example address. NOTE: the processing loop further down reuses the
# name `address` as its loop variable and therefore overwrites this value.
address = "15sJ8z2VXR6T7spiDj2W7UJbjJhqWpy33W" # todo
# Sent to QuickLock as `max_search_depth` and `max_search_breadth`.
max_depth = 30
max_breadth = 100


# Cookie header attached to every QuickLock request; the session value is
# read from the "graphsense" section of config.json.
header = {
    "cookie": f"remember_prod={config['graphsense']['session']}"
}

def get_QL_results(address: str) -> dict:
    """Trace the flows leaving *address* towards exchanges via QuickLock.

    Starts a ``follow_flows_to_exchange`` task for ``CURRENCY``, polls the
    task state every two seconds until it is ``done`` or ``timeout``, and
    then fetches the task once more with path details included.

    The search limits (``max_depth``, ``max_breadth``, ``seconds``) and the
    cookie header (``header``) are read from the notebook globals.

    Args:
        address: Address the search starts from (sent as
            ``perpetrator_address``).

    Returns:
        A dict with the keys ``address``, ``pct_traced_to_exchange`` and
        ``nr_pathes_found``; the latter two are copied from the ``results``
        object of the final response.

    Raises:
        requests.HTTPError: If any request returns a 4xx/5xx status.
        requests.Timeout: If a single HTTP request exceeds the timeout.
    """
    base_url = "https://api.ikna.io/quicklock"
    # Per-request limit so a stalled connection cannot hang the notebook.
    http_timeout_seconds = 60
    poll_interval_seconds = 2

    def fetch_json(url: str, params: dict) -> dict:
        """GET *url* with the session cookie and return the decoded JSON body."""
        response = requests.get(
            url, headers=header, params=params, timeout=http_timeout_seconds
        )
        # Fail with the HTTP status instead of a KeyError on an error payload.
        response.raise_for_status()
        return response.json()

    def get_task_id(address: str) -> str:
        """Start the search task and return its id."""
        started = fetch_json(
            f"{base_url}/follow_flows_to_exchange/{CURRENCY}",
            {
                "perpetrator_address": address,
                "max_search_depth": max_depth,
                "max_search_breadth": max_breadth,
                "search_time_seconds": seconds,
            },
        )
        return started['task_id']

    def get_data(task_id: str) -> dict:
        """Wait for the task to finish and summarise its result."""
        state_url = f"{base_url}/get_task_state/{task_id}"

        # Poll without path details until the task reaches a final state.
        while True:
            state = fetch_json(state_url, {"include_path_details": "false"})['state']
            if state in ("done", "timeout"):
                break
            time.sleep(poll_interval_seconds)

        # Fetch the finished task again, this time with path details.
        results = fetch_json(state_url, {"include_path_details": "true"})['results']
        return {
            "address": address,
            "pct_traced_to_exchange": results['pct_traced_to_exchange'],
            "nr_pathes_found": results['nr_pathes_found'],
        }

    task_id = get_task_id(address)
    return get_data(task_id)

# Only the first ten seed addresses are traced.
address_list = addresses['address'].head(10).to_list()

# One QuickLock lookup per address, with a progress bar.
results_list = []
for address in tqdm(address_list, desc="Processing addresses"):
    ql_result = get_QL_results(address)
    results_list.append(ql_result)

# One row per traced address.
df_ql = pd.DataFrame(results_list)
df_ql

Processing addresses: 100%|██████████| 10/10 [00:41<00:00,  4.13s/it]


Unnamed: 0,address,pct_traced_to_exchange,nr_pathes_found
0,1JwRp2J8bQcoG8XTUbxQZaEj9QB4RB6zEa,3.109556,9
1,1EZS92K4xJbymDLwG4F7PNF5idPE62e9XY,0.0,0
2,16B4HuSAJ4WRdCq7dzA5b4ASh6QQ7ytZWB,0.0,0
3,1EdX5vtBiHGmkqbJc7VRSuVMx9Kpgh53Tp,45.495085,19
4,3Ch7RPfwkJ3wHhiBfA4CNc8SagGdjbZwVs,0.0,0
5,19GqTJDhu7A1qg7rnK3KS7tmCkCTMTz6xD,0.013151,2
6,1NMRCQMfhfVyAyuEubdfneE2H458Njog3v,0.0,0
7,16XhmM7nPvR15eFdmVJs4QWcWpnYVS6FTv,0.0,0
8,1EyXwmxKd74HeyqbZbmXJsNxmfpiPeAF3F,0.0,0
9,1NPy1TBRyk6vMeGG3e5GaJWxYa9HbsNtDm,21.851681,1


https://api.ikna.io/quicklock/get_task_state/e6a3fa4d1a064c6966ae8fbc16ba30a7?include_path_details=false
https://api.ikna.io/quicklock/get_task_state/e6a3fa4d1a064c6966ae8fbc16ba30a7?include_path_details=false
https://api.ikna.io/quicklock/get_task_state/e6a3fa4d1a064c6966ae8fbc16ba30a7?include_path_details=false
https://api.ikna.io/quicklock/get_task_state/e6a3fa4d1a064c6966ae8fbc16ba30a7?include_path_details=false
{'address': '15sJ8z2VXR6T7spiDj2W7UJbjJhqWpy33W', 'pct_traced_to_exchange': 53.20496806360333, 'nr_pathes_found': 107}


107

# Q3: Can I link more addresses to our seed addresses?

We now fetch summary statistics for each entity.

In [43]:
# Columns shown in the per-entity summary table.
entity_summary_columns = [
    "best_address_tag_label",
    "root_address",
    "no_addresses",
    "balance_eur",
    "total_received_eur",
    "total_spent_eur",
    "in_degree",
    "out_degree",
    "no_incoming_txs",
    "no_outgoing_txs",
    "first_tx_height",
    "last_tx_height",
]

with graphsense.ApiClient(configuration) as clnt:
    # Build the bulk API on the client opened by this `with` statement,
    # not on a client left over from another cell.
    blkapi = bulk_api.BulkApi(clnt)
    rcsv = blkapi.bulk_csv(
        CURRENCY,
        operation="get_entity",
        # Each entity is requested once, even if several addresses share it.
        body={'entity': respAddrDF['entity'].drop_duplicates().to_list()},
        num_pages=1,
        _preload_content=False,
    )
    respEntityDF = pd.read_csv(rcsv)

respEntityDF[entity_summary_columns]

Unnamed: 0,best_address_tag_label,root_address,no_addresses,balance_eur,total_received_eur,total_spent_eur,in_degree,out_degree,no_incoming_txs,no_outgoing_txs,first_tx_height,last_tx_height
0,Sextortion Spam,17YQspbjPuCR65TUtEDqsc37qVZKR7zuBJ,2,0.00,1.28,1.28,1,2,1,1,557906,557906
1,Sextortion Spam,12L7czMjP1P9Sd35of8jRsbgVcLmy22LQ2,4,0.00,1588.48,1727.12,4,1,4,1,558400,564600
2,Sextortion Spam,12VA3fnjCkJBYwgxffD138xXqFNYTWwV5w,2,0.00,1.31,1.32,2,1,2,1,557906,557906
3,Sextortion Spam,1BzkoGfrLtL59ZGjhKfvBwy47DEb6oba5f,22,3.04,160123.50,158363.14,255,14,299,13,548022,566798
4,Sextortion Spam,17cehgZvgasN7gi7wUHBcLqtRuJmyYpJti,4,0.00,7440.59,7518.55,16,10,16,5,545648,559739
...,...,...,...,...,...,...,...,...,...,...,...,...
91,Sextortion Spam,1GR7rJfntdcbfhKT1s33RDby4z5ex1ou4Z,4,0.00,12208.39,11790.50,24,4,24,4,551269,553564
92,Sextortion Spam,1DVU5Q2HQ4srFNSSaWBrVNMtL4pvBkfP5w,19,4352.46,99749.96,96475.85,205,15,250,14,547232,762446
93,Sextortion Spam,16e6KPCPGy5RNAmsBFKKChe91PcjgLDN6o,1,0.00,5958.79,5960.50,10,8,10,4,543361,569578
94,Sextortion Spam,1EGMpZV7AnKKpNK9C1ziPMaQqNc9zVT4C,38,0.00,25797.67,16432.64,45,4,47,4,547938,560647
