# Exercise 5 - Automatically find Exchange links for DWM Query

In this demo we will see how to combine DWM and Iknaio to automatically find connections to exchanges, given a set of crypto addresses mentioned in some darkweb context.

## Preparations

First, we install the graphsense-python package and define an API key. An API key for the [GraphSense](https://graphsense.github.io/) instance hosted by [Iknaio](https://www.ikna.io/) can be requested by sending an email to [contact@iknaio.com](mailto:contact@iknaio.com).

In [124]:
!pip install graphsense-python seaborn

import graphsense
from graphsense.api import bulk_api, general_api

import json
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt



In [125]:
# Read the local settings file that holds the GraphSense credentials
with open('../config.json') as config_file:
    config = json.load(config_file)


# Client configuration pointing at the Iknaio-hosted GraphSense API
graphsense_api_key = config['graphsense']['api_key']
configuration = graphsense.Configuration(
    host="https://api.ikna.io/",
    api_key={'api_key': graphsense_api_key},
)

GraphSense supports Bitcoin-like UTXO and Ethereum-like Account-Model ledgers. Iknaio currently hosts BTC, LTC, BCH, ZEC, and ETH.

We are investigating Bitcoin transactions, therefore we set the default currency to Bitcoin **BTC**.

In [126]:
# Ledger identifier passed as the currency argument of the API calls below
CURRENCY = 'btc'

We can test whether our client works by checking what data the GraphSense endpoint provides.

In [127]:
# Smoke test: request the endpoint's statistics and display them.
# NOTE: the names bound here (api_client, api_instance, api_response) stay
# defined after this cell has run, although the client's `with` block has
# already exited by then.
with graphsense.ApiClient(configuration) as api_client:
    api_instance = general_api.GeneralApi(api_client)
    api_response = api_instance.get_statistics()
    display(api_response)

{'currencies': [{'name': 'btc',
                 'no_address_relations': 10702744647,
                 'no_addresses': 1362721808,
                 'no_blocks': 879056,
                 'no_entities': 619018653,
                 'no_labels': 28866,
                 'no_tagged_addresses': 313255850,
                 'no_txs': 1143075248,
                 'timestamp': 1736754212},
                {'name': 'bch',
                 'no_address_relations': 2815811387,
                 'no_addresses': 357465240,
                 'no_blocks': 880823,
                 'no_entities': 166551150,
                 'no_labels': 436,
                 'no_tagged_addresses': 15707850,
                 'no_txs': 405437430,
                 'timestamp': 1736742856},
                {'name': 'ltc',
                 'no_address_relations': 2218283863,
                 'no_addresses': 321713100,
                 'no_blocks': 2826418,
                 'no_entities': 164551776,
                 'no_labels': 5

# 1. Load Starting Addresses from DWM


In [128]:
# header=None: the first line of the file is read as data; the single column
# is named "address"
SEED_ADDRESSES_URL = 'https://raw.githubusercontent.com/iknaio/iknaio-api-tutorial/main/data/sextortion_addresses.csv'
addresses = pd.read_csv(SEED_ADDRESSES_URL, header=None, names=["address"])
addresses

Unnamed: 0,address
0,1JwRp2J8bQcoG8XTUbxQZaEj9QB4RB6zEa
1,1EZS92K4xJbymDLwG4F7PNF5idPE62e9XY
2,16B4HuSAJ4WRdCq7dzA5b4ASh6QQ7ytZWB
3,1EdX5vtBiHGmkqbJc7VRSuVMx9Kpgh53Tp
4,3Ch7RPfwkJ3wHhiBfA4CNc8SagGdjbZwVs
...,...
240,1BC1pvPUQF9QHg73ha4AEAhaoEvg6HmTbS
241,13QKq8RsvbJnLRbi5ZcVX1ziYW83tqvp1q
242,1DiEqE5R1Ktu7QCLUuJN31PNtpoBU41x2E
243,1NWybUp8ZJXKyDg2DR5MaePspforMPYbM3


# Q1. How many of the addresses are used?

Instead of querying each address individually, we just pass the dataframe of known sextortion payment addresses.

In [129]:
# Columns shown in the cell output
address_columns = [
    "address",
    "balance_eur",
    "total_received_eur",
    "total_spent_eur",
    "in_degree",
    "out_degree",
    "no_incoming_txs",
    "no_outgoing_txs",
    "first_tx_height",
    "last_tx_height",
]

# Fetch the address records for all seed addresses with one bulk request.
# Documentation about available bulk operations can be found
# here https://api.ikna.io/#/bulk/bulk_csv
with graphsense.ApiClient(configuration) as clnt:
    blkapi = bulk_api.BulkApi(clnt)
    rcsv = blkapi.bulk_csv(
        CURRENCY,
        operation="get_address",
        body={'address': addresses['address'].to_list()},
        num_pages=1,
        # presumably returns the raw response so pandas can parse the CSV
        # directly - TODO confirm against the client documentation
        _preload_content=False,
    )
    respAddrDF = pd.read_csv(rcsv)

respAddrDF[address_columns]

Unnamed: 0,address,balance_eur,total_received_eur,total_spent_eur,in_degree,out_degree,no_incoming_txs,no_outgoing_txs,first_tx_height,last_tx_height
0,1PWkGaj53fhxJH4mmB5iF6MuF4MysKDKuu,0.0,713.46,690.21,3,4,3,2,552687,553303
1,1PL9ewB1y3iC7EyuePDoPxJjwC4CgAvWTo,0.0,14249.31,13982.02,42,6,26,5,547521,551903
2,1HQ7wGdA5G9qUtM8jyDt5obDv1x3vEvjCy,0.0,20056.30,20221.39,46,4,39,3,547591,549762
3,1NWybUp8ZJXKyDg2DR5MaePspforMPYbM3,0.0,5503.60,5525.39,29,6,24,4,552796,553832
4,192CdbpYmpQhbpSZy5J9qyNE3YCxPpxdxv,0.0,0.86,0.87,1,2,1,1,543949,544640
...,...,...,...,...,...,...,...,...,...,...
240,17viZFKw1Xn8WQcpC6GwLqjzLTcE7qBJ93,0.0,1419.97,1419.97,3,3,3,3,554410,555462
241,1A6fQU7wLzwXdvR2nsS4YtbEBKyNim82pq,0.0,777.48,586.61,2,1,2,1,549991,554513
242,18YDAf11psBJSavARQCwysE7E89zSEMfGG,0.0,3641.37,3496.77,7,3,5,2,551191,552832
243,1MuQXHNBcAbYyMvMsvHfnXdymeuoLAK14Z,0.0,2092.60,2044.14,45,4,12,2,541403,542974


In [130]:
# Number of returned address records and the total of their USD receipts
n_addresses = len(respAddrDF)
total_received_usd = sum(respAddrDF['total_received_usd'])
print(f"{n_addresses} addresses received {total_received_usd:.2f} USD")

245 addresses received 886359.19 USD


# Q2: Are there direct links to exchanges?

In [131]:
import requests
import time
from tqdm import tqdm

# Search limits read by get_QL_results when it builds the request URL
seconds = 40  # sent as search_time_seconds
address = "15sJ8z2VXR6T7spiDj2W7UJbjJhqWpy33W" # todo: placeholder; rebound by the `for address in ...` loop further down
max_depth = 30  # sent as max_search_depth
max_breadth = 100  # sent as max_search_breadth


# Cookie header attached to every request made by get_QL_results; the session
# value comes from the loaded config
header = {
    "cookie": f"remember_prod={config['graphsense']['session']}"
}

def get_QL_results(address: str) -> dict:
    """Search for flows from *address* to exchanges via the quicklock endpoints.

    Starts a ``follow_flows_to_exchange`` task for ``address``, polls
    ``get_task_state`` every two seconds until the task reports ``done`` or
    ``timeout``, then requests the task once more with path details included.

    ``CURRENCY``, the search limits (``max_depth``, ``max_breadth``,
    ``seconds``) and the request ``header`` are read from module level.

    Returns:
        A dict with the keys ``address``, ``pct_traced_to_exchange``,
        ``nr_pathes_found`` and ``paths``; the last three are copied from the
        ``results`` object of the final response.

    Raises:
        requests.HTTPError: if a request is answered with a 4xx/5xx status.
    """

    def get_task_id(address: str) -> str:
        """Start the search task and return its ``task_id``."""
        rq = f"https://api.ikna.io/quicklock/follow_flows_to_exchange/{CURRENCY}?perpetrator_address={address}&max_search_depth={max_depth}&max_search_breadth={max_breadth}&search_time_seconds={seconds}"

        response = requests.get(rq, headers=header)
        # Surface HTTP errors directly instead of failing later with an
        # unrelated KeyError or JSON decode error.
        response.raise_for_status()
        return response.json()['task_id']

    def get_data(task_id: str) -> dict:
        """Wait for the task to finish and return its summarised results."""
        req = f"https://api.ikna.io/quicklock/get_task_state/{task_id}?include_path_details=false"

        # NOTE(review): only "done" and "timeout" end the polling; if the API
        # has any other terminal state this loop never exits - confirm.
        while True:
            response = requests.get(req, headers=header)
            response.raise_for_status()
            if response.json()['state'] in ("done", "timeout"):
                break
            time.sleep(2)

        # Path details are only requested once, after the task has finished.
        rq_get_result = f"https://api.ikna.io/quicklock/get_task_state/{task_id}?include_path_details=true"

        response = requests.get(rq_get_result, headers=header)
        response.raise_for_status()
        results = response.json()['results']
        return {
            "address": address,
            "pct_traced_to_exchange": results['pct_traced_to_exchange'],
            "nr_pathes_found": results['nr_pathes_found'],
            "paths": results['paths'],
        }

    task_id = get_task_id(address)
    return get_data(task_id)

# Restrict the search to the first ten seed addresses
address_list = addresses['address'].head(10).to_list()

# Run the exchange search address by address, with a progress bar
results_list = []
progress = tqdm(address_list, desc="Searching centralized exchange connections for addresses")
for address in progress:
    results_list.append(get_QL_results(address))

# Show the per-address summary without the nested paths column
df_ql = pd.DataFrame(results_list)
df_ql.drop(columns="paths")

Searching centralized exchange connections for addresses: 100%|██████████| 10/10 [00:46<00:00,  4.62s/it]


Unnamed: 0,address,pct_traced_to_exchange,nr_pathes_found
0,1JwRp2J8bQcoG8XTUbxQZaEj9QB4RB6zEa,3.109556,9
1,1EZS92K4xJbymDLwG4F7PNF5idPE62e9XY,0.0,0
2,16B4HuSAJ4WRdCq7dzA5b4ASh6QQ7ytZWB,0.0,0
3,1EdX5vtBiHGmkqbJc7VRSuVMx9Kpgh53Tp,45.495085,19
4,3Ch7RPfwkJ3wHhiBfA4CNc8SagGdjbZwVs,0.0,0
5,19GqTJDhu7A1qg7rnK3KS7tmCkCTMTz6xD,0.013151,2
6,1NMRCQMfhfVyAyuEubdfneE2H458Njog3v,0.0,0
7,16XhmM7nPvR15eFdmVJs4QWcWpnYVS6FTv,0.0,0
8,1EyXwmxKd74HeyqbZbmXJsNxmfpiPeAF3F,0.0,0
9,1NPy1TBRyk6vMeGG3e5GaJWxYa9HbsNtDm,21.851681,1


### Get a single path


In [132]:
# Take the first non-empty entry of the paths column
paths = next(entry for entry in df_ql['paths'] if entry)

# Split the nodes of its first path into addresses and transaction hashes
path = paths[0]["nodes"]
addresses_ql = []
transactions_ql = []
for node in path:
    addresses_ql.append(node["output_address"])
    transactions_ql.append(node["tx_hash"])

# Assemble the trace string: the first node's address gets the perpetrator
# prefix, every further node adds its transaction followed by its address
transaction_prefix = "T_"
perpetrator_prefix = "PA_"
neutral_prefix = "HA_"
hops = [
    f"{transaction_prefix}{tx_hash},{neutral_prefix}{hop_address}"
    for tx_hash, hop_address in zip(transactions_ql[1:], addresses_ql[1:])
]
trace_str = ",".join([f"{perpetrator_prefix}{addresses_ql[0]}", *hops])


url = f"https://app.ikna.io/pathfinder/btc/path/{trace_str}"
url

'https://app.ikna.io/pathfinder/btc/path/PA_1JwRp2J8bQcoG8XTUbxQZaEj9QB4RB6zEa,T_db7adde440d9f1ee69938cffeededc5b6917e79fb6263dc632546ae33a4a78d9,HA_1JwRp2J8bQcoG8XTUbxQZaEj9QB4RB6zEa,T_22da5847109363a6193b36558263a509fb48c2c0e3438dfdbb6c58141a1a712e,HA_38YEQmUhQWb37JzhNJSmXtXQhKy7adAcNu,T_24c3471a977673be1246112aabed3547204ca3bc20cbec747618c638dce34372,HA_bc1ql4909z0h0jhtr4ntuz6l2fndc7pdf8rflssz8s,T_e0fcded3fa053197403825a55fc07d2099eafbf4ea15f9614a38cbfe318a220c,HA_3L4KtC4QDY4hANF8J7zc2KqrnBuJdgyvkc,T_9856b203625e7b09f8d16d198315403542dcb4d68bd20d4784c2ac0369ad175e,HA_3Guu5tUGZBCQdwX6zLyDhgDrHX9GZ14DKX,T_b8ed67db6f7396f4b1183d4bb44507149acccf79c38cb8bc1138caeaff2118d1,HA_3ANXMo5pCSMCKykATA4ogH3eY2qVsyu4qM'

# Q3: Can I link more addresses to our seed addresses?

We now fetch summary statistics for each entity.

In [133]:
# Fetch the entity records for every distinct entity of the seed addresses
# with one bulk request.
with graphsense.ApiClient(configuration) as clnt:
    # Bind the API to the client opened by this `with` block, not to a name
    # left over from an earlier cell whose client context has already exited.
    blkapi = bulk_api.BulkApi(clnt)
    rcsv = blkapi.bulk_csv(
        CURRENCY,
        operation="get_entity",
        body={
            'entity': respAddrDF['entity'].drop_duplicates().to_list()
        },
        num_pages=1,
        _preload_content=False,
    )
    respEntityDF = pd.read_csv(rcsv)

# Columns shown in the cell output
respEntityDF[
    ["best_address_tag_label",
     "root_address",
     "no_addresses",
     "balance_eur",
     "total_received_eur",
     "total_spent_eur",
     "in_degree",
     "out_degree",
     "no_incoming_txs",
     "no_outgoing_txs",
     "first_tx_height",
     "last_tx_height"]
]

Unnamed: 0,best_address_tag_label,root_address,no_addresses,balance_eur,total_received_eur,total_spent_eur,in_degree,out_degree,no_incoming_txs,no_outgoing_txs,first_tx_height,last_tx_height
0,Sextortion Spam,18eBGkYam1wjz1S77jz3VmADuYYFzhA3vB,1,0.00,7413.08,7847.13,17,1,19,1,556368,557342
1,Sextortion Spam,192CdbpYmpQhbpSZy5J9qyNE3YCxPpxdxv,1,0.00,0.86,0.87,1,2,1,1,543949,544640
2,Sextortion Spam,16xXHkPvKb72cwqYmH1ZfBMaW5K3hFP6My,1,0.00,537.74,537.74,1,2,1,1,553677,553678
3,Sextortion Spam,16FTEMtNaCEWzFRpkkBMqpuh18cbwU4KzM,1,0.00,1539.99,1574.69,1,5,1,5,542145,542437
4,Sextortion Spam,1BCGDtVZPqBMZWm5FdFe1RVgCGku17LZgb,1,0.00,1145.46,1143.37,2,3,2,2,559062,559433
...,...,...,...,...,...,...,...,...,...,...,...,...
91,Sextortion Spam,17UrRFJn3xs4acGTd76rhDphJvMpwZuAwY,9,0.00,11543.47,14916.18,10,1,10,1,543626,576808
92,Sextortion Spam,3JRKMDfhiMudTsjUxn4zzDhkpxnke4eGD4,1,0.00,444.50,439.72,1,1,1,1,558780,559654
93,Sextortion Spam,17iRfpgSwmJ6nLXR8evx6pUBo3R33S5LXB,4,0.00,5954.55,6086.09,14,3,14,2,537047,547878
94,Sextortion Spam,1DzM9y4fRgWqpZZCsvf5Rx4HupbE5Q5r4y,52,36.19,212858.48,211622.39,340,7,411,7,543026,647614
