In [1]:
# To display full output in Notebook, instead of only the last result
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import pandas as pd
import requests
import json
import time
from bech32 import bech32_encode, convertbits

from datetime import datetime
from tqdm import tqdm
from bech32 import bech32_decode, convertbits
from binascii import hexlify

In [3]:
pd.options.display.float_format = "{:.6f}".format

In [4]:
def bech32_to_hex(addr_bech32: str) -> str:
    """Decode a bech32 address and return its payload bytes as a hex string.

    Args:
        addr_bech32: The bech32-encoded address string.

    Returns:
        The decoded payload, hex-encoded.

    Raises:
        ValueError: If the string fails bech32 decoding, or if its 5-bit
            payload cannot be regrouped into whole bytes.
    """
    # The human-readable prefix is not needed to build the hex form.
    _hrp, data = bech32_decode(addr_bech32)
    if data is None:
        raise ValueError(f"Invalid bech32 address: {addr_bech32}")
    # pad=False: leftover bits are rejected rather than padded out.
    decoded = convertbits(data, 5, 8, False)
    if decoded is None:
        # convertbits reports a bad regrouping by returning None; without this
        # check bytes(None) would surface as an unrelated TypeError.
        raise ValueError(f"Invalid bech32 address: {addr_bech32}")
    return hexlify(bytes(decoded)).decode()

def safe_bech32_to_hex(addr):
    """Best-effort wrapper around ``bech32_to_hex``.

    Returns None for missing values (as judged by ``pd.isna``) and for any
    input that ``bech32_to_hex`` fails to convert; otherwise returns the hex
    string.
    """
    hex_form = None
    if not pd.isna(addr):
        try:
            hex_form = bech32_to_hex(addr)
        except Exception:
            # Any conversion failure is reported as "no hex form".
            hex_form = None
    return hex_form

def hex_to_bech32(addr_hex: str, hrp="addr") -> str:
    """Encode a hex string as a bech32 address.

    Args:
        addr_hex: Hex-encoded bytes to encode.
        hrp: Human-readable prefix passed to ``bech32_encode``.

    Returns:
        Whatever ``bech32_encode`` produces for the regrouped 5-bit payload.

    Raises:
        ValueError: If ``addr_hex`` is not valid hexadecimal.
    """
    raw_bytes = bytes.fromhex(addr_hex)
    # Regroup 8-bit bytes into the 5-bit groups bech32 works with.
    return bech32_encode(hrp, convertbits(raw_bytes, 8, 5))


def safe_hex_to_bech32(addr_hex: str, hrp="addr") -> "str | None":
    """Best-effort variant of ``hex_to_bech32``.

    Args:
        addr_hex: Hex-encoded bytes to encode.
        hrp: Human-readable prefix for the bech32 string.

    Returns:
        The bech32 string, or None when the conversion fails for any reason
        (for example non-hex input or a non-string value).
    """
    try:
        # Delegate so the conversion logic lives in exactly one place.
        return hex_to_bech32(addr_hex, hrp)
    except Exception:
        # Deliberately broad: callers use None as the "could not convert" marker.
        return None

In [5]:
path = "/home/jovyan/work/New Topic/WRT_holders_df.csv"
WRT_holders_df = pd.read_csv(path)

In [6]:
WRT_holders_df.head()

Unnamed: 0,token,bech32_address,quantity,token_amount,hex_address
0,WRT,addr1wypr0np3xatwhddulsnj3aaac65qg768zgs2xpd2x...,18802768670327,18802768.670327,710237cc313756ebb5bcfc2728f7bdc6a8047b471220a3...
1,WRT,addr1zxhew7fmsup08qvhdnkg8ccra88pw7q5trrncja3d...,15889017227655,15889017.227655,11af97793b8702f381976cec83e303e9ce17781458c73c...
2,WRT,addr1wy9z0v8mrkhtyll43fu6mnhu0p87tna48xt4p5649...,13797083749206,13797083.749206,710a27b0fb1daeb27ff58a79adcefc784fe5cfb5399750...
3,WRT,addr1q8fhnpzgdukm4fc8xatkpx6kmdev7h22ze8nm52l6...,8805566666666,8805566.666666,01d37984486f2dbaa7073757609b56db72cf5d4a164f3d...
4,WRT,addr1qyr6ddcy3cc0srahvqxkzxctr4h84nml9ngtsdlue...,7560817668505,7560817.668505,0107a6b7048e30f80fb7600d611b0b1d6e7acf7f2cd0b8...


In [7]:
# Count how many bech32 addresses start with "addr1v"
enterprise_addr_count = WRT_holders_df["bech32_address"].str.startswith("addr1v").sum()
print(f"Number of Enterprise (addr1v...) addresses: {enterprise_addr_count}")

Number of Enterprise (addr1v...) addresses: 12


In [8]:
WRT_holders_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9978 entries, 0 to 9977
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   token           9978 non-null   object 
 1   bech32_address  9978 non-null   object 
 2   quantity        9978 non-null   int64  
 3   token_amount    9978 non-null   float64
 4   hex_address     9955 non-null   object 
dtypes: float64(1), int64(1), object(3)
memory usage: 389.9+ KB


# API Fetching Txs Summaries

In [14]:
def bech32_to_hex(bech32_addr):
    """
    Convert a bech32 Shelley address (e.g., addr1...) to hex.

    NOTE: this redefines the ``bech32_to_hex`` helper from an earlier cell;
    whichever cell ran last is the one in effect.

    Raises:
        ValueError: If the string fails bech32 decoding, or if its 5-bit
            payload cannot be regrouped into whole bytes.
    """
    hrp, data = bech32_decode(bech32_addr)
    if hrp is None or data is None:
        raise ValueError("Invalid bech32 address")
    # pad=False: leftover bits are rejected rather than padded out.
    regrouped = convertbits(data, 5, 8, False)
    if regrouped is None:
        # convertbits returns None on a bad regrouping; bytes(None) would
        # otherwise raise a confusing TypeError instead of ValueError.
        raise ValueError("Invalid bech32 address")
    return bytes(regrouped).hex()

def safe_bech32_to_hex(addr):
    """Return the hex form of ``addr``, or None if ``bech32_to_hex`` raises.

    NOTE: this redefines the same-named helper from an earlier cell.
    """
    try:
        hex_form = bech32_to_hex(addr)
    except Exception:
        # Any failure means "not a convertible bech32 address".
        return None
    return hex_form

In [108]:
from collections import Counter

all_addresses = WRT_holders_df["bech32_address"].dropna().astype(str)

def classify_prefix(addr):
    """Label an address string according to its leading characters.

    Prefixes are checked in the listed order; strings matching none of them
    are labelled "Other / Unknown".
    """
    prefix_labels = (
        ("DdzFF", "Byron (DdzFF)"),
        ("Ae2td", "Byron (Ae2td)"),
        ("addr1", "Shelley"),
        ("stake1", "Stake"),
    )
    for prefix, label in prefix_labels:
        if addr.startswith(prefix):
            return label
    return "Other / Unknown"

prefix_counts = Counter([classify_prefix(a) for a in all_addresses])
print(prefix_counts)


Counter({'Shelley': 9955, 'Byron (Ae2td)': 16, 'Byron (DdzFF)': 7})


In [None]:
import requests
import json
import time
from tqdm.notebook import tqdm
from bech32 import bech32_decode, convertbits  # pip install bech32


def fetch_blockfrost_address_summaries(addresses, output_file="wrt_address_txs_summary.json",
                                       api_key="mainnetD5tXDiNzInPOA8ABiVQjb9gBJxB2zqXu",
                                       max_retries=3, sleep_time=0.2, timeout=30):
    """Fetch the Blockfrost ``/addresses/{addr}/total`` summary for each address.

    Args:
        addresses: Iterable of address strings. Entries that
            ``safe_bech32_to_hex`` cannot decode are skipped.
        output_file: Path of the JSON file the results are written to.
        api_key: Blockfrost project id, sent as the ``project_id`` header.
        max_retries: Maximum number of attempts per address.
        sleep_time: Seconds to wait after each failed attempt.
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot hang the whole run.

    Returns:
        None. Successful responses are written to ``output_file`` as a
        ``{address: response_json}`` mapping; addresses that never returned
        HTTP 200 are left out of the file and reported on stdout.
    """
    # SECURITY(review): the default `api_key` is a credential committed in the
    # notebook. It is kept only so existing calls keep working; rotate it and
    # pass the key explicitly (e.g. read from an environment variable).
    headers = {"project_id": api_key}

    # ✅ Filter only valid bech32 addresses (exclude Byron)
    valid_addresses = [addr for addr in addresses if safe_bech32_to_hex(addr) is not None]

    summary = {}
    failed = {}  # address -> description of the last failure

    for addr in tqdm(valid_addresses, desc="Fetching summaries"):
        url = f"https://cardano-mainnet.blockfrost.io/api/v0/addresses/{addr}/total"
        last_problem = "no attempt made"
        for _ in range(max_retries):
            try:
                res = requests.get(url, headers=headers, timeout=timeout)
                if res.status_code == 200:
                    summary[addr] = res.json()
                    break
                last_problem = f"HTTP {res.status_code}"
            except (requests.RequestException, ValueError) as exc:
                # ValueError covers a 200 response whose body is not valid JSON.
                last_problem = repr(exc)
            time.sleep(sleep_time)
        else:
            # Every attempt failed: remember why instead of dropping it silently.
            failed[addr] = last_problem

    successful_data = {k: v for k, v in summary.items() if "error" not in v}

    with open(output_file, "w") as f:
        json.dump(successful_data, f, indent=2)

    print(f"✅ Done! {len(successful_data)} results saved to '{output_file}'")
    if failed:
        print(f"⚠️ {len(failed)} addresses could not be fetched after {max_retries} attempts")
        for addr, problem in list(failed.items())[:10]:
            print(f"   {addr}: {problem}")

# 🟡 Replace with your real list
address_list = WRT_holders_df["bech32_address"].dropna().unique().tolist()
fetch_blockfrost_address_summaries(address_list)


Fetching summaries:   0%|          | 0/9955 [00:00<?, ?it/s]

In [107]:
valid_addresses = [addr for addr in address_list if safe_bech32_to_hex(addr) is not None]
dropped_addresses = set(address_list) - set(valid_addresses)
print(f"❌ Dropped {len(dropped_addresses)} invalid addresses")


❌ Dropped 23 invalid addresses


# Get Address Txs Summary

In [9]:
import json
import pandas as pd

# === Load JSON file ===
with open("wrt_address_txs_summary.json") as f:
    data = json.load(f)

# === Only keep WRT policy & decimals ===
# Each non-lovelace unit is matched against this value with startswith().
# NOTE(review): the literal is 76 hex chars, so it is presumably policy id +
# asset name rather than a bare policy id — confirm.
WRT_POLICY = "c0ee29a85b13209423b10447d3c2e6a50641a15c57770e27cb9d507357696e67526964657273"
WRT_DECIMALS = 6  # WRT usually has 6 decimals


def _tally_side(entries):
    """Sum one side (received or sent) of an address summary.

    Returns (ada_total, wrt_total, sorted_non_ada_units).
    """
    ada_total = 0.0
    wrt_total = 0.0
    other_units = set()
    for entry in entries:
        unit = entry["unit"]
        amount = int(entry["quantity"])
        if unit == "lovelace":
            # Quantities are in lovelace; 1 ADA = 1_000_000 lovelace.
            ada_total += amount / 1_000_000
            continue
        other_units.add(unit)
        if unit.startswith(WRT_POLICY):
            wrt_total += amount / (10 ** WRT_DECIMALS)
    return ada_total, wrt_total, sorted(other_units)


records = []

for addr, info in data.items():
    tx_count = info.get("tx_count", 0)
    recv_ada, recv_wrt, recv_units = _tally_side(info.get("received_sum", []))
    sent_ada, sent_wrt, sent_units = _tally_side(info.get("sent_sum", []))

    # Key order fixes the column order of the resulting DataFrame.
    records.append({
        "bech32_address": addr,
        "tx_count": tx_count,
        "received_sum_ada": recv_ada,
        "sent_sum_ada": sent_ada,
        "received_sum_wrt": recv_wrt,
        "sent_sum_wrt": sent_wrt,
        # Deduplicated, sorted unit lists (kept for later merging into holders).
        "received_token_units": recv_units,
        "sent_token_units": sent_units,
    })

# === Convert to DataFrame ===
wrt_holders_txs_summary_df = pd.DataFrame(records)

# ✅ Done
wrt_holders_txs_summary_df.head()


Unnamed: 0,bech32_address,tx_count,received_sum_ada,sent_sum_ada,received_sum_wrt,sent_sum_wrt,received_token_units,sent_token_units
0,addr1zxhew7fmsup08qvhdnkg8ccra88pw7q5trrncja3d...,3312,1616848899.715349,1616493478.006337,42779232925.73847,42763334914.664604,[6fdc63a1d71dc2c65502b79baae7fb543185702b12c3c...,[6fdc63a1d71dc2c65502b79baae7fb543185702b12c3c...
1,addr1wy9z0v8mrkhtyll43fu6mnhu0p87tna48xt4p5649...,2949,5504.49444,4956.38249,116743987.551725,102946903.802519,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...
2,addr1q8fhnpzgdukm4fc8xatkpx6kmdev7h22ze8nm52l6...,1,1010.909314,0.0,8805566.666666,0.0,[6ec94af5190b33df99e49dba5cd7bb01fee025c6c0979...,[]
3,addr1qyr6ddcy3cc0srahvqxkzxctr4h84nml9ngtsdlue...,35,9954.666601,9592.956809,539527077.049635,531966259.38113,[06782c3e4eba019f64c34635735f1c307555e40490d67...,[06782c3e4eba019f64c34635735f1c307555e40490d67...
4,addr1qyk9t3uyjvk2v3eny2vv5mt93z4laqdcmsm9dpgnj...,58,1542551.271215,1540866.792671,768068915.976427,763163074.563795,[026a18d04a0c642759bb3d83b12e3344894e5c1c7b2ae...,[026a18d04a0c642759bb3d83b12e3344894e5c1c7b2ae...


In [10]:
wrt_holders_txs_summary_df["hex_address"] = wrt_holders_txs_summary_df["bech32_address"].apply(safe_bech32_to_hex)

# ✅ Done!
wrt_holders_txs_summary_df.head()

Unnamed: 0,bech32_address,tx_count,received_sum_ada,sent_sum_ada,received_sum_wrt,sent_sum_wrt,received_token_units,sent_token_units,hex_address
0,addr1zxhew7fmsup08qvhdnkg8ccra88pw7q5trrncja3d...,3312,1616848899.715349,1616493478.006337,42779232925.73847,42763334914.664604,[6fdc63a1d71dc2c65502b79baae7fb543185702b12c3c...,[6fdc63a1d71dc2c65502b79baae7fb543185702b12c3c...,11af97793b8702f381976cec83e303e9ce17781458c73c...
1,addr1wy9z0v8mrkhtyll43fu6mnhu0p87tna48xt4p5649...,2949,5504.49444,4956.38249,116743987.551725,102946903.802519,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,710a27b0fb1daeb27ff58a79adcefc784fe5cfb5399750...
2,addr1q8fhnpzgdukm4fc8xatkpx6kmdev7h22ze8nm52l6...,1,1010.909314,0.0,8805566.666666,0.0,[6ec94af5190b33df99e49dba5cd7bb01fee025c6c0979...,[],01d37984486f2dbaa7073757609b56db72cf5d4a164f3d...
3,addr1qyr6ddcy3cc0srahvqxkzxctr4h84nml9ngtsdlue...,35,9954.666601,9592.956809,539527077.049635,531966259.38113,[06782c3e4eba019f64c34635735f1c307555e40490d67...,[06782c3e4eba019f64c34635735f1c307555e40490d67...,0107a6b7048e30f80fb7600d611b0b1d6e7acf7f2cd0b8...
4,addr1qyk9t3uyjvk2v3eny2vv5mt93z4laqdcmsm9dpgnj...,58,1542551.271215,1540866.792671,768068915.976427,763163074.563795,[026a18d04a0c642759bb3d83b12e3344894e5c1c7b2ae...,[026a18d04a0c642759bb3d83b12e3344894e5c1c7b2ae...,012c55c784932ca647332298ca6d6588abfe81b8dc3656...


In [11]:
wrt_holders_txs_summary_df['hex_address'][0]

'11af97793b8702f381976cec83e303e9ce17781458c73c4bb16fe02b831ffbacc6c0d1ac76e1562d400a8e991eede2210c691fb433009171b0'

In [12]:
wrt_holders_txs_summary_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9947 entries, 0 to 9946
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   bech32_address        9947 non-null   object 
 1   tx_count              9947 non-null   int64  
 2   received_sum_ada      9947 non-null   float64
 3   sent_sum_ada          9947 non-null   float64
 4   received_sum_wrt      9947 non-null   float64
 5   sent_sum_wrt          9947 non-null   float64
 6   received_token_units  9947 non-null   object 
 7   sent_token_units      9947 non-null   object 
 8   hex_address           9947 non-null   object 
dtypes: float64(4), int64(1), object(4)
memory usage: 699.5+ KB


In [13]:
wrt_holders_txs_summary_df['hex_address'].nunique()

9947

# Get Address Assets Summary

In [104]:
# Your API key
CARDANOSCAN_API_KEY = "520c718b-75dc-4898-aeca-199e059de866"

In [22]:
import requests
import json
import time
from tqdm.notebook import tqdm
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# === Setup session with retry and backoff ===
session = requests.Session()
retries = Retry(
    total=5,
    backoff_factor=0.3,
    # 429 included so rate-limit responses are retried with backoff too.
    status_forcelist=[429, 500, 502, 503, 504],
    allowed_methods=["GET"]
)
adapter = HTTPAdapter(max_retries=retries)
session.mount("https://", adapter)

# === API ===
# SECURITY(review): this key is committed in the notebook; rotate it and load
# it from the environment or a secrets store instead of a literal.
CARDANOSCAN_API_KEY = "520c718b-75dc-4898-aeca-199e059de866"
HEADERS = {"apiKey": CARDANOSCAN_API_KEY}
BASE_URL = "https://api.cardanoscan.io/api/v1/asset/list/byAddress"

# === Addresses ===
hex_addresses = wrt_holders_txs_summary_df["hex_address"].dropna().unique().tolist()
address_assets = {}
# Addresses whose pagination stopped on an error. Their entry in
# `address_assets` may be empty or partial and must not be read as "no assets".
failed_asset_addresses = []

print(f"🔍 Fetching asset lists for {len(hex_addresses)} addresses...")

for addr in tqdm(hex_addresses, desc="Fetching assets by address"):
    all_assets = []
    page = 1
    fetch_complete = False

    while True:
        try:
            response = session.get(
                BASE_URL,
                params={"address": addr, "pageNo": page},
                headers=HEADERS,
                timeout=15,
            )
        except requests.exceptions.SSLError as e:
            print(f"🔒 SSL error for {addr} page {page}: {e}")
            break
        except requests.exceptions.RequestException as e:
            print(f"⚠️ Request exception for {addr}: {e}")
            break

        if response.status_code != 200:
            print(f"❌ Failed for {addr} | Status: {response.status_code} | Body: {response.text}")
            break

        try:
            # `or []` also covers a present-but-null "tokens" field.
            tokens = response.json().get("tokens") or []
        except ValueError:
            # ValueError is the common base of the JSON decode errors that
            # response.json() can raise across requests versions.
            print(f"⚠️ JSON decode error for {addr} page {page}")
            break

        if not tokens:
            fetch_complete = True  # ✅ No more tokens
            break

        all_assets.extend(tokens)
        page += 1
        time.sleep(0.25)

    address_assets[addr] = all_assets
    if not fetch_complete:
        failed_asset_addresses.append(addr)

# === Save
with open("wrt_address_assets_by_hex.json", "w") as f:
    json.dump(address_assets, f, indent=2)

print("✅ Done! Saved all asset results to 'wrt_address_assets_by_hex.json'")
if failed_asset_addresses:
    print(f"⚠️ {len(failed_asset_addresses)} addresses have incomplete asset lists "
          f"(see `failed_asset_addresses`); re-fetch them before trusting their balances.")


🔍 Fetching asset lists for 9947 addresses...


Fetching assets by address:   0%|          | 0/9947 [00:00<?, ?it/s]

✅ Done! Saved all asset results to 'wrt_address_assets_by_hex.json'


In [13]:
import json
import pandas as pd

# === Load JSON file ===
with open("wrt_address_assets_by_hex.json", "r") as f:
    address_assets = json.load(f)

# === WRT Asset ID and Decimals ===
WRT_ASSET_ID = "c0ee29a85b13209423b10447d3c2e6a50641a15c57770e27cb9d507357696e67526964657273"
WRT_DECIMALS = 6  # WRT usually has 6 decimals

# Raw balances are divided by this to get whole-token amounts.
wrt_unit_scale = 10 ** WRT_DECIMALS

# === One record per address ===
records = []

for hex_address, tokens in address_assets.items():
    # (assetId, integer balance) for every token reported for this address.
    parsed = [(tok.get("assetId"), int(tok.get("balance", 0))) for tok in tokens]

    # All assetIds, in response order (kept for later merging into holders).
    asset_ids = [tok_id for tok_id, _ in parsed]

    # Only the WRT entry contributes a balance; if it appears more than once
    # the last occurrence wins, and 0.0 is used when it is absent.
    wrt_amounts = [raw / wrt_unit_scale for tok_id, raw in parsed if tok_id == WRT_ASSET_ID]
    wrt_balance = wrt_amounts[-1] if wrt_amounts else 0.0

    records.append({
        "hex_address": hex_address,
        "wrt_balance": wrt_balance,
        "asset_ids": asset_ids,  # list of assetIds instead of token_num
    })

# === Convert to DataFrame ===
wrt_holders_assets_summary_df = pd.DataFrame(records)

# ✅ Preview result
wrt_holders_assets_summary_df.head()


Unnamed: 0,hex_address,wrt_balance,asset_ids
0,11af97793b8702f381976cec83e303e9ce17781458c73c...,15906809.387003,[6fdc63a1d71dc2c65502b79baae7fb543185702b12c3c...
1,710a27b0fb1daeb27ff58a79adcefc784fe5cfb5399750...,13797083.749206,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...
2,01d37984486f2dbaa7073757609b56db72cf5d4a164f3d...,8805566.666666,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...
3,0107a6b7048e30f80fb7600d611b0b1d6e7acf7f2cd0b8...,7560817.668505,[06782c3e4eba019f64c34635735f1c307555e40490d67...
4,012c55c784932ca647332298ca6d6588abfe81b8dc3656...,4905841.412632,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...


In [14]:
wrt_holders_assets_summary_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9947 entries, 0 to 9946
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   hex_address  9947 non-null   object 
 1   wrt_balance  9947 non-null   float64
 2   asset_ids    9947 non-null   object 
dtypes: float64(1), object(2)
memory usage: 233.3+ KB


In [15]:
# === Merge the two dataframes on 'hex_address' ===
wrt_holders_txs_and_assets_summary_df = pd.merge(
    wrt_holders_txs_summary_df,
    wrt_holders_assets_summary_df,
    on="hex_address",
    how="left" 
)

wrt_holders_txs_and_assets_summary_df.head()


Unnamed: 0,bech32_address,tx_count,received_sum_ada,sent_sum_ada,received_sum_wrt,sent_sum_wrt,received_token_units,sent_token_units,hex_address,wrt_balance,asset_ids
0,addr1zxhew7fmsup08qvhdnkg8ccra88pw7q5trrncja3d...,3312,1616848899.715349,1616493478.006337,42779232925.73847,42763334914.664604,[6fdc63a1d71dc2c65502b79baae7fb543185702b12c3c...,[6fdc63a1d71dc2c65502b79baae7fb543185702b12c3c...,11af97793b8702f381976cec83e303e9ce17781458c73c...,15906809.387003,[6fdc63a1d71dc2c65502b79baae7fb543185702b12c3c...
1,addr1wy9z0v8mrkhtyll43fu6mnhu0p87tna48xt4p5649...,2949,5504.49444,4956.38249,116743987.551725,102946903.802519,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,710a27b0fb1daeb27ff58a79adcefc784fe5cfb5399750...,13797083.749206,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...
2,addr1q8fhnpzgdukm4fc8xatkpx6kmdev7h22ze8nm52l6...,1,1010.909314,0.0,8805566.666666,0.0,[6ec94af5190b33df99e49dba5cd7bb01fee025c6c0979...,[],01d37984486f2dbaa7073757609b56db72cf5d4a164f3d...,8805566.666666,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...
3,addr1qyr6ddcy3cc0srahvqxkzxctr4h84nml9ngtsdlue...,35,9954.666601,9592.956809,539527077.049635,531966259.38113,[06782c3e4eba019f64c34635735f1c307555e40490d67...,[06782c3e4eba019f64c34635735f1c307555e40490d67...,0107a6b7048e30f80fb7600d611b0b1d6e7acf7f2cd0b8...,7560817.668505,[06782c3e4eba019f64c34635735f1c307555e40490d67...
4,addr1qyk9t3uyjvk2v3eny2vv5mt93z4laqdcmsm9dpgnj...,58,1542551.271215,1540866.792671,768068915.976427,763163074.563795,[026a18d04a0c642759bb3d83b12e3344894e5c1c7b2ae...,[026a18d04a0c642759bb3d83b12e3344894e5c1c7b2ae...,012c55c784932ca647332298ca6d6588abfe81b8dc3656...,4905841.412632,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...


In [16]:
wrt_holders_txs_and_assets_summary_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9947 entries, 0 to 9946
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   bech32_address        9947 non-null   object 
 1   tx_count              9947 non-null   int64  
 2   received_sum_ada      9947 non-null   float64
 3   sent_sum_ada          9947 non-null   float64
 4   received_sum_wrt      9947 non-null   float64
 5   sent_sum_wrt          9947 non-null   float64
 6   received_token_units  9947 non-null   object 
 7   sent_token_units      9947 non-null   object 
 8   hex_address           9947 non-null   object 
 9   wrt_balance           9947 non-null   float64
 10  asset_ids             9947 non-null   object 
dtypes: float64(5), int64(1), object(5)
memory usage: 854.9+ KB


In [17]:
wrt_holders_txs_and_assets_summary_df['hex_address'][0]

'11af97793b8702f381976cec83e303e9ce17781458c73c4bb16fe02b831ffbacc6c0d1ac76e1562d400a8e991eede2210c691fb433009171b0'

# Get ADA Balance

In [33]:
!curl \
 --request GET 'https://api.cardanoscan.io/api/v1/address/balance?address=11af97793b8702f381976cec83e303e9ce17781458c73c4bb16fe02b831ffbacc6c0d1ac76e1562d400a8e991eede2210c691fb433009171b0' \
 --header "apiKey: 520c718b-75dc-4898-aeca-199e059de866"

{"hash":"11af97793b8702f381976cec83e303e9ce17781458c73c4bb16fe02b831ffbacc6c0d1ac76e1562d400a8e991eede2210c691fb433009171b0","balance":"355231554617"}

In [34]:
import requests
import time
import json
from tqdm import tqdm

# === Setup ===
# SECURITY(review): this key is committed in the notebook; rotate it and load
# it from the environment or a secrets store instead of a literal.
API_KEY = "520c718b-75dc-4898-aeca-199e059de866"  
HEADERS = {"apiKey": API_KEY}
BASE_URL = "https://api.cardanoscan.io/api/v1/address/balance"
REQUEST_TIMEOUT_S = 15  # without a timeout a stalled connection blocks forever
MAX_ATTEMPTS = 3

# === Get all unique hex addresses ===
hex_addresses = wrt_holders_txs_and_assets_summary_df["hex_address"].dropna().unique().tolist()

# === Container for results ===
ada_balance_dict = {}
# Addresses that failed every attempt. They are absent from `ada_balance_dict`,
# so a later `.map(ada_balance_dict)` yields NaN for them.
failed_balance_addresses = []

# === Fetch ADA balance for each address ===
print(f"🔍 Fetching ADA balance for {len(hex_addresses)} addresses...")

for addr in tqdm(hex_addresses):
    for attempt in range(MAX_ATTEMPTS):  # retry up to 3 times
        try:
            response = requests.get(
                BASE_URL,
                params={"address": addr},
                headers=HEADERS,
                timeout=REQUEST_TIMEOUT_S,
            )
            if response.status_code == 200:
                data = response.json()
                # Balance arrives as a string of lovelace; stored as an int.
                ada_balance_dict[addr] = int(data.get("balance", 0))
                break
            print(f"⚠️ {addr} | Status: {response.status_code}")
        except Exception as e:
            # Best-effort: log and retry rather than abort a multi-hour run.
            print(f"❌ {addr} | Error: {e}")
        time.sleep(1)
    else:
        # Loop finished without `break`: no attempt succeeded.
        failed_balance_addresses.append(addr)
    time.sleep(0.2)  # rate limiting

# === Save as JSON file ===
with open("wrt_address_ada_balance.json", "w") as f:
    json.dump(ada_balance_dict, f, indent=2)

print("✅ Saved ADA balances to wrt_address_ada_balance.json")
if failed_balance_addresses:
    print(f"⚠️ {len(failed_balance_addresses)} addresses have no balance recorded "
          f"(see `failed_balance_addresses`)")


🔍 Fetching ADA balance for 9947 addresses...


  7%|██▋                                   | 695/9947 [07:44<1:09:43,  2.21it/s]

❌ 011b48b573e4389db9a7eaaf09f5d2cd5c10b7abbadf3285118a68a6bc403915bc968937a6f72b7ac6b1db55507f82287af945a90b1dcab026 | Error: HTTPSConnectionPool(host='api.cardanoscan.io', port=443): Max retries exceeded with url: /api/v1/address/balance?address=011b48b573e4389db9a7eaaf09f5d2cd5c10b7abbadf3285118a68a6bc403915bc968937a6f72b7ac6b1db55507f82287af945a90b1dcab026 (Caused by SSLError(SSLEOFError(8, '[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1006)')))


100%|█████████████████████████████████████| 9947/9947 [2:13:50<00:00,  1.24it/s]


✅ Saved ADA balances to wrt_address_ada_balance.json


In [18]:
import json

# Step 1: Read the saved {hex_address: lovelace} mapping back from disk
with open("wrt_address_ada_balance.json", "r") as fh:
    ada_balance_dict = json.load(fh)

# Step 2: Look up each row's balance, then convert lovelace -> ADA
lovelace_balances = wrt_holders_txs_and_assets_summary_df["hex_address"].map(ada_balance_dict)
wrt_holders_txs_and_assets_summary_df["ada_balance"] = lovelace_balances / 1_000_000

# Optional: Check result
wrt_holders_txs_and_assets_summary_df.head()


Unnamed: 0,bech32_address,tx_count,received_sum_ada,sent_sum_ada,received_sum_wrt,sent_sum_wrt,received_token_units,sent_token_units,hex_address,wrt_balance,asset_ids,ada_balance
0,addr1zxhew7fmsup08qvhdnkg8ccra88pw7q5trrncja3d...,3312,1616848899.715349,1616493478.006337,42779232925.73847,42763334914.664604,[6fdc63a1d71dc2c65502b79baae7fb543185702b12c3c...,[6fdc63a1d71dc2c65502b79baae7fb543185702b12c3c...,11af97793b8702f381976cec83e303e9ce17781458c73c...,15906809.387003,[6fdc63a1d71dc2c65502b79baae7fb543185702b12c3c...,355231.554617
1,addr1wy9z0v8mrkhtyll43fu6mnhu0p87tna48xt4p5649...,2949,5504.49444,4956.38249,116743987.551725,102946903.802519,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,710a27b0fb1daeb27ff58a79adcefc784fe5cfb5399750...,13797083.749206,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,548.11195
2,addr1q8fhnpzgdukm4fc8xatkpx6kmdev7h22ze8nm52l6...,1,1010.909314,0.0,8805566.666666,0.0,[6ec94af5190b33df99e49dba5cd7bb01fee025c6c0979...,[],01d37984486f2dbaa7073757609b56db72cf5d4a164f3d...,8805566.666666,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,1010.909314
3,addr1qyr6ddcy3cc0srahvqxkzxctr4h84nml9ngtsdlue...,35,9954.666601,9592.956809,539527077.049635,531966259.38113,[06782c3e4eba019f64c34635735f1c307555e40490d67...,[06782c3e4eba019f64c34635735f1c307555e40490d67...,0107a6b7048e30f80fb7600d611b0b1d6e7acf7f2cd0b8...,7560817.668505,[06782c3e4eba019f64c34635735f1c307555e40490d67...,361.709792
4,addr1qyk9t3uyjvk2v3eny2vv5mt93z4laqdcmsm9dpgnj...,58,1542551.271215,1540866.792671,768068915.976427,763163074.563795,[026a18d04a0c642759bb3d83b12e3344894e5c1c7b2ae...,[026a18d04a0c642759bb3d83b12e3344894e5c1c7b2ae...,012c55c784932ca647332298ca6d6588abfe81b8dc3656...,4905841.412632,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,1684.478544


In [19]:
wrt_holders_txs_and_assets_summary_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9947 entries, 0 to 9946
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   bech32_address        9947 non-null   object 
 1   tx_count              9947 non-null   int64  
 2   received_sum_ada      9947 non-null   float64
 3   sent_sum_ada          9947 non-null   float64
 4   received_sum_wrt      9947 non-null   float64
 5   sent_sum_wrt          9947 non-null   float64
 6   received_token_units  9947 non-null   object 
 7   sent_token_units      9947 non-null   object 
 8   hex_address           9947 non-null   object 
 9   wrt_balance           9947 non-null   float64
 10  asset_ids             9947 non-null   object 
 11  ada_balance           9947 non-null   float64
dtypes: float64(6), int64(1), object(5)
memory usage: 932.7+ KB


In [20]:
wrt_holders_txs_and_assets_summary_df.head()

Unnamed: 0,bech32_address,tx_count,received_sum_ada,sent_sum_ada,received_sum_wrt,sent_sum_wrt,received_token_units,sent_token_units,hex_address,wrt_balance,asset_ids,ada_balance
0,addr1zxhew7fmsup08qvhdnkg8ccra88pw7q5trrncja3d...,3312,1616848899.715349,1616493478.006337,42779232925.73847,42763334914.664604,[6fdc63a1d71dc2c65502b79baae7fb543185702b12c3c...,[6fdc63a1d71dc2c65502b79baae7fb543185702b12c3c...,11af97793b8702f381976cec83e303e9ce17781458c73c...,15906809.387003,[6fdc63a1d71dc2c65502b79baae7fb543185702b12c3c...,355231.554617
1,addr1wy9z0v8mrkhtyll43fu6mnhu0p87tna48xt4p5649...,2949,5504.49444,4956.38249,116743987.551725,102946903.802519,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,710a27b0fb1daeb27ff58a79adcefc784fe5cfb5399750...,13797083.749206,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,548.11195
2,addr1q8fhnpzgdukm4fc8xatkpx6kmdev7h22ze8nm52l6...,1,1010.909314,0.0,8805566.666666,0.0,[6ec94af5190b33df99e49dba5cd7bb01fee025c6c0979...,[],01d37984486f2dbaa7073757609b56db72cf5d4a164f3d...,8805566.666666,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,1010.909314
3,addr1qyr6ddcy3cc0srahvqxkzxctr4h84nml9ngtsdlue...,35,9954.666601,9592.956809,539527077.049635,531966259.38113,[06782c3e4eba019f64c34635735f1c307555e40490d67...,[06782c3e4eba019f64c34635735f1c307555e40490d67...,0107a6b7048e30f80fb7600d611b0b1d6e7acf7f2cd0b8...,7560817.668505,[06782c3e4eba019f64c34635735f1c307555e40490d67...,361.709792
4,addr1qyk9t3uyjvk2v3eny2vv5mt93z4laqdcmsm9dpgnj...,58,1542551.271215,1540866.792671,768068915.976427,763163074.563795,[026a18d04a0c642759bb3d83b12e3344894e5c1c7b2ae...,[026a18d04a0c642759bb3d83b12e3344894e5c1c7b2ae...,012c55c784932ca647332298ca6d6588abfe81b8dc3656...,4905841.412632,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,1684.478544


# Save Final DataFrame

In [21]:
wrt_holders_txs_and_assets_summary_df.to_csv("wrt_holders_txs_and_assets_summary_df_revised.csv", index=False)

# Get stake address:

In [16]:
path = "/home/jovyan/work/New Topic/wrt_holders_txs_and_assets_summary_df_revised.csv"
wrt_holders_txs_and_assets_summary_df = pd.read_csv(path)
wrt_holders_txs_and_assets_summary_df.head()

Unnamed: 0,bech32_address,tx_count,received_sum_ada,sent_sum_ada,received_sum_wrt,sent_sum_wrt,received_token_units,sent_token_units,hex_address,wrt_balance,asset_ids,ada_balance
0,addr1zxhew7fmsup08qvhdnkg8ccra88pw7q5trrncja3d...,3312,1616848899.715349,1616493478.006337,42779232925.73847,42763334914.664604,['6fdc63a1d71dc2c65502b79baae7fb543185702b12c3...,['6fdc63a1d71dc2c65502b79baae7fb543185702b12c3...,11af97793b8702f381976cec83e303e9ce17781458c73c...,15906809.387003,['6fdc63a1d71dc2c65502b79baae7fb543185702b12c3...,355231.554617
1,addr1wy9z0v8mrkhtyll43fu6mnhu0p87tna48xt4p5649...,2949,5504.49444,4956.38249,116743987.551725,102946903.802519,['c0ee29a85b13209423b10447d3c2e6a50641a15c5777...,['c0ee29a85b13209423b10447d3c2e6a50641a15c5777...,710a27b0fb1daeb27ff58a79adcefc784fe5cfb5399750...,13797083.749206,['c0ee29a85b13209423b10447d3c2e6a50641a15c5777...,548.11195
2,addr1q8fhnpzgdukm4fc8xatkpx6kmdev7h22ze8nm52l6...,1,1010.909314,0.0,8805566.666666,0.0,['6ec94af5190b33df99e49dba5cd7bb01fee025c6c097...,[],01d37984486f2dbaa7073757609b56db72cf5d4a164f3d...,8805566.666666,['c0ee29a85b13209423b10447d3c2e6a50641a15c5777...,1010.909314
3,addr1qyr6ddcy3cc0srahvqxkzxctr4h84nml9ngtsdlue...,35,9954.666601,9592.956809,539527077.049635,531966259.38113,['06782c3e4eba019f64c34635735f1c307555e40490d6...,['06782c3e4eba019f64c34635735f1c307555e40490d6...,0107a6b7048e30f80fb7600d611b0b1d6e7acf7f2cd0b8...,7560817.668505,['06782c3e4eba019f64c34635735f1c307555e40490d6...,361.709792
4,addr1qyk9t3uyjvk2v3eny2vv5mt93z4laqdcmsm9dpgnj...,58,1542551.271215,1540866.792671,768068915.976427,763163074.563795,['026a18d04a0c642759bb3d83b12e3344894e5c1c7b2a...,['026a18d04a0c642759bb3d83b12e3344894e5c1c7b2a...,012c55c784932ca647332298ca6d6588abfe81b8dc3656...,4905841.412632,['c0ee29a85b13209423b10447d3c2e6a50641a15c5777...,1684.478544


In [17]:
wrt_holders_txs_and_assets_summary_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9947 entries, 0 to 9946
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   bech32_address        9947 non-null   object 
 1   tx_count              9947 non-null   int64  
 2   received_sum_ada      9947 non-null   float64
 3   sent_sum_ada          9947 non-null   float64
 4   received_sum_wrt      9947 non-null   float64
 5   sent_sum_wrt          9947 non-null   float64
 6   received_token_units  9947 non-null   object 
 7   sent_token_units      9947 non-null   object 
 8   hex_address           9947 non-null   object 
 9   wrt_balance           9947 non-null   float64
 10  asset_ids             9947 non-null   object 
 11  ada_balance           9947 non-null   float64
dtypes: float64(6), int64(1), object(5)
memory usage: 932.7+ KB


In [18]:
import requests
import json
import time
from tqdm.notebook import tqdm  

# === API config ===
# SECURITY(review): this project id is committed in the notebook; rotate it and
# load it from the environment or a secrets store instead of a literal.
API_KEY = "mainnetD5tXDiNzInPOA8ABiVQjb9gBJxB2zqXu"
HEADERS = {"project_id": API_KEY}
BASE_URL = "https://cardano-mainnet.blockfrost.io/api/v0/addresses"
REQUEST_TIMEOUT_S = 15  # without a timeout a stalled connection blocks forever

# === Load addresses ===
bech32_addresses = wrt_holders_txs_and_assets_summary_df["bech32_address"].dropna().unique().tolist()

# === Container for results ===
addr_to_stake = {}
# address -> reason, for lookups that did not return HTTP 200. These are also
# stored as None in `addr_to_stake`, so this dict is the only way to tell a
# failed lookup apart from an address whose response carries no stake address.
stake_lookup_failures = {}

# === Loop and call API with tqdm progress bar ===
for addr in tqdm(bech32_addresses, desc="🔍 Fetching stake addresses"):
    url = f"{BASE_URL}/{addr}/extended"
    addr_to_stake[addr] = None
    try:
        response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT_S)
        if response.status_code == 200:
            addr_to_stake[addr] = response.json().get("stake_address")
        else:
            stake_lookup_failures[addr] = f"HTTP {response.status_code}"
    except Exception as e:
        # Best-effort: record the reason and continue with the next address.
        stake_lookup_failures[addr] = repr(e)
    time.sleep(0.3)

# === Save to JSON ===
with open("wrt_holders_stake_address.json", "w") as f:
    json.dump(addr_to_stake, f, indent=2)

print(f"✅ Done! {len(addr_to_stake)} addresses processed.")
if stake_lookup_failures:
    print(f"⚠️ {len(stake_lookup_failures)} lookups failed (see `stake_lookup_failures`); "
          f"their None value does not mean 'no stake address'.")


🔍 Fetching stake addresses:   0%|          | 0/9947 [00:00<?, ?it/s]

✅ Done! 9947 addresses processed.


In [22]:
# === Read the saved {bech32_address: stake_address} mapping ===
with open("wrt_holders_stake_address.json", "r") as fh:
    addr_to_stake = json.load(fh)

# === One row per (address, stake address) pair ===
stake_pairs = [(addr, stake) for addr, stake in addr_to_stake.items()]
wrt_stake_df = pd.DataFrame(stake_pairs, columns=["bech32_address", "stake_address"])

# === Show sample
wrt_stake_df.head()


Unnamed: 0,bech32_address,stake_address
0,addr1zxhew7fmsup08qvhdnkg8ccra88pw7q5trrncja3d...,stake1uy0lhtxxcrg6cahp2ck5qz5wny0wmc3pp353ldpn...
1,addr1wy9z0v8mrkhtyll43fu6mnhu0p87tna48xt4p5649...,
2,addr1q8fhnpzgdukm4fc8xatkpx6kmdev7h22ze8nm52l6...,stake1u82h4w48l3xkh3fy58np0c5ecmepjt8udjul32rr...
3,addr1qyr6ddcy3cc0srahvqxkzxctr4h84nml9ngtsdlue...,stake1u9ztutdcj2h7tar3kf897snjc0qstyh9pd2y7zld...
4,addr1qyk9t3uyjvk2v3eny2vv5mt93z4laqdcmsm9dpgnj...,stake1u83snfrry90efflq347ent9sy8kf3ug9ecnuvvy6...


In [23]:
# Count how many rows in wrt_stake_df have a missing (None/NaN) stake_address
num_none = wrt_stake_df["stake_address"].isna().sum()
print(f"Number of addresses without stake_address: {num_none}")


Number of addresses without stake_address: 33


# Get script tag

In [18]:
path = "/home/jovyan/work/New Topic/wrt_holders_txs_and_assets_summary_df_revised.csv"
wrt_holders_txs_and_assets_summary_df = pd.read_csv(path)
wrt_holders_txs_and_assets_summary_df.head()

Unnamed: 0,bech32_address,tx_count,received_sum_ada,sent_sum_ada,received_sum_wrt,sent_sum_wrt,received_token_units,sent_token_units,hex_address,wrt_balance,asset_ids,ada_balance
0,addr1zxhew7fmsup08qvhdnkg8ccra88pw7q5trrncja3d...,3312,1616848899.715349,1616493478.006337,42779232925.73847,42763334914.664604,['6fdc63a1d71dc2c65502b79baae7fb543185702b12c3...,['6fdc63a1d71dc2c65502b79baae7fb543185702b12c3...,11af97793b8702f381976cec83e303e9ce17781458c73c...,15906809.387003,['6fdc63a1d71dc2c65502b79baae7fb543185702b12c3...,355231.554617
1,addr1wy9z0v8mrkhtyll43fu6mnhu0p87tna48xt4p5649...,2949,5504.49444,4956.38249,116743987.551725,102946903.802519,['c0ee29a85b13209423b10447d3c2e6a50641a15c5777...,['c0ee29a85b13209423b10447d3c2e6a50641a15c5777...,710a27b0fb1daeb27ff58a79adcefc784fe5cfb5399750...,13797083.749206,['c0ee29a85b13209423b10447d3c2e6a50641a15c5777...,548.11195
2,addr1q8fhnpzgdukm4fc8xatkpx6kmdev7h22ze8nm52l6...,1,1010.909314,0.0,8805566.666666,0.0,['6ec94af5190b33df99e49dba5cd7bb01fee025c6c097...,[],01d37984486f2dbaa7073757609b56db72cf5d4a164f3d...,8805566.666666,['c0ee29a85b13209423b10447d3c2e6a50641a15c5777...,1010.909314
3,addr1qyr6ddcy3cc0srahvqxkzxctr4h84nml9ngtsdlue...,35,9954.666601,9592.956809,539527077.049635,531966259.38113,['06782c3e4eba019f64c34635735f1c307555e40490d6...,['06782c3e4eba019f64c34635735f1c307555e40490d6...,0107a6b7048e30f80fb7600d611b0b1d6e7acf7f2cd0b8...,7560817.668505,['06782c3e4eba019f64c34635735f1c307555e40490d6...,361.709792
4,addr1qyk9t3uyjvk2v3eny2vv5mt93z4laqdcmsm9dpgnj...,58,1542551.271215,1540866.792671,768068915.976427,763163074.563795,['026a18d04a0c642759bb3d83b12e3344894e5c1c7b2a...,['026a18d04a0c642759bb3d83b12e3344894e5c1c7b2a...,012c55c784932ca647332298ca6d6588abfe81b8dc3656...,4905841.412632,['c0ee29a85b13209423b10447d3c2e6a50641a15c5777...,1684.478544


In [31]:
import requests
import json
import time
from tqdm.notebook import tqdm

# === API config ===
import os  # cell-local import: only needed to read the credential from the environment

# The Blockfrost project id is a credential and must not be stored in the notebook.
# Export BLOCKFROST_PROJECT_ID before starting the kernel.
# NOTE(review): the key that used to be hard-coded here should be considered leaked
# and rotated in the Blockfrost dashboard.
API_KEY = os.environ.get("BLOCKFROST_PROJECT_ID")
if not API_KEY:
    raise RuntimeError(
        "Set the BLOCKFROST_PROJECT_ID environment variable to your Blockfrost project id."
    )
HEADERS = {"project_id": API_KEY}
BASE_URL = "https://cardano-mainnet.blockfrost.io/api/v0/addresses"

REQUEST_TIMEOUT_S = 30   # requests has no default timeout; without one a stalled call hangs forever
MAX_ATTEMPTS = 4         # total tries per address when the API answers 429 (rate limited)
RETRY_BACKOFF_S = 2.0    # base wait between 429 retries, multiplied by the attempt number
REQUEST_PAUSE_S = 0.3    # pause between addresses to stay under the rate limit

# === Load addresses ===
bech32_addresses = wrt_holders_txs_and_assets_summary_df["bech32_address"].dropna().unique().tolist()

# === Container for full results ===
# Maps address -> full JSON response, or {"error": "..."} when the lookup failed.
address_info_dict = {}
failed_addresses = []

# === Loop and call API ===
# One Session reuses the HTTPS connection across all calls instead of reconnecting each time.
with requests.Session() as session:
    session.headers.update(HEADERS)
    for addr in tqdm(bech32_addresses, desc="🔍 Fetching full address info"):
        url = f"{BASE_URL}/{addr}"
        try:
            for attempt in range(1, MAX_ATTEMPTS + 1):
                response = session.get(url, timeout=REQUEST_TIMEOUT_S)
                # Only a rate-limit answer is worth retrying; everything else is final.
                if response.status_code != 429 or attempt == MAX_ATTEMPTS:
                    break
                time.sleep(RETRY_BACKOFF_S * attempt)

            if response.status_code == 200:
                address_info_dict[addr] = response.json()  # ⬅️ Save the full response
            else:
                address_info_dict[addr] = {"error": f"Status code {response.status_code}"}
                failed_addresses.append(addr)
        except Exception as e:
            # Best effort: one bad address must not abort a run over thousands of addresses.
            address_info_dict[addr] = {"error": str(e)}
            failed_addresses.append(addr)
        time.sleep(REQUEST_PAUSE_S)  # ⏱️ Respect rate limits

# === Save full data to JSON ===
with open("wrt_holders_full_address_info.json", "w") as f:
    json.dump(address_info_dict, f, indent=2)

print(f"✅ Done! Saved info for {len(address_info_dict)} addresses.")
if failed_addresses:
    # Failed lookups carry no 'script' flag, so surface them instead of letting
    # later steps silently treat those addresses as non-script.
    print(f"⚠️ {len(failed_addresses)} addresses returned an error; see the 'error' entries in the JSON.")


🔍 Fetching full address info:   0%|          | 0/9947 [00:00<?, ?it/s]

✅ Done! Saved info for 9947 addresses.


In [24]:
import json
import pandas as pd

# === Step 1: Read back the raw per-address API responses ===
with open("wrt_holders_full_address_info.json", "r") as f:
    full_data = json.load(f)

# === Step 2: Keep one (address, script flag) record per usable response ===
# Entries without a "script" key (e.g. the {"error": ...} placeholders) are skipped.
records = [
    {"bech32_address": address, "script": payload["script"]}
    for address, payload in full_data.items()
    if isinstance(payload, dict) and "script" in payload
]

# === Step 3: Tabulate the flags ===
wrt_script_df = pd.DataFrame(records)

# ✅ Optional: persist the flags so the API does not have to be queried again
wrt_script_df.to_csv("wrt_address_script_flag.csv", index=False)

# 🔍 Preview
wrt_script_df.head()


Unnamed: 0,bech32_address,script
0,addr1zxhew7fmsup08qvhdnkg8ccra88pw7q5trrncja3d...,True
1,addr1wy9z0v8mrkhtyll43fu6mnhu0p87tna48xt4p5649...,True
2,addr1q8fhnpzgdukm4fc8xatkpx6kmdev7h22ze8nm52l6...,False
3,addr1qyr6ddcy3cc0srahvqxkzxctr4h84nml9ngtsdlue...,False
4,addr1qyk9t3uyjvk2v3eny2vv5mt93z4laqdcmsm9dpgnj...,False


In [25]:
import pandas as pd

# === Step 1: Attach the stake address to every payment address ===
# wrt_stake_df is not created in this cell; it must already exist in the kernel.
wrt_with_stake_df = pd.merge(
    wrt_holders_txs_and_assets_summary_df,
    wrt_stake_df,
    on="bech32_address",
    how="left",
)

# The per-address 'script' flag is optional: add it only when wrt_script_df exists.
try:
    wrt_with_stake_df = pd.merge(
        wrt_with_stake_df,
        wrt_script_df,
        on="bech32_address",
        how="left",
    )
except NameError:
    # wrt_script_df was never built in this session; continue without the flag
    pass


# === Step 2: One id per holder ===
def _resolve_holder_id(row):
    """Use the stake address when present, otherwise a 'holder::'-prefixed payment address."""
    stake = row["stake_address"]
    if pd.notna(stake):
        return stake
    return f"holder::{row['bech32_address']}"


wrt_with_stake_df["final_holder_id"] = wrt_with_stake_df.apply(_resolve_holder_id, axis=1)

# ✅ Step 2.5: remember every payment address that was folded into each holder
address_lookup_df = (
    wrt_with_stake_df.groupby("final_holder_id")["bech32_address"]
    .agg(list)
    .rename("all_bech32_addresses")
    .reset_index()
)


# === Step 3: Decide how each column is aggregated ===
def _present(columns):
    """Return the requested column names that exist in wrt_with_stake_df, order preserved."""
    available = wrt_with_stake_df.columns
    return [name for name in columns if name in available]


# Numeric columns are added up across a holder's addresses
sum_cols_wanted = [
    "tx_count",
    "received_sum_ada", "sent_sum_ada",
    "received_sum_wrt", "sent_sum_wrt",
    "wrt_balance", "ada_balance",
]
sum_cols = _present(sum_cols_wanted)

# List-like columns are merged into one de-duplicated list
list_cols_wanted = ["received_token_units", "sent_token_units", "asset_ids"]
list_cols = _present(list_cols_wanted)

# Boolean flags (currently only 'script') are combined with any()
bool_cols = ["script"] if "script" in wrt_with_stake_df.columns else []
# Helper aggregators
def union_list(series):
    """Return the sorted, de-duplicated union of all list-like cells in ``series``.

    The summary frame is loaded with ``pd.read_csv``, so list columns may arrive as
    their string representation (``"['a', 'b']"``) rather than as real lists. Such
    strings are parsed with ``ast.literal_eval``; previously they were ignored, which
    silently produced empty unions and zero counts on a fresh kernel.

    Parameters
    ----------
    series : iterable
        Cells of one column for one group. Each cell may be a list/tuple/set, a
        string holding a Python literal of one of those, or a missing value.

    Returns
    -------
    list
        Sorted unique elements. Cells that are missing, unparsable, or not a
        collection contribute nothing.
    """
    import ast  # local import: the notebook's import cell lies outside this block

    out = set()
    for v in series:
        if isinstance(v, str):
            # CSV round-trip: turn "['a', 'b']" back into a list; skip anything else.
            try:
                v = ast.literal_eval(v)
            except (ValueError, SyntaxError, TypeError):
                continue
        if isinstance(v, (list, tuple, set, frozenset)):
            out.update(v)
    return sorted(out)

# Combine the three per-column rules into a single spec; insertion order
# (sums, then list unions, then boolean flags) fixes the output column order.
agg_dict = {
    **dict.fromkeys(sum_cols, "sum"),
    **dict.fromkeys(list_cols, union_list),
    **dict.fromkeys(bool_cols, "any"),
}

# === Step 4: Collapse all addresses of a holder into one row ===
holder_groups = wrt_with_stake_df.groupby("final_holder_id", as_index=False)
aggregated_holder_df = holder_groups.agg(agg_dict)

# === Step 5: Count the entries of each unioned list ===
# (source column, new count column, how to count one cell)
_count_specs = [
    ("received_token_units", "received_token_count", len),
    ("sent_token_units", "sent_token_count", len),
    ("asset_ids", "token_num", lambda xs: len(set(xs))),
]
for source_col, count_col, counter in _count_specs:
    if source_col in aggregated_holder_df.columns:
        aggregated_holder_df[count_col] = aggregated_holder_df[source_col].apply(counter)

# === Step 6: Holders without a stake address carry the "holder::" prefix ===
holder_ids = aggregated_holder_df["final_holder_id"]
aggregated_holder_df["is_virtual"] = holder_ids.str.startswith("holder::")

# === Step 7: Re-attach the list of contributing addresses for traceability ===
aggregated_holder_df = pd.merge(
    aggregated_holder_df,
    address_lookup_df,
    on="final_holder_id",
    how="left",
)


# Preview
aggregated_holder_df.head()


Unnamed: 0,final_holder_id,tx_count,received_sum_ada,sent_sum_ada,received_sum_wrt,sent_sum_wrt,wrt_balance,ada_balance,received_token_units,sent_token_units,asset_ids,script,received_token_count,sent_token_count,token_num,is_virtual,all_bech32_addresses
0,holder::addr1qydypcg5n7tkdpyvhag832wg5zasdlp5p...,943,4121.318116,1211.4,1.0,0.0,1.0,2909.918116,[007394e3117755fbb0558b93c54ce3bc6c85770920044...,[],[007394e3117755fbb0558b93c54ce3bc6c85770920044...,False,168,0,168,True,[addr1qydypcg5n7tkdpyvhag832wg5zasdlp5pf3fd7sm...
1,holder::addr1v80rc443e7kdya5epx90grwapc0h8cdxp...,113,17743.052489,17724.161759,1.0,0.0,1.0,18.89073,[15642bb40962b79700c693ecff664af0ebfa154af8844...,[ec2e4e396fd178c1d44831a1f15efc86d1fa81c5396b0...,[15642bb40962b79700c693ecff664af0ebfa154af8844...,False,154,2,152,True,[addr1v80rc443e7kdya5epx90grwapc0h8cdxp9wd6hyz...
2,holder::addr1v83gkkw3nqzakg5xynlurqcfqhgd65vkf...,43246,731511685.127116,731222757.137427,3352.0,3350.0,2.0,333928.01231,[000ffeb007da43324aefe044555fbe5bc469c38aa5063...,[000ffeb007da43324aefe044555fbe5bc469c38aa5063...,[000ffeb007da43324aefe044555fbe5bc469c38aa5063...,False,168,167,168,True,[addr1v83gkkw3nqzakg5xynlurqcfqhgd65vkfvf5xv8t...
3,holder::addr1v8aqm9nrzrnymauz0lj99eq83ufpwndw5...,8,73.20424,0.0,2.0,0.0,2.0,73.20424,[000dbe0b66e9ceb8357eeb7eacbdc6e2707345924ba86...,[],[000dbe0b66e9ceb8357eeb7eacbdc6e2707345924ba86...,False,155,0,155,True,[addr1v8aqm9nrzrnymauz0lj99eq83ufpwndw5hmtp8vn...
4,holder::addr1v8ph9r88z8ahuj7erx7w00pgg5p0wnsch...,1,1.06026,0.0,1975.0,0.0,1975.0,1.06026,[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,[],[c0ee29a85b13209423b10447d3c2e6a50641a15c57770...,False,1,0,1,True,[addr1v8ph9r88z8ahuj7erx7w00pgg5p0wnschjju707h...


In [26]:
# Spot-check the last rows of the aggregated per-holder frame.
aggregated_holder_df.tail()

Unnamed: 0,final_holder_id,tx_count,received_sum_ada,sent_sum_ada,received_sum_wrt,sent_sum_wrt,wrt_balance,ada_balance,received_token_units,sent_token_units,asset_ids,script,received_token_count,sent_token_count,token_num,is_virtual,all_bech32_addresses
9418,stake1uyztlensspqxs8wnsu87lan3t60ax5pjchnmj2u4...,162,65408.786948,60401.298072,4.0,3.0,1.0,5007.488876,[007394e3117755fbb0558b93c54ce3bc6c85770920044...,[007394e3117755fbb0558b93c54ce3bc6c85770920044...,[007394e3117755fbb0558b93c54ce3bc6c85770920044...,False,99,93,75,False,[addr1qyt8zhgzvegvx524ylw4g9l80wq8qht0uk7j274t...
9419,stake1uyzvjz597wyes2d0y03nd5ljca5x7r4jm942vtct...,218,80460.464405,80180.235,2.0,1.0,1.0,280.229405,[0171c997b8853fde686763d93b36ab8e04ce947bb6aa0...,[0171c997b8853fde686763d93b36ab8e04ce947bb6aa0...,[0171c997b8853fde686763d93b36ab8e04ce947bb6aa0...,False,174,174,156,False,[addr1q8ydgdadaadcjcl2d4y3nxmzlqgqua0wu0tyver7...
9420,stake1uyzvr90pudtfm9n8aa77nl987lev0dk0g9ahfrkp...,32,5855.27235,5682.51775,2.0,1.0,1.0,172.7546,[007394e3117755fbb0558b93c54ce3bc6c85770920044...,[078eafce5cd7edafdf63900edef2c1ea759e77f30ca81...,[007394e3117755fbb0558b93c54ce3bc6c85770920044...,False,178,82,174,False,[addr1qxynwukgruzxh9qpe9jme5t8skejl33fevm8elau...
9421,stake1uyzwc3h86zezga6fu8udyg3298729dwp46y7lckt...,1296,516530.951961,516309.286308,29526.463207,29266.291725,260.171482,221.665653,[000dbe0b66e9ceb8357eeb7eacbdc6e2707345924ba86...,[000dbe0b66e9ceb8357eeb7eacbdc6e2707345924ba86...,[000dbe0b66e9ceb8357eeb7eacbdc6e2707345924ba86...,False,310,298,199,False,[addr1qyn7yugl6djwyaynxwndlzhvkg4pqnujslg4jskg...
9422,stake1uyzzs58j0053v4fns5d9clw0rpfq7d7wx6xn4crw...,1,15.68931,0.0,1.635335,0.0,1.635335,15.68931,[007394e3117755fbb0558b93c54ce3bc6c85770920044...,[],[007394e3117755fbb0558b93c54ce3bc6c85770920044...,False,79,0,79,False,[addr1qxamzzj5y8djsxjckykswlx94yw24zde7n43awnw...


In [27]:
# Step 1: Map each payment address to the script flag recorded in wrt_script_df
addr_to_script = {
    address: flag
    for address, flag in zip(wrt_script_df["bech32_address"], wrt_script_df["script"])
}

# Step 2: Holder-level tagging function
def has_script_address(address_list):
    """
    Tell whether at least one address of a holder is flagged script=True.

    Addresses missing from the lookup count as non-script (False).
    """
    for candidate in address_list:
        if addr_to_script.get(candidate, False):
            return True
    return False

# Step 3: Tag every holder from its list of addresses
holder_addresses = aggregated_holder_df["all_bech32_addresses"]
aggregated_holder_df["has_script_address"] = holder_addresses.apply(has_script_address)

# Optional: show how many holders fall on each side
print(aggregated_holder_df["has_script_address"].value_counts())


has_script_address
False    9102
True      321
Name: count, dtype: int64


# Save the final CSV

In [28]:
# Persist the aggregated per-holder table; index=False keeps the row index out of the file.
# NOTE(review): list-valued columns are presumably written as their string form here and
# would need parsing when this CSV is read back — confirm in whichever cell reloads it.
aggregated_holder_df.to_csv("wrt_aggregated_holder_df_revised.csv", index=False)