### Only supports EVM chains for now.

In [15]:
from web3 import Web3
from dotenv import load_dotenv
import os
import json
import requests
import json
from datetime import datetime, timedelta
import math

# Load environment variables from .env file
load_dotenv()

# Addresses excluded from whale detection (a dead/burn address and the zero address).
blacklisted_wallets = [
    "0xdead000000000000000042069420694206942069",
    "0x0000000000000000000000000000000000000000",
]

# Shared Web3 client; the RPC endpoint URL is read from the environment.
web3 = Web3(Web3.HTTPProvider(os.getenv("ALCHEMY_ETHEREUM_RPC_URL")))

# ERC-20 ABI used to build contract instances for on-chain token calls.
with open("abis/ERC20.json", "r") as f:
    ERC20_ABI = json.load(f)

# SHIB token contract address, used as the sample input in this notebook.
SHIB_INU = "0x95aD61b0a150d79219dCF64E1E6Cc01f0B64C4cE"

### General Methods

In [4]:
def is_contract_address(address: str) -> bool:
    """
    Tell whether an address has bytecode deployed at it.

    Args:
        address: Ethereum address to inspect

    Returns:
        bool: True when code exists at the address (a contract); False for a
        malformed address or an EOA (externally owned account) with no code
    """
    if web3.isAddress(address):
        bytecode = web3.eth.get_code(web3.toChecksumAddress(address))
        # Empty bytes (or the literal b"0x") mean nothing is deployed there.
        return not (bytecode == b"" or bytecode == b"0x")

    # Not a well-formed address, so it cannot be a contract.
    return False


def get_bitquery_access_token() -> str:
    """
    Request an OAuth2 access token from Bitquery (client-credentials grant).

    The client id and secret are read from the BITQUERY_CLIENT_ID and
    BITQUERY_CLIENT_SECRET environment variables.

    Returns:
        str: The "access_token" value from the token endpoint's JSON response

    Raises:
        requests.HTTPError: If the token endpoint answers with an error status
        KeyError: If the response body has no "access_token" field
    """
    url = "https://oauth2.bitquery.io/oauth2/token"

    # Passing a dict lets requests form-encode each value, so credentials that
    # contain characters such as "&", "=" or "+" are transmitted intact.
    payload = {
        "grant_type": "client_credentials",
        "client_id": os.getenv("BITQUERY_CLIENT_ID"),
        "client_secret": os.getenv("BITQUERY_CLIENT_SECRET"),
        "scope": "api",
    }
    headers = {"Content-Type": "application/x-www-form-urlencoded"}

    # A timeout keeps an unresponsive endpoint from hanging the caller forever.
    response = requests.post(url, headers=headers, data=payload, timeout=30)
    # Surface auth/server failures as HTTP errors instead of a confusing KeyError.
    response.raise_for_status()

    return response.json()["access_token"]


def get_token_holders(token_address: str, limit: int = 10) -> "list[dict] | None":
    """
    Get top token holders for a given token address using BitQuery API

    Args:
        token_address: Ethereum token contract address
        limit: Number of top holders to return (default 10)

    Returns:
        list: One dict per holder, in the order returned by the API (the query
        requests descending balance), each shaped as
        {"balance": float, "address": str}.
        None is returned (after printing the error) when the request fails or
        the response does not have the expected structure.
    """

    access_token = get_bitquery_access_token()
    url = "https://streaming.bitquery.io/graphql"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {access_token}",
    }

    payload = json.dumps(
        {
            "query": """
        {
      EVM(network: eth, dataset: archive) {
        TokenHolders(
          date: "%s"
          tokenSmartContract: "%s"
          limit: {count: %d}
          orderBy: {descendingByField: "Balance_Amount"}
        ) {
          Balance {
            Amount
          }
          Holder {
            Address
          }
        }
      }
    }
        """
            % (datetime.now().strftime("%Y-%m-%d"), token_address, limit)
        }
    )

    try:
        # A timeout prevents a stalled connection from blocking indefinitely.
        response = requests.post(url, headers=headers, data=payload, timeout=60)
        # Turn HTTP-level failures into an exception so they are reported as
        # such rather than as a missing-key error further down.
        response.raise_for_status()
        holders = response.json()["data"]["EVM"]["TokenHolders"]
        return [
            {
                "balance": float(h["Balance"]["Amount"]),
                "address": h["Holder"]["Address"],
            }
            for h in holders
        ]
    except Exception as e:
        # Best-effort by design: callers treat None as "holder data unavailable".
        print(f"An error occurred: {str(e)}")
        return None


def assert_token_address(token_address: str):
    """
    Validate that a value is a well-formed Ethereum address.

    Args:
        token_address: Address string to validate

    Raises:
        AssertionError: If web3.isAddress rejects the value
    """
    # Raise explicitly rather than via `assert`: assert statements are removed
    # when Python runs with -O, which would silently disable this validation.
    if not web3.isAddress(token_address):
        raise AssertionError("Invalid token address")


## Whale Monitoring

In [18]:
def get_recent_transfers(token_address: str, wallet_address: str, days: int = 7):
    """
    Get the transfers of a wallet for a given token in the last n days

    Args:
        token_address: The token contract address
        wallet_address: The wallet address to get transfers for
        days: The number of days to get transfers for

    Returns:
        list: Transfer dicts sorted by block number (most recent first), each
        with keys "blockNumber", "transactionHash", "from", "to", "value"
        (raw token units, not adjusted for decimals) and "type" ("out" when the
        wallet is the sender, otherwise "in"). An empty list is returned (after
        printing the error) if fetching the events fails.

    Raises:
        AssertionError: If either address is not a valid Ethereum address
    """
    assert_token_address(token_address)
    assert_token_address(wallet_address)

    # web3 rejects non-checksummed addresses for contracts and filter
    # arguments, so normalise here; lowercase addresses (e.g. as returned by
    # external APIs) then work the same as checksummed ones.
    token_address = web3.toChecksumAddress(token_address)
    wallet_address = web3.toChecksumAddress(wallet_address)

    # Create contract instance
    token_contract = web3.eth.contract(address=token_address, abi=ERC20_ABI)

    # Get current block
    current_block = web3.eth.block_number

    # Calculate block from n days ago (assuming 12.07 seconds per block)
    blocks_per_day = 24 * 60 * 60 // 12.07
    # Clamp at the genesis block so a very large `days` cannot go negative.
    from_block = max(0, current_block - int(blocks_per_day * days))

    try:
        # Get transfers FROM the wallet
        outgoing_filter = token_contract.events.Transfer.createFilter(
            fromBlock=from_block,
            toBlock="latest",
            argument_filters={"from": wallet_address},
        )
        outgoing_events = outgoing_filter.get_all_entries()

        # Get transfers TO the wallet
        incoming_filter = token_contract.events.Transfer.createFilter(
            fromBlock=from_block,
            toBlock="latest",
            argument_filters={"to": wallet_address},
        )
        incoming_events = incoming_filter.get_all_entries()

        wallet_key = wallet_address.lower()

        # Combine and format transfers
        all_transfers = [
            {
                "blockNumber": event["blockNumber"],
                "transactionHash": event["transactionHash"].hex(),
                "from": event["args"]["from"],
                "to": event["args"]["to"],
                "value": event["args"]["value"],
                "type": "out" if event["args"]["from"].lower() == wallet_key else "in",
            }
            for event in outgoing_events + incoming_events
        ]

        # Sort by block number (most recent first)
        all_transfers.sort(key=lambda x: x["blockNumber"], reverse=True)

        return all_transfers

    except Exception as e:
        # Best-effort by design: a failed lookup is reported as "no transfers".
        print(f"Error fetching transfers: {str(e)}")
        return []


def monitor_whale_wallets(token_address: str, days: int = 7):
    """
    Monitor whale wallets for a given token, analyzing their balances, transfers, and activity patterns

    Args:
        token_address (str): The token contract address to analyze
        days (int, optional): Number of days to look back for transfer history. Defaults to 7

    Returns:
        dict: A dictionary containing detailed whale monitoring data with the following structure:
            {
                'token_info': {
                    'address': str,         # The token's contract address
                    'symbol': str,          # The token's symbol (e.g., 'SHIB', 'UNI')
                    'total_supply': float,  # Total token supply adjusted for decimals
                    'whale_threshold': float # Minimum balance to be considered a whale (1% of supply)
                },

                'analysis_timeframe': str,  # Description of analysis period (e.g., "Last 7 days")

                'whales': [                 # List of whale wallet data
                    {
                        'address': str,     # Whale's wallet address
                        'current_balance': float,  # Current token balance
                        'percentage_of_supply': float,  # Percentage of total supply held

                        'transfer_activity': {
                            'total_transfers': int,    # Total number of transfers
                            'outgoing_transfers': int, # Number of outgoing transfers
                            'incoming_transfers': int, # Number of incoming transfers
                            'total_outgoing_amount': float,  # Total tokens sent
                            'total_incoming_amount': float,  # Total tokens received
                            'net_flow': float,        # Net token flow (incoming - outgoing)
                        },

                        'recent_transfers': [         # List of recent transfers
                            {
                                'block_number': int,  # Block number of transfer
                                'transaction_hash': str,  # Transaction hash
                                'type': str,         # 'in' or 'out'
                                'counterparty': str, # Address of sender/receiver
                                'amount': float,     # Transfer amount
                                'direction': str     # 'incoming' or 'outgoing'
                            },
                            ...
                        ]
                    },
                    ...
                ]
            }

            If the holder data could not be fetched:
            {
                'error': str,  # Error message
                'token_address': str  # The token address that was analyzed
            }

    Example:
        >>> data = monitor_whale_wallets("0x95aD61b0a150d79219dCF64E1E6Cc01f0B64C4cE")
        >>> print(f"Number of whales: {len(data['whales'])}")
        >>> print(f"Largest whale balance: {data['whales'][0]['current_balance']}")
        >>> print(f"Recent activity: {data['whales'][0]['transfer_activity']}")

    Notes:
        - Whale Definition:
          * More than 1% of total token supply
          * Must be an EOA (externally owned account), not a contract
          * Not in blacklisted addresses
          * Only the top 25 holders are considered

        - Transfer Activity Analysis:
          * Tracks both incoming and outgoing transfers
          * Calculates net token flow

        - Important Metrics:
          * percentage_of_supply: Individual whale's share of total supply
          * net_flow: Positive when accumulating, negative when distributing
          * transfer_activity: Counts and totals of recent transfers

        - Blacklisted addresses include:
          * Dead addresses (0xdead...)
          * Zero address (0x0000...)
    """
    assert_token_address(token_address)
    token_contract = web3.eth.contract(address=token_address, abi=ERC20_ABI)

    # Get token metadata
    decimals = token_contract.functions.decimals().call()
    unit = 10**decimals
    total_supply = token_contract.functions.totalSupply().call() / unit
    token_symbol = token_contract.functions.symbol().call()

    # Define whale threshold (>1% of supply)
    MIN_WHALE_HOLDINGS = total_supply * 0.01

    # Get the top holders; None signals that the lookup failed.
    holders = get_token_holders(token_address, 25)
    if holders is None:
        # Report the failure in the documented error shape instead of crashing
        # on iteration over None.
        return {
            "error": "Failed to fetch holder data",
            "token_address": token_address,
        }

    whales_data = {
        "token_info": {
            "address": token_address,
            "symbol": token_symbol,
            "total_supply": total_supply,
            "whale_threshold": MIN_WHALE_HOLDINGS,
        },
        "analysis_timeframe": f"Last {days} days",
        "whales": [],
    }

    # Compare addresses case-insensitively: the blacklist is lowercase while
    # holder addresses may arrive checksummed (mixed case).
    blacklist = {address.lower() for address in blacklisted_wallets}

    # Filter and process whale data
    for holder in holders:
        if holder["balance"] <= MIN_WHALE_HOLDINGS:
            continue
        # Cheap local check first; the contract check costs an RPC round trip.
        if holder["address"].lower() in blacklist:
            continue
        if is_contract_address(holder["address"]):
            continue

        # Get recent transfers for this whale
        recent_transfers = get_recent_transfers(token_address, holder["address"], days)

        # Calculate transfer statistics
        outgoing_transfers = [t for t in recent_transfers if t["type"] == "out"]
        incoming_transfers = [t for t in recent_transfers if t["type"] == "in"]

        total_outgoing = sum(t["value"] for t in outgoing_transfers) / unit
        total_incoming = sum(t["value"] for t in incoming_transfers) / unit

        whale_data = {
            "address": holder["address"],
            "current_balance": holder["balance"],
            "percentage_of_supply": (holder["balance"] / total_supply) * 100,
            "transfer_activity": {
                "total_transfers": len(recent_transfers),
                "outgoing_transfers": len(outgoing_transfers),
                "incoming_transfers": len(incoming_transfers),
                "total_outgoing_amount": total_outgoing,
                "total_incoming_amount": total_incoming,
                "net_flow": total_incoming - total_outgoing,
            },
            "recent_transfers": [
                {
                    "block_number": transfer["blockNumber"],
                    "transaction_hash": transfer["transactionHash"],
                    "type": transfer["type"],
                    "counterparty": (
                        transfer["to"]
                        if transfer["type"] == "out"
                        else transfer["from"]
                    ),
                    "amount": transfer["value"] / unit,
                    "direction": (
                        "outgoing" if transfer["type"] == "out" else "incoming"
                    ),
                }
                for transfer in recent_transfers
            ],
        }

        whales_data["whales"].append(whale_data)

    # Sort whales by balance
    whales_data["whales"].sort(key=lambda x: x["current_balance"], reverse=True)

    return whales_data


## Token Distribution Analysis

In [16]:
def calculate_gini_coefficient(holders: list) -> float:
    """
    Calculate the Gini coefficient for token distribution
    A measure of inequality where 0 = perfect equality and 1 = perfect inequality

    Args:
        holders: List of dictionaries, each with a numeric 'balance' entry

    Returns:
        float: Gini coefficient between 0 and 1. Returns 0 when there are no
        holders or when every balance is zero (nothing to distribute).
    """
    if not holders:
        return 0

    # Sort balances in ascending order
    balances = sorted(holder['balance'] for holder in holders)
    n = len(balances)

    total = sum(balances)
    if total == 0:
        # All balances are zero: treat as perfectly equal rather than
        # dividing by zero below.
        return 0

    # G = (n + 1 - 2 * sum_i (n - i) * x_i / total) / n, with i the 0-based
    # rank of x_i in ascending order.
    weighted_share = sum((n - i) * balance / total for i, balance in enumerate(balances))
    gini = (n + 1 - 2 * weighted_share) / n

    return max(0, min(1, gini))  # Ensure result is between 0 and 1


def calculate_nakamoto_coefficient(holders: list, total_supply: float) -> int:
    """
    Compute the Nakamoto Coefficient: how many of the largest holders are
    needed for their combined balance to reach 51% of the token supply.

    Args:
        holders: List of dictionaries, each with a numeric 'balance' entry
        total_supply: Total token supply

    Returns:
        int: Number of holders needed for 51% control; 0 for an empty list,
        and len(holders) if the given holders never reach 51% together
    """
    if not holders:
        return 0

    # Walk holders from largest to smallest, accumulating their supply share.
    ranked = sorted(holders, key=lambda entry: entry['balance'], reverse=True)
    running_share = 0
    for count, entry in enumerate(ranked, start=1):
        running_share += (entry['balance'] / total_supply) * 100
        if running_share >= 51:
            return count

    # Threshold never reached with the holders supplied.
    return len(holders)

def calculate_theil_index(holders: list) -> float:
    """
    Compute the Theil Index, a measure of economic inequality.
    It ranges from 0 (perfect equality) to ln(n) (perfect inequality).

    Args:
        holders: List of dictionaries, each with a numeric 'balance' entry

    Returns:
        float: Theil index value; 0 for an empty list or a single holder
    """
    if not holders:
        return 0

    amounts = [entry['balance'] for entry in holders]
    count = len(amounts)
    if count == 1:
        return 0

    average = sum(amounts) / count

    # Accumulate (x / mean) * ln(x / mean) / n over strictly positive balances;
    # non-positive balances are skipped because log is undefined for them.
    index = 0
    for amount in amounts:
        if not amount > 0:
            continue
        share = amount / average
        index += (share * math.log(share)) / count

    # Clamp tiny negative rounding artefacts to zero.
    return max(0, index)


# Potential method to use later. Not using it now since it's too slow.
def get_new_holders_24h(token_address: str, max_transfers: int = 100) -> int:
    """
    Count addresses that became token holders within roughly the last 24 hours,
    based on Transfer events read through web3.py.

    Args:
        token_address (str): The token contract address
        max_transfers (int, optional): Maximum number of recent transfers to analyze. Defaults to 100.

    Returns:
        int: Number of new holders found; 0 if fetching the events fails
    """
    assert_token_address(token_address)

    erc20 = web3.eth.contract(address=token_address, abi=ERC20_ABI)

    # Approximate a 24h window assuming 12-second blocks (about 7200 blocks).
    latest_block = web3.eth.block_number
    window_start = latest_block - (24 * 60 * 60 // 12)

    try:
        event_filter = erc20.events.Transfer().createFilter(
            fromBlock=window_start,
            toBlock='latest'
        )
        events = event_filter.get_all_entries()

        # Only the newest `max_transfers` events are inspected.
        newest_first = sorted(events, key=lambda ev: ev['blockNumber'], reverse=True)

        new_holders = set()
        seen = set()  # lowercase addresses already evaluated

        for ev in newest_first[:max_transfers]:
            recipient = ev['args']['to']
            recipient_key = recipient.lower()

            if recipient_key in seen:
                continue

            # Contracts are not counted as holders here.
            if is_contract_address(recipient):
                seen.add(recipient_key)
                continue

            try:
                # Balance one block before the transfer: zero means the
                # address held nothing prior to receiving these tokens.
                balance_before = erc20.functions.balanceOf(recipient).call(
                    block_identifier=ev['blockNumber'] - 1
                )
            except Exception as e:
                # The address is left unmarked so a later event can retry it.
                print(f"Error checking previous balance: {str(e)}")
                continue

            seen.add(recipient_key)
            if balance_before == 0:
                new_holders.add(recipient_key)

        return len(new_holders)

    except Exception as e:
        print(f"Error fetching new holders: {str(e)}")
        return 0

In [17]:
def analyze_token_distribution(token_address: str) -> dict:
    """
    Analyze the token distribution metrics including Gini coefficient and holder concentration

    Args:
        token_address (str): The token contract address to analyze

    Returns:
        dict: A dictionary containing detailed token distribution analysis with the following structure:
            {
                'token_info': {
                    'address': str,  # The token's contract address
                    'symbol': str,   # The token's symbol (e.g., 'SHIB', 'UNI')
                    'total_supply': float  # Total token supply adjusted for decimals
                },
                'distribution_metrics': {
                    'gini_coefficient': float,  # Value between 0-1, where:
                                              # 0 = perfect equality
                                              # 1 = perfect inequality
                                              # Higher values indicate more concentrated ownership

                    'top_holder_concentration': {
                        'top_1_percentage': float,  # Percentage of supply held by largest holder
                        'top_5_percentage': float,  # Percentage of supply held by top 5 holders
                        'top_10_percentage': float  # Percentage of supply held by top 10 holders
                    },

                    'concentration_metrics': {
                        'nakamoto_interpretation': str,  # Interpretation of Nakamoto coefficient
                        'theil_interpretation': str      # Interpretation of Theil index
                    },

                    'holder_statistics': {
                        'total_holders_analyzed': int,  # Number of holders analyzed (max 100)
                        'non_contract_holders': int,    # Number of holders that are EOAs (not contracts)
                    }
                },
                'timestamp': str  # ISO format timestamp of when analysis was performed
            }

            If there's an error fetching data:
            {
                'error': str,  # Error message
                'token_address': str  # The token address that was analyzed
            }

    Example:
        >>> data = analyze_token_distribution("0x95aD61b0a150d79219dCF64E1E6Cc01f0B64C4cE")
        >>> print(data['distribution_metrics']['gini_coefficient'])
        0.8411298486981609
        >>> print(data['distribution_metrics']['top_holder_concentration']['top_1_percentage'])
        41.04368089149804

    Notes:
        - Gini coefficient interpretation:
          * 0.0-0.3: Low concentration
          * 0.3-0.6: Moderate concentration
          * 0.6-0.8: High concentration
          * 0.8-1.0: Very high concentration

        - Top holder concentration interpretation:
          * >50% by top 1: Potential centralization risk
          * >70% by top 10: High centralization

        - When fewer than 5 (or 10) holders are returned, the top-5 (top-10)
          percentage covers all of the holders that are available

        - Non-contract holders ratio can indicate institutional vs retail ownership
    """
    assert_token_address(token_address)

    # Create contract instance
    token_contract = web3.eth.contract(address=token_address, abi=ERC20_ABI)

    # Get token metadata
    decimals = token_contract.functions.decimals().call()
    total_supply = token_contract.functions.totalSupply().call() / (10**decimals)
    token_symbol = token_contract.functions.symbol().call()

    # Get all holders (limited to top 100 for practical purposes)
    holders = get_token_holders(token_address, 100)
    if not holders:
        return {
            'error': 'Failed to fetch holder data',
            'token_address': token_address
        }

    # Calculate Gini coefficient
    gini = calculate_gini_coefficient(holders)

    def _top_share(count: int) -> float:
        # Percentage of supply held by the first `count` holders. Slicing
        # tolerates short lists, so fewer holders than `count` still yields
        # their combined share rather than 0. Relies on `holders` being
        # ordered largest-first, as requested from the holders query.
        return sum(h['balance'] for h in holders[:count]) / total_supply * 100

    top_1_percentage = _top_share(1)
    top_5_percentage = _top_share(5)
    top_10_percentage = _top_share(10)

    # Count non-contract holders
    non_contract_holders = sum(1 for holder in holders if not is_contract_address(holder['address']))

    # Calculate additional concentration metrics
    nakamoto_coef = calculate_nakamoto_coefficient(holders, total_supply)
    theil_index = calculate_theil_index(holders)

    if nakamoto_coef < 4:
        nakamoto_interpretation = 'High centralization'
    elif nakamoto_coef < 10:
        nakamoto_interpretation = 'Moderate centralization'
    else:
        nakamoto_interpretation = 'Decentralized'

    if theil_index > 1:
        theil_interpretation = 'High inequality'
    elif theil_index > 0.5:
        theil_interpretation = 'Moderate inequality'
    else:
        theil_interpretation = 'Low inequality'

    return {
        'token_info': {
            'address': token_address,
            'symbol': token_symbol,
            'total_supply': total_supply
        },
        'distribution_metrics': {
            'gini_coefficient': gini,
            'top_holder_concentration': {
                'top_1_percentage': top_1_percentage,
                'top_5_percentage': top_5_percentage,
                'top_10_percentage': top_10_percentage
            },
            'concentration_metrics': {
                'nakamoto_interpretation': nakamoto_interpretation,
                'theil_interpretation': theil_interpretation
            },
            'holder_statistics': {
                'total_holders_analyzed': len(holders),
                'non_contract_holders': non_contract_holders
            }
        },
        'timestamp': datetime.now().isoformat()
    }

In [28]:
analyze_token_distribution(SHIB_INU)

{'token_info': {'address': '0x95aD61b0a150d79219dCF64E1E6Cc01f0B64C4cE',
  'symbol': 'SHIB',
  'total_supply': 999982339482145.2},
 'distribution_metrics': {'gini_coefficient': 0.8411298486981609,
  'top_holder_concentration': {'top_1_percentage': 41.04368089149804,
   'top_5_percentage': 57.20194959238959,
   'top_10_percentage': 61.413550410113594},
  'concentration_metrics': {'nakamoto_interpretation': 'Moderate centralization',
   'theil_interpretation': 'High inequality'},
  'holder_statistics': {'total_holders_analyzed': 100,
   'non_contract_holders': 91}},
 'timestamp': '2025-02-16T18:15:28.114920'}

## Volume Pattern Analysis