# ü§ñ Machine Learning for Crypto Airdrop Fraud Detection

## Role in the Project
This notebook analyzes wallet behavior and trains machine learning models
to detect potential airdrop abuse.

## Business Function
Using engineered behavioral features, the model identifies suspicious wallets
that may be farming rewards through Sybil attacks.

## Responsibilities
- Exploratory Data Analysis (EDA)
- Behavioral pattern analysis
- Feature validation
- Model training & evaluation
- Fraud prediction insights

## Output
Predictive model and insights into key fraud signals.


## üöÄ Setup and Library Installation

This section handles the initial setup, including installing necessary Python libraries and importing them for use in the notebook. We're installing `requests` for making HTTP requests, `pandas` for data manipulation, and `tqdm` for progress bars.

In [1]:
# Install nothing fancy, just the essentials
!pip install requests pandas tqdm -q

import requests
import pandas as pd
import json
from tqdm import tqdm
from datetime import datetime
import time

print("‚úÖ Locked and loaded. Let's hunt some farmers.")

‚úÖ Locked and loaded. Let's hunt some farmers.


In [2]:
!pip install moralis



In [3]:
from moralis import evm_api
import json

api_key = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJub25jZSI6ImZmNTAzMTFhLWU5YjMtNGYzNi05MzUzLTRjZTkzNjllMzg4MSIsIm9yZ0lkIjoiNDg4MTM5IiwidXNlcklkIjoiNTAyMjMzIiwidHlwZUlkIjoiOWZkNmQwMGQtY2FhOS00MDlhLTg4NTYtMjY2MjhiOWQ3OTZjIiwidHlwZSI6IlBST0pFQ1QiLCJpYXQiOjE3NjcxNjI5ODMsImV4cCI6NDkyMjkyMjk4M30.z25lPLZbxr_zWff_UclzgZouDBefgRVTit3XBKeb6UI"

params = {
  "chain": "eth",
  "address": "0xcB1C1FdE09f811B294172696404e88E658659905"
}

result = evm_api.wallets.get_wallet_token_balances_price(
  api_key=api_key,
  params=params,
)

print(json.dumps(result, indent=4))

{
    "cursor": null,
    "page": 0,
    "page_size": 100,
    "block_number": 24131185,
    "result": [
        {
            "token_address": "0xeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee",
            "symbol": "ETH",
            "name": "Ether",
            "logo": "https://cdn.moralis.io/eth/0x.png",
            "thumbnail": "https://cdn.moralis.io/eth/0x_thumb.png",
            "decimals": 18,
            "balance": "1709495362615127",
            "possible_spam": false,
            "verified_contract": true,
            "total_supply": null,
            "total_supply_formatted": null,
            "percentage_relative_to_total_supply": null,
            "security_score": 99,
            "balance_formatted": "0.001709495362615127",
            "usd_price": 2976.3066714973143,
            "usd_price_24hr_percent_change": 0.8036163641936319,
            "usd_price_24hr_usd_change": 23.716669961299885,
            "usd_value": 5.087982452645123,
            "usd_value_24hr_usd_change":

In [4]:
import requests
import json

url = "https://deep-index.moralis.io/api/v2.2/0xcB1C1FdE09f811B294172696404e88E658659905/erc20?chain=eth"

headers = {
  "Accept": "application/json",
  "X-API-Key": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJub25jZSI6ImZmNTAzMTFhLWU5YjMtNGYzNi05MzUzLTRjZTkzNjllMzg4MSIsIm9yZ0lkIjoiNDg4MTM5IiwidXNlcklkIjoiNTAyMjMzIiwidHlwZUlkIjoiOWZkNmQwMGQtY2FhOS00MDlhLTg4NTYtMjY2MjhiOWQ3OTZjIiwidHlwZSI6IlBST0pFQ1QiLCJpYXQiOjE3NjcxNjI5ODMsImV4cCI6NDkyMjkyMjk4M30.z25lPLZbxr_zWff_UclzgZouDBefgRVTit3XBKeb6UI"
}

response = requests.request("GET", url, headers=headers)

# Parse the JSON response and pretty print it
json_data = json.loads(response.text)
print(json.dumps(json_data, indent=4))

[
    {
        "token_address": "0xfc98e825a2264d890f9a1e68ed50e1526abccacd",
        "symbol": "MCO2",
        "name": "Moss Carbon Credit",
        "logo": "https://logo.moralis.io/0x1_0xfc98e825a2264d890f9a1e68ed50e1526abccacd_07fd6700093c3960f9c3e029a4561ed9.webp",
        "thumbnail": "https://logo.moralis.io/0x1_0xfc98e825a2264d890f9a1e68ed50e1526abccacd_07fd6700093c3960f9c3e029a4561ed9.webp",
        "decimals": 18,
        "balance": "1200000000000000000",
        "possible_spam": false,
        "verified_contract": true,
        "total_supply": "2789330677756627123382104",
        "total_supply_formatted": "2789330.677756627123382104",
        "percentage_relative_to_total_supply": 4.3021073462868e-05,
        "security_score": 51
    },
    {
        "token_address": "0xae7ab96520de3a18e5e111b5eaab095312d7fe84",
        "symbol": "stETH",
        "name": "Liquid staked Ether 2.0",
        "logo": "https://logo.moralis.io/0x1_0xae7ab96520de3a18e5e111b5eaab095312d7fe84_daa6e44

In [5]:
# Your API key goes here
MORALIS_API_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJub25jZSI6ImZmNTAzMTFhLWU5YjMtNGYzNi05MzUzLTRjZTkzNjllMzg4MSIsIm9yZ0lkIjoiNDg4MTM5IiwidXNlcklkIjoiNTAyMjMzIiwidHlwZUlkIjoiOWZkNmQwMGQtY2FhOS00MDlhLTg4NTYtMjY2MjhiOWQ3OTZjIiwidHlwZSI6IlBST0pFQ1QiLCJpYXQiOjE3NjcxNjI5ODMsImV4cCI6NDkyMjkyMjk4M30.z25lPLZbxr_zWff_UclzgZouDBefgRVTit3XBKeb6UI"  # Replace with your actual key

# Test the connection
def test_moralis():
    url = "https://deep-index.moralis.io/api/v2.2/erc20/metadata"
    headers = {
        "Accept": "application/json",
        "X-API-Key": MORALIS_API_KEY
    }
    # Added a sample ERC20 token address (BUSD) to the params for metadata retrieval
    params = {"chain": "eth", "addresses": ["0x4fabb145d64652a948d72533023f6e7a623c7c53"]}

    response = requests.get(url, headers=headers, params=params)

    if response.status_code == 200:
        # For metadata, it returns a list of token metadata, not a block number
        data = response.json()
        if data:
            print(f"üéØ Connected! Retrieved metadata for {len(data)} ERC20 tokens.")
            print(f"Sample token: {data[0]['name']} ({data[0]['symbol']})")
        else:
            print("üéØ Connected, but no metadata returned for the provided address.")
        return True
    else:
        print(f"üíÄ Failed: {response.status_code} - {response.text}")
        return False

test_moralis()


üéØ Connected! Retrieved metadata for 1 ERC20 tokens.
Sample token: BUSD (BUSD)


True

In [6]:
# Uniswap token contract
UNI_TOKEN_CONTRACT = "0x1f9840a85d5aF5bf1D1762F925BDADdC4201F984"

# The airdrop distributor contract (where claims happened)
UNI_AIRDROP_CONTRACT = "0x090D4613473dEE047c3f2706764f49E0821D256e"

print(f"üéØ Target locked: UNI airdrop contract")
print(f"üìç Contract: {UNI_AIRDROP_CONTRACT}")

üéØ Target locked: UNI airdrop contract
üìç Contract: 0x090D4613473dEE047c3f2706764f49E0821D256e


In [7]:
def get_airdrop_claimers(contract_address, sample_size=100):
    """
    Get wallets that interacted with the airdrop contract.
    We'll sample because the full dataset is huge.
    """

    url = f"https://deep-index.moralis.io/api/v2.2/{contract_address}"
    headers = {
        "Accept": "application/json",
        "X-API-Key": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJub25jZSI6ImZmNTAzMTFhLWU5YjMtNGYzNi05MzUzLTRjZTkzNjllMzg4MSIsIm9yZ0lkIjoiNDg4MTM5IiwidXNlcklkIjoiNTAyMjMzIiwidHlwZUlkIjoiOWZkNmQwMGQtY2FhOS00MDlhLTg4NTYtMjY2MjhiOWQ3OTZjIiwidHlwZSI6IlBST0pFQ1QiLCJpYXQiOjE3NjcxNjI5ODMsImV4cCI6NDkyMjkyMjk4M30.z25lPLZbxr_zWff_UclzgZouDBefgRVTit3XBKeb6UI"
    }

    # Get transactions to this contract
    transactions = []
    cursor = None

    print("üîç Fetching airdrop claimers...")

    # Increased page limit to allow fetching up to 1000 transactions (10 pages * 100 limit)
    for page in range(10):
        params = {
            "chain": "eth",
            "limit": 100
        }
        if cursor:
            params["cursor"] = cursor

        response = requests.get(url, headers=headers, params=params)

        if response.status_code == 200:
            data = response.json()
            transactions.extend(data.get("result", []))
            cursor = data.get("cursor")
            print(f"  üì¶ Fetched {len(transactions)} transactions so far...")

            if not cursor or len(transactions) >= sample_size:
                break

            time.sleep(0.3)  # Rate limit respect
        else:
            print(f"‚ö†Ô∏è  Error: {response.status_code}")
            break

    # Extract unique wallet addresses
    wallets = list(set([tx["from_address"] for tx in transactions]))

    print(f"‚úÖ Found {len(wallets)} unique wallets")

    return wallets[:sample_size], transactions

# Execute
claimer_wallets, claim_txs = get_airdrop_claimers(UNI_AIRDROP_CONTRACT, sample_size=1000)
print(f"\nüéØ Locked onto {len(claimer_wallets)} wallets to analyze")

üîç Fetching airdrop claimers...
  üì¶ Fetched 100 transactions so far...
  üì¶ Fetched 200 transactions so far...
  üì¶ Fetched 300 transactions so far...
  üì¶ Fetched 400 transactions so far...
  üì¶ Fetched 500 transactions so far...
  üì¶ Fetched 600 transactions so far...
  üì¶ Fetched 700 transactions so far...
  üì¶ Fetched 800 transactions so far...
  üì¶ Fetched 900 transactions so far...
  üì¶ Fetched 1000 transactions so far...
‚úÖ Found 459 unique wallets

üéØ Locked onto 459 wallets to analyze


In [8]:
from moralis import evm_api # Import evm_api

def get_wallet_transactions(wallet_address, limit=100):
    """
    Get transaction history for a single wallet using Moralis REST API.
    """
    url = f"https://deep-index.moralis.io/api/v2.2/{wallet_address}"
    headers = {
        "Accept": "application/json",
        "X-API-Key": MORALIS_API_KEY
    }
    params = {
        "chain": "eth",
        "limit": limit
    }

    try:
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()  # Raise an exception for HTTP errors
        data = response.json()
        return data.get("result", [])
    except requests.exceptions.RequestException as e:
        print(f"Error fetching transactions for {wallet_address}: {e}")
        return []

# Fetch transaction histories
wallet_data = {}

print("üïµÔ∏è Pulling transaction histories (this takes a few minutes)...")

for wallet in tqdm(claimer_wallets[:100]):  # Process up to 100 wallets to respect rate limits and keep execution time reasonable
    txs = get_wallet_transactions(wallet)
    wallet_data[wallet] = txs
    time.sleep(0.5)  # Respect rate limits (crucial!)

print(f"‚úÖ Collected data for {len(wallet_data)} wallets")

üïµÔ∏è Pulling transaction histories (this takes a few minutes)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [03:02<00:00,  1.83s/it]

‚úÖ Collected data for 100 wallets





In [9]:
# Peek at one wallet's data
sample_wallet = list(wallet_data.keys())[10]
print(f"Sample wallet: {sample_wallet}")
print(f"Transaction count: {len(wallet_data[sample_wallet])}")
print(f"First transaction: {wallet_data[sample_wallet][10] if wallet_data[sample_wallet] else 'None'}")

Sample wallet: 0xd9176e84898a0054680aec3f7c056b200c3d96c3
Transaction count: 100
First transaction: {'hash': '0xabd4888592d93a6364f4a94b94c67c694c45ddafd4524e8b3d032e87bbcf0bc0', 'nonce': '1138', 'transaction_index': '157', 'from_address_entity': None, 'from_address_entity_logo': None, 'from_address': '0xd9176e84898a0054680aec3f7c056b200c3d96c3', 'from_address_label': None, 'to_address_entity': None, 'to_address_entity_logo': None, 'to_address': '0x3014ca10b91cb3d0ad85fef7a3cb95bcac9c0f79', 'to_address_label': 'Fuse: Native Bridge', 'value': '0', 'gas': '231134', 'gas_price': '25000000000', 'input': '0x232a2c1d000000000000000000000000000000000000000000000000000000000000008000000000000000000000000000000000000000000000000000000000000000c000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000140000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000

In [10]:
def extract_features(wallet_address, transactions):
    """
    Turn transaction history into fraud signals.
    """

    if not transactions:
        return None

    features = {
        "wallet": wallet_address,
        "total_txs": len(transactions),
        "unique_contracts": len(set([tx.get("to_address", "") for tx in transactions if tx.get("to_address")])),
        "total_value_eth": sum([float(tx.get("value", 0)) / 1e18 for tx in transactions]),
        "gas_spent_eth": sum([
            float(tx.get("gas_price", 0)) * float(tx.get("receipt_gas_used", 0)) / 1e18
            for tx in transactions if tx.get("gas_price") and tx.get("receipt_gas_used")
        ]),
    }

    # Temporal features
    timestamps = [tx.get("block_timestamp") for tx in transactions if tx.get("block_timestamp")]
    if timestamps:
        timestamps.sort()
        features["first_tx_date"] = timestamps[0]
        features["last_tx_date"] = timestamps[-1]

        # Wallet age in days
        first = datetime.fromisoformat(timestamps[0].replace("Z", "+00:00"))
        last = datetime.fromisoformat(timestamps[-1].replace("Z", "+00:00"))
        features["wallet_age_days"] = (last - first).days
    else:
        features["wallet_age_days"] = 0

    # Activity patterns
    features["avg_tx_value"] = features["total_value_eth"] / features["total_txs"] if features["total_txs"] > 0 else 0
    features["tx_diversity"] = features["unique_contracts"] / features["total_txs"] if features["total_txs"] > 0 else 0

    return features

# Build feature dataset
feature_list = []

print("üîß Engineering features...")

for wallet, txs in tqdm(wallet_data.items()):
    feats = extract_features(wallet, txs)
    if feats:
        feature_list.append(feats)

df = pd.DataFrame(feature_list)
print(f"‚úÖ Feature matrix ready: {df.shape}")
df.head()

üîß Engineering features...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:00<00:00, 5102.37it/s]

‚úÖ Feature matrix ready: (100, 10)





Unnamed: 0,wallet,total_txs,unique_contracts,total_value_eth,gas_spent_eth,first_tx_date,last_tx_date,wallet_age_days,avg_tx_value,tx_diversity
0,0x9feb86622846d2567b6b8b83306203ef9eafd7f2,52,19,13.727778,0.116447,2020-07-31T00:55:35.000Z,2024-12-07T21:27:47.000Z,1590,0.263996,0.365385
1,0xc35d6adb365d9a4418f8386fa522722e61dc7a42,43,19,0.167111,0.022665,2019-07-08T18:46:28.000Z,2025-12-22T00:23:23.000Z,2358,0.003886,0.44186
2,0x247241f1d7477b9de10e95f12e8b7850c48485ca,18,8,0.081995,0.007185,2020-05-12T06:35:42.000Z,2025-12-24T17:49:35.000Z,2052,0.004555,0.444444
3,0xfc8cdc227a0bd99c0fc9b273a929969a86f9b21e,11,4,0.052988,0.00097,2025-04-17T16:27:35.000Z,2025-04-26T09:53:11.000Z,8,0.004817,0.363636
4,0xd9744dd06a705f5360ab530bcbfb018699f09bcd,80,17,2.138718,0.223436,2020-08-04T12:30:33.000Z,2025-11-03T13:16:11.000Z,1917,0.026734,0.2125


In [11]:
def label_suspicious(row):
    """
    Rule-based labeling: if wallet hits 3+ red flags, it's suspicious.
    """

    red_flags = 0
    # total transactions
    if row["total_txs"] < 20:
        red_flags += 1

    # wallet age
    if row["wallet_age_days"] < 30:
        red_flags += 1

    # transaction diversity
    if row["tx_diversity"] < 0.3:
        red_flags += 1

    # gas spent
    if row["gas_spent_eth"] < 0.05:
        red_flags += 1

    return 1 if red_flags >= 3 else 0

df["is_suspicious"] = df.apply(label_suspicious, axis=1)

print("üö© Suspicious wallet distribution:")
print(df["is_suspicious"].value_counts())
print(f"\nüìä Fraud rate: {df['is_suspicious'].mean():.1%}")

üö© Suspicious wallet distribution:
is_suspicious
0    93
1     7
Name: count, dtype: int64

üìä Fraud rate: 7.0%


In [12]:
print("=" * 60)
print("üü¢ CLEAN WALLET EXAMPLES")
print("=" * 60)
print(df[df["is_suspicious"] == 0][["wallet", "total_txs", "wallet_age_days", "tx_diversity", "gas_spent_eth"]].head(3))

print("\n" + "=" * 60)
print("üî¥ SUSPICIOUS WALLET EXAMPLES")
print("=" * 60)
print(df[df["is_suspicious"] == 1][["wallet", "total_txs", "wallet_age_days", "tx_diversity", "gas_spent_eth"]].head(3))

üü¢ CLEAN WALLET EXAMPLES
                                       wallet  total_txs  wallet_age_days  \
0  0x9feb86622846d2567b6b8b83306203ef9eafd7f2         52             1590   
1  0xc35d6adb365d9a4418f8386fa522722e61dc7a42         43             2358   
2  0x247241f1d7477b9de10e95f12e8b7850c48485ca         18             2052   

   tx_diversity  gas_spent_eth  
0      0.365385       0.116447  
1      0.441860       0.022665  
2      0.444444       0.007185  

üî¥ SUSPICIOUS WALLET EXAMPLES
                                        wallet  total_txs  wallet_age_days  \
3   0xfc8cdc227a0bd99c0fc9b273a929969a86f9b21e         11                8   
14  0xc4b8a2f28c24c6e63c5865e9543ee22c52897405          7                3   
30  0x6ca17e9e0caa2f2c24f918a145ef7521eba0477b          5               11   

    tx_diversity  gas_spent_eth  
3       0.363636       0.000970  
14      0.571429       0.000390  
30      1.000000       0.003853  


In [13]:
# Save to CSV
df.to_csv("airdrop_fraud_features_v1.csv", index=False)

print("üíæ Saved to airdrop_fraud_features_v1.csv")
print(f"üìä Final dataset: {df.shape[0]} wallets, {df.shape[1]} features")
print(f"üö© Suspicious wallets: {df['is_suspicious'].sum()}")

üíæ Saved to airdrop_fraud_features_v1.csv
üìä Final dataset: 100 wallets, 11 features
üö© Suspicious wallets: 7
