# In [None]:  (Jupyter cell marker left over from the notebook export)
#!/usr/bin/env python3

from dotenv import load_dotenv
import os

import re, sys, time, json, warnings
from datetime import datetime, timezone
from collections import defaultdict
import numpy as np
import pandas as pd
import requests
from sklearn.preprocessing import RobustScaler, MaxAbsScaler

# Runtime configuration. load_dotenv() must run before os.getenv() so that a
# key defined in a local .env file is visible in the environment.
load_dotenv()
# Sent as the `key` query parameter on every Covalent request; None when the
# COVALENT_API_KEY variable is not defined.
API_KEY = os.getenv("COVALENT_API_KEY")
# Chain segment of the Covalent transactions URL.
CHAIN_ID  = "1"                          
# Items requested per page; a page shorter than this ends pagination.
PAGE_SIZE = 100
# Seconds to sleep between consecutive page requests for one wallet.
RATE_WAIT = 0.15                         
# Default input file for load_wallets().
WALLETS_CSV = "wallets.csv"

# Token list downloaded by refresh_compound_contracts() to discover contract addresses.
TOKENLIST_URL = "https://raw.githubusercontent.com/compound-finance/token-list/master/compound.tokenlist.json"

# Maps a lower-cased decoded event name to the action category used when
# aggregating wallet features. Names that are absent from this table are
# ignored by the feature extractor.
EVENT_MAP = {
    # Deposits: "supply" must map to "supply" (it previously mapped to
    # "withdraw", so those events never reached total_supplied_usd).
    "mint": "supply",
    "supply": "supply",
    # Withdrawals ("redeem" is the cToken withdrawal event name).
    "redeem": "withdraw",
    "withdraw": "withdraw",
    "borrow": "borrow",
    "repayborrow": "repay",
    "repay": "repay",
    "liquidateborrow": "liquidate",
    "absorb": "liquidate",
}

# Per-feature weights applied by calculate_risk_scores() to the "<name>_scaled"
# columns (each contribution is scaled_value * weight / 10). Positive weights
# raise the weighted sum, negative weights lower it. Dict order is the order in
# which contributions are accumulated.
FEATURE_WEIGHTS = {
    "borrow_supply_ratio": 25.0,
    "liquidation_cnt": 20.0,
    "total_liquidated_usd": 18.0,
    "net_borrowed_usd": 15.0,
    "days_since_last_interaction": 12.0,
    "max_single_action_usd": 10.0,
    "compound_risk_flag": 8.0,
    "exposure_tier": 7.0,
    "repayment_ratio": -10.0,
    "total_supplied_usd": -8.0,
    "account_age_days": -6.0,
    "interaction_frequency": -5.0,
}

# Every numeric feature produced per wallet; each one is initialised to 0.0
# before extraction. Note that "total_borrowed_usd" and "total_repaid_usd"
# appear here but have no entry in FEATURE_WEIGHTS.
ALL_FEATURES = [
    "total_supplied_usd", "total_borrowed_usd", "total_repaid_usd", 
    "total_liquidated_usd", "liquidation_cnt", "borrow_supply_ratio",
    "repayment_ratio", "net_borrowed_usd", "account_age_days",
    "days_since_last_interaction", "max_single_action_usd", 
    "interaction_frequency", "compound_risk_flag", "exposure_tier"
]

# "0x" followed by exactly 40 hexadecimal characters.
ETH_RE = re.compile(r"^0x[a-fA-F0-9]{40}$")


def is_eth(addr: str) -> bool:
    """Return True when *addr* is a string that, once stripped, matches ETH_RE.

    Non-string inputs are rejected rather than raising.
    """
    if not isinstance(addr, str):
        return False
    candidate = addr.strip()
    return ETH_RE.match(candidate) is not None

def load_wallets(csv_path=WALLETS_CSV):
    """Load the unique Ethereum addresses found in the first column of a CSV.

    Args:
        csv_path: Path of the CSV file to read. The file is read without a
            header row; only its first column is used.

    Returns:
        A list of lower-cased, de-duplicated addresses in order of first
        appearance. Cells that are not valid addresses (including a header
        cell, if any) are dropped by the ``is_eth`` filter.

    Raises:
        SystemExit: If the file is missing, cannot be read, or contains no
            valid address. The message names the file that was requested.
    """
    try:
        df = pd.read_csv(csv_path, header=None)
        col = df.iloc[:, 0].astype(str).str.strip()

        # No explicit header skip is needed: a header cell is not a valid
        # address, so the validity filter removes it like any other bad row.
        wallets = col[col.apply(is_eth)].str.lower().unique().tolist()

        if not wallets:
            sys.exit(f"No valid Ethereum addresses found in {csv_path}")

        return wallets

    except FileNotFoundError:
        sys.exit(f"File '{csv_path}' not found")
    except Exception as e:
        # CLI boundary: report any read/parse failure as a clean exit message.
        # SystemExit is not an Exception subclass, so the exit above passes through.
        sys.exit(f"Error reading {csv_path}: {e}")

def refresh_compound_contracts():
    """Return the set of lower-cased Compound contract addresses to match against.

    Downloads the token list at ``TOKENLIST_URL``, keeps the entries whose
    ``chainId`` is 1 and adds the Comptroller and COMP token addresses. If the
    download or parsing fails for any reason, a built-in fallback set is
    returned instead so the script can still run (best effort).

    Returns:
        set[str]: Lower-cased contract addresses.
    """
    # Always included, whether or not the token list could be fetched.
    core_contracts = {
        "0x3d9819210a31b4961b30ef54be2aed79b9c9cd3b",  # Comptroller
        "0xc00e94cb662c3520282e6f5717214004a7f26888",  # COMP token
    }

    try:
        response = requests.get(TOKENLIST_URL, timeout=20)
        response.raise_for_status()

        tokens = response.json()["tokens"]
        listed = {t["address"].lower() for t in tokens if t["chainId"] == 1}
        return listed | core_contracts

    except Exception:
        # Deliberate best-effort: any failure (network, HTTP status, malformed
        # JSON, unexpected schema) falls back to a static list of well-known
        # mainnet markets.
        return core_contracts | {
            "0x4ddc2d193948926d02f9b1fe9e1daa0718270ed5",  # cETH
            "0x5d3a536e4d6dbd6114cc1ead35777bab948e3643",  # cDAI
            "0x39aa39c021dfbae8fac545936693ac917d5e7563",  # cUSDC
            "0x6c8c6b02e7b2be14d4fa6022dfd6d75921d90e4e",  # cBAT
            "0xf650c3d88d12db855b8bf7d11be6c55a4e07dcc9",  # cUSDT
            "0xf5dce57282a584d2746faf1593d3121fcac444dc",  # cSAI (legacy market)
        }


# Resolved once at import time; used for membership tests during extraction.
COMPOUND_CONTRACTS = refresh_compound_contracts()

def fetch_all_transactions(wallet: str):
    """Fetch every transaction Covalent reports for *wallet*, page by page.

    Each page is requested up to three times with exponential back-off
    (1 s, 2 s) between attempts. Pagination stops at the first empty page or
    the first page shorter than ``PAGE_SIZE``.

    Args:
        wallet: Address inserted into the request URL.

    Returns:
        list: The transaction items collected so far. On a page that still
        fails after all retries, or whose payload has no usable ``data``
        object, the items gathered from earlier pages are returned rather
        than raising.
    """
    url = f"https://api.covalenthq.com/v1/{CHAIN_ID}/address/{wallet}/transactions_v2/"
    transactions, page = [], 0
    max_retries = 3

    while True:
        params = {"key": API_KEY, "page-size": PAGE_SIZE, "page-number": page}

        payload = None
        for attempt in range(max_retries):
            try:
                response = requests.get(url, params=params, timeout=30)
                response.raise_for_status()
                payload = response.json()
                break
            except (requests.RequestException, ValueError):
                # ValueError covers a non-JSON body on requests versions whose
                # decode error is not a RequestException subclass.
                if attempt == max_retries - 1:
                    return transactions
                time.sleep(2 ** attempt)

        # The payload may be empty, lack "data", or carry "data": null.
        body = payload.get("data") if isinstance(payload, dict) else None
        if not isinstance(body, dict):
            return transactions

        items = body.get("items") or []
        if not items:
            break

        transactions.extend(items)
        if len(items) < PAGE_SIZE:
            break

        page += 1
        time.sleep(RATE_WAIT)

    return transactions

def safe_float(value, default=0.0):
    """Convert *value* to ``float``, returning *default* when that is impossible.

    Args:
        value: Anything accepted by ``float()``; ``None`` yields *default*.
        default: Value returned for ``None`` or unconvertible input.

    Returns:
        The converted float, or *default*.
    """
    if value is None:
        return default
    try:
        return float(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: integers too large to be represented as a float.
        return default

def safe_get_params(decoded_event):
    """Return the ``params`` list of a decoded event, or ``[]`` if unavailable.

    An empty list is returned when *decoded_event* is not a dict, or when its
    ``"params"`` entry is missing or is anything other than a list.
    """
    candidate = decoded_event.get("params") if isinstance(decoded_event, dict) else None
    return candidate if isinstance(candidate, list) else []

def _event_usd_value(event: dict, decoded: dict) -> float:
    """Return the USD quote of one decoded event.

    Uses the event's own ``value_quote``. When that is missing or zero, scans
    the amount-like decoded parameters and stops at the first one with a
    positive ``value_quote``; if none is positive, the value of the last
    amount-like parameter seen (or 0.0) is returned.
    """
    usd_value = safe_float(event.get("value_quote"))
    if usd_value != 0:
        return usd_value

    for param in safe_get_params(decoded):
        if not isinstance(param, dict):
            continue
        # "name" may be present but null, so coerce before lower-casing.
        param_name = str(param.get("name") or "").lower()
        if param_name in ("amount", "seizetokens", "borrowamount", "repayamount"):
            usd_value = safe_float(param.get("value_quote"))
            if usd_value > 0:
                break
    return usd_value


def _accumulate_compound_tx(tx: dict, features: dict, timestamps: list) -> None:
    """Fold one transaction's Compound events into *features* (in place).

    The transaction is ignored unless it targets, or emits an event from, an
    address in ``COMPOUND_CONTRACTS``. Its timestamp is appended to
    *timestamps*; a transaction without a parseable ``block_signed_at`` is
    skipped entirely.
    """
    to_address = (tx.get("to_address") or "").lower()
    # "log_events" may be present but null.
    log_events = [e for e in (tx.get("log_events") or []) if isinstance(e, dict)]
    compound_events = [
        e for e in log_events
        if (e.get("sender_address") or "").lower() in COMPOUND_CONTRACTS
    ]

    if to_address not in COMPOUND_CONTRACTS and not compound_events:
        return

    try:
        timestamps.append(pd.to_datetime(tx["block_signed_at"]).timestamp())
    except (KeyError, TypeError, ValueError):
        return

    for event in compound_events:
        decoded = event.get("decoded")
        if not isinstance(decoded, dict):
            continue

        action_type = EVENT_MAP.get(str(decoded.get("name") or "").lower())
        if not action_type:
            continue

        usd_value = _event_usd_value(event, decoded)

        if action_type == "supply":
            features["total_supplied_usd"] += usd_value
        elif action_type == "borrow":
            features["total_borrowed_usd"] += usd_value
            # NOTE(review): only borrow events feed max_single_action_usd,
            # despite the generic name — confirm this is intended.
            features["max_single_action_usd"] = max(
                features["max_single_action_usd"], usd_value
            )
        elif action_type == "repay":
            features["total_repaid_usd"] += usd_value
        elif action_type == "liquidate":
            features["total_liquidated_usd"] += usd_value
            features["liquidation_cnt"] += 1


def _derive_features(features: dict, timestamps: list, now_timestamp: float) -> None:
    """Compute age, recency, ratio, flag and tier features from the raw totals."""
    features["account_age_days"] = (now_timestamp - min(timestamps)) / 86400
    features["days_since_last_interaction"] = (now_timestamp - max(timestamps)) / 86400
    features["interaction_frequency"] = len(timestamps) / max(features["account_age_days"], 1)

    # The 1e-9 term avoids division by zero when nothing was supplied/borrowed.
    features["borrow_supply_ratio"] = (
        features["total_borrowed_usd"] / (features["total_supplied_usd"] + 1e-9)
    )
    features["repayment_ratio"] = (
        features["total_repaid_usd"] / (features["total_borrowed_usd"] + 1e-9)
    )
    features["net_borrowed_usd"] = max(
        0, features["total_borrowed_usd"] - features["total_repaid_usd"]
    )

    # Additive flag, 0..8.
    # NOTE(review): a wallet that never borrowed has repayment_ratio == 0 and
    # therefore trips the "< 0.5" term — confirm this is intended.
    features["compound_risk_flag"] = (
        (features["liquidation_cnt"] > 0) * 3 +
        (features["borrow_supply_ratio"] > 0.8) * 2 +
        (features["repayment_ratio"] < 0.5) * 2 +
        (features["days_since_last_interaction"] > 365) * 1
    )

    # Tier by order of magnitude of total borrowed USD (natural log of 1 + x).
    log_borrowed = np.log1p(features["total_borrowed_usd"])
    if log_borrowed > 15:
        features["exposure_tier"] = 4
    elif log_borrowed > 10:
        features["exposure_tier"] = 3
    elif log_borrowed > 5:
        features["exposure_tier"] = 2
    elif log_borrowed > 0:
        features["exposure_tier"] = 1
    else:
        features["exposure_tier"] = 0


def extract_wallet_features(wallet: str, transactions: list) -> dict:
    """Aggregate a wallet's Compound activity into the ``ALL_FEATURES`` dict.

    Args:
        wallet: Address stored under ``"wallet_id"`` in the result.
        transactions: Transaction dicts as returned by the fetch step.

    Returns:
        dict: One entry per name in ``ALL_FEATURES`` plus ``"wallet_id"``.
        All features stay at 0.0 when there are no transactions; derived
        features stay at 0.0 when no Compound transaction had a usable
        timestamp.

    A malformed transaction is skipped on its own instead of aborting the
    whole wallet, so the derived features are still computed from the
    well-formed ones.
    """
    features = {feature: 0.0 for feature in ALL_FEATURES}
    features["wallet_id"] = wallet

    if not transactions:
        return features

    now_timestamp = datetime.now(timezone.utc).timestamp()
    timestamps = []

    for tx in transactions:
        if not isinstance(tx, dict):
            continue
        try:
            _accumulate_compound_tx(tx, features, timestamps)
        except (AttributeError, KeyError, TypeError, ValueError):
            # Unexpected shape inside this record: drop it, keep the rest.
            continue

    if timestamps:
        _derive_features(features, timestamps, now_timestamp)

    return features

def advanced_feature_scaling(dataframe):
    """Return a copy of *dataframe* with a ``<feature>_scaled`` column per feature.

    Scaling by feature family:
      * USD totals: ``log1p`` then ``RobustScaler``, mapped with ``* 20 + 50``
        and clipped to [0, 100].
      * Counts: ``MaxAbsScaler`` times 100.
      * Ratios: percentile rank times 100.
      * Time features: ``RobustScaler``, mapped and clipped like USD totals.
      * Composite flags/tiers: value divided by the column maximum, times 100.

    A feature whose column is missing gets the constant 0; USD, count and time
    features also get 0 when the column does not sum to more than zero, and
    composite features get 0 when the column maximum is not positive.
    """
    columns = dataframe.columns
    result = dataframe.copy()

    def has_positive_total(name):
        return name in columns and dataframe[name].sum() > 0

    def robust_percent(values):
        centered = RobustScaler().fit_transform(values.reshape(-1, 1)).flatten()
        return np.clip(centered * 20 + 50, 0, 100)

    # USD amounts are log-compressed before robust scaling.
    for name in (
        "total_supplied_usd",
        "total_borrowed_usd",
        "total_liquidated_usd",
        "net_borrowed_usd",
        "max_single_action_usd",
    ):
        target = f"{name}_scaled"
        if has_positive_total(name):
            result[target] = robust_percent(np.log1p(dataframe[name]).values)
        else:
            result[target] = 0

    # Counts are expressed as a percentage of the largest absolute count.
    for name in ("liquidation_cnt",):
        target = f"{name}_scaled"
        if has_positive_total(name):
            share_of_max = MaxAbsScaler().fit_transform(
                dataframe[name].values.reshape(-1, 1)
            ).flatten()
            result[target] = share_of_max * 100
        else:
            result[target] = 0

    # Ratios become percentile ranks.
    for name in ("borrow_supply_ratio", "repayment_ratio"):
        target = f"{name}_scaled"
        if name in columns:
            result[target] = dataframe[name].rank(pct=True) * 100
        else:
            result[target] = 0

    # Time-based features are robust-scaled without the log step.
    for name in ("account_age_days", "days_since_last_interaction", "interaction_frequency"):
        target = f"{name}_scaled"
        if has_positive_total(name):
            result[target] = robust_percent(dataframe[name].values)
        else:
            result[target] = 0

    # Composite features are normalised by their own maximum.
    for name in ("compound_risk_flag", "exposure_tier"):
        target = f"{name}_scaled"
        peak = dataframe[name].max() if name in columns else 0
        if peak > 0:
            result[target] = (dataframe[name] / peak) * 100
        else:
            result[target] = 0

    return result

def calculate_risk_scores(raw_dataframe):
    """Combine the scaled features into one risk score per row, within 0..1000.

    Steps:
      1. Scale the features with ``advanced_feature_scaling``.
      2. Accumulate ``scaled * weight / 10`` for every entry in
         ``FEATURE_WEIGHTS`` that has a scaled column.
      3. Min-max normalise the sums to integers in 0..1000 (all zeros when
         every row has the same sum).
      4. Apply a floor derived from hard risk signals (capped at 300) by
         taking the element-wise maximum, then clip to 0..1000.

    Returns:
        The per-row scores in the row order of *raw_dataframe*.
        NOTE(review): since the floor is a pandas Series, the result is
        presumably a Series as well — confirm if callers depend on the type.
    """
    scaled = advanced_feature_scaling(raw_dataframe)
    row_count = len(raw_dataframe)

    weighted_sum = np.zeros(row_count)
    for name, weight in FEATURE_WEIGHTS.items():
        column = f"{name}_scaled"
        if column not in scaled.columns:
            continue
        weighted_sum += scaled[column] * (weight / 10)

    if weighted_sum.max() > weighted_sum.min():
        lowest = weighted_sum.min()
        highest = weighted_sum.max()
        unit_interval = (weighted_sum - lowest) / (highest - lowest)
        base_scores = (unit_interval * 1000).round().astype(int)
    else:
        # Every row scored the same, so there is nothing to spread out.
        base_scores = np.zeros(row_count, dtype=int)

    # Minimum score guaranteed by hard risk signals, regardless of the
    # relative ranking above.
    was_liquidated = raw_dataframe["liquidation_cnt"] > 0
    heavy_borrower = raw_dataframe["borrow_supply_ratio"] > 0.7
    lost_collateral = raw_dataframe["total_liquidated_usd"] > 0
    many_flags = raw_dataframe["compound_risk_flag"] > 5
    floor = (
        was_liquidated * 150 + heavy_borrower * 100 + lost_collateral * 125 + many_flags * 75
    ).clip(0, 300)

    return np.maximum(base_scores, floor).clip(0, 1000)

def main():
    """Score every wallet listed in the wallets CSV and write ``risk_scores.csv``.

    For each wallet the transactions are fetched and turned into features; a
    wallet whose processing fails is kept with all-zero features (and a
    warning on stderr) so that it still appears in the output. The output CSV
    has the columns ``wallet_id`` and ``score``.

    Raises:
        SystemExit: If no API key is configured, or if the wallet list cannot
            be loaded.
    """
    # Without a key every request fails and all wallets would silently score
    # from empty data, so stop early with an explicit message instead.
    if not API_KEY:
        sys.exit("COVALENT_API_KEY is not set (environment variable or .env file)")

    wallet_addresses = load_wallets()

    wallet_features = []

    for wallet in wallet_addresses:
        try:
            transactions = fetch_all_transactions(wallet)
            features = extract_wallet_features(wallet, transactions)
        except Exception as exc:
            # Top-level boundary: keep the wallet in the output with neutral
            # features, but make the failure visible.
            print(f"warning: {wallet}: {exc!r}; using default features", file=sys.stderr)
            features = {feature: 0.0 for feature in ALL_FEATURES}
            features["wallet_id"] = wallet
        wallet_features.append(features)

    raw_df = pd.DataFrame(wallet_features).fillna(0)
    risk_scores = calculate_risk_scores(raw_df)

    results_df = pd.DataFrame({
        "wallet_id": raw_df["wallet_id"],
        "score": risk_scores
    })

    results_df.to_csv("risk_scores.csv", index=False)


if __name__ == "__main__":
    main()
