In [4]:
import pandas as pd
import numpy as np
import os

# Resolve the project root as the directory two levels above the current
# working directory, then derive the input and output CSV locations from it.
PROJECT_ROOT = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, os.pardir)
)
# Source dataset read by main().
input_path = os.path.join(
    PROJECT_ROOT, "data/engineered/engineered_with_ratios.csv"
)
# Destination for the dataset with the extra engineered columns.
output_path = os.path.join(
    PROJECT_ROOT, "data/engineered/engineered_extended.csv"
)

def extend_features(df):
    """Add derived flag, flow, spread and log-scaled columns to ``df``.

    The frame is modified in place and the same object is returned.

    New columns:
        is_high_balance: 1 when ``account_balance`` is strictly above the
            95th percentile of that column, else 0.
        high_txn_freq_flag: 1 when either ``transaction_frequency_sent`` or
            ``transaction_frequency_received`` is below 2, else 0.
        net_flow: ``total_received - total_sent``.
        spread_sent: ``max_transaction_sent - min_transaction_sent``.
        spread_received: ``max_transaction_received -
            min_transaction_received`` (see the note in the body about the
            fallback when the max column is missing).
        contract_activity_flag: 1 when ``contract_creation`` or
            ``contract_interaction`` is positive, else 0.
        log_balance: ``log1p`` of ``account_balance`` clipped at 0.
        log_txn_count: ``log1p`` of the sum of the two frequency columns.

    Args:
        df: DataFrame containing the input columns named above.

    Returns:
        The same DataFrame object with the new columns added.

    Raises:
        KeyError: If a required input column is missing.
    """
    # 1. Flag wallets in the top 5% of account_balance.
    balance = df["account_balance"]
    df["is_high_balance"] = (balance > balance.quantile(0.95)).astype(int)

    # 2. Flag wallets that send or receive very frequently. The original
    #    author's note reads the threshold as "<2 min between transactions";
    #    the unit is not verifiable from this file -- TODO confirm.
    df["high_txn_freq_flag"] = (
        (df["transaction_frequency_sent"] < 2) |
        (df["transaction_frequency_received"] < 2)
    ).astype(int)

    # 3. Net flow of ETH (+ means net inflow, - means draining behaviour).
    df["net_flow"] = df["total_received"] - df["total_sent"]

    # 4. Spread in transactions (volatility): largest minus smallest value.
    df["spread_sent"] = df["max_transaction_sent"] - df["min_transaction_sent"]
    # The received spread previously subtracted the minimum from the
    # *average*, which is not a spread and was inconsistent with spread_sent.
    # Use the max column when the dataset provides it; otherwise keep the
    # earlier average-based value so existing inputs still load.
    received_upper = (
        "max_transaction_received"
        if "max_transaction_received" in df.columns
        else "avg_transaction_received"
    )
    df["spread_received"] = df[received_upper] - df["min_transaction_received"]

    # 5. Contract activity flag (1 if the wallet either created or
    #    interacted with smart contracts).
    df["contract_activity_flag"] = (
        (df["contract_creation"] > 0) |
        (df["contract_interaction"] > 0)
    ).astype(int)

    # 6. Log transformations for skewed data. Negative balances are clipped
    #    to 0 so log1p never sees a value below zero.
    df["log_balance"] = np.log1p(balance.clip(lower=0))
    df["log_txn_count"] = np.log1p(
        df["transaction_frequency_sent"] + df["transaction_frequency_received"]
    )

    return df

def main():
    """Read the input CSV, add the extended features, and write the result.

    Reads from the module-level ``input_path`` and writes to the
    module-level ``output_path``, creating the output directory if needed.
    """
    source_frame = pd.read_csv(input_path)
    enriched = extend_features(source_frame)

    # Make sure the destination directory exists before writing.
    target_dir = os.path.dirname(output_path)
    os.makedirs(target_dir, exist_ok=True)

    enriched.to_csv(output_path, index=False)
    print(f"Saved extended features to: {output_path}")

# Run the pipeline only when this code is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()


Saved extended features to: /Users/jadaloau/Desktop/y2/sem2/is4303/project/project_code/Ethereum-Fraud-Detection/data/engineered/engineered_extended.csv
