In [2]:
import json
import pandas as pd
import numpy as np

def json_to_dataframe(json_path):
    with open(json_path, 'r') as file:
        data = json.load(file)

    records = []
    for entry in data:
        flat = {
            "userWallet": entry.get("userWallet"),
            "network": entry.get("network"),
            "protocol": entry.get("protocol"),
            "txHash": entry.get("txHash"),
            "logId": entry.get("logId"),
            "timestamp": entry.get("timestamp"),
            "blockNumber": entry.get("blockNumber"),
            "action": entry.get("action").lower(),  # normalize action names
        }

        
        action_data = entry.get("actionData", {})
        flat.update({
            "assetSymbol": action_data.get("assetSymbol"),
            "amount_raw": action_data.get("amount"),
            "assetPriceUSD": float(action_data.get("assetPriceUSD", 0)),
            "poolId": action_data.get("poolId"),
        })

        if action_data.get("amount"):
            flat["amount_usd"] = (int(action_data["amount"]) / 1e6) * flat["assetPriceUSD"]
        else:
            flat["amount_usd"] = 0.0

        records.append(flat)

    
    df = pd.DataFrame(records)

    
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s")

    return df

def extract_credit_features(df):
    user_features = {}

    for wallet, group in df.groupby("userWallet"):
        total_deposit = group.loc[group["action"] == "deposit", "amount_usd"].sum()
        total_borrow = group.loc[group["action"] == "borrow", "amount_usd"].sum()
        total_repay = group.loc[group["action"] == "repay", "amount_usd"].sum()
        total_redeem = group.loc[group["action"] == "redeemunderlying", "amount_usd"].sum()
        total_liquidation = group.loc[group["action"] == "liquidationcall", "amount_usd"].sum()

        num_deposit = (group["action"] == "deposit").sum()
        num_borrow = (group["action"] == "borrow").sum()
        num_repay = (group["action"] == "repay").sum()
        num_redeem = (group["action"] == "redeemunderlying").sum()
        num_liquidation = (group["action"] == "liquidationcall").sum()

        repay_ratio = total_repay / (total_borrow + 1)  # avoid divide by zero
        redeem_ratio = total_redeem / (total_deposit + 1)
        liquidation_ratio = total_liquidation / (total_borrow + 1)

        wallet_features = {
            "total_deposit_usd": total_deposit,
            "total_borrow_usd": total_borrow,
            "total_repay_usd": total_repay,
            "total_redeem_usd": total_redeem,
            "total_liquidation_usd": total_liquidation,

            "num_deposits": num_deposit,
            "num_borrows": num_borrow,
            "num_repays": num_repay,
            "num_redeems": num_redeem,
            "num_liquidations": num_liquidation,

            "repay_to_borrow_ratio": repay_ratio,
            "redeem_to_deposit_ratio": redeem_ratio,
            "liquidation_ratio": liquidation_ratio
        }

        user_features[wallet] = wallet_features

    return user_features

def compute_credit_score(user_features_dict):
    scores = {}

    for wallet, features in user_features_dict.items():
        score = 500  # base score

        # 📈 Positive behaviors
        score += min(features["total_deposit_usd"] / 100, 200)      # large deposits
        score += min(features["repay_to_borrow_ratio"] * 100, 200)  # repay behavior
        score += min(features["redeem_to_deposit_ratio"] * 100, 100)  # redeemed usage

        # ⚠️ Negative behaviors
        score -= min(features["liquidation_ratio"] * 500, 200)       # penalize liquidations
        score -= min((features["total_borrow_usd"] - features["total_repay_usd"]) / 100, 100)  # unpaid borrow

        # Clamp to [0, 1000]
        score = max(0, min(1000, int(score)))
        scores[wallet] = score

    return scores

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

def train_credit_score_model(user_features_dict):
    # Convert dict to DataFrame
    df = pd.DataFrame.from_dict(user_features_dict, orient="index")
    df["userWallet"] = df.index

    # Generate pseudo-labels using rule-based scoring
    df["credit_score"] = list(compute_credit_score(user_features_dict).values())

    # Separate features and labels
    X = df.drop(columns=["userWallet", "credit_score"])
    y = df["credit_score"]
    user_wallets = df["userWallet"]

    # Train/test split
    X_train, X_test, y_train, y_test, wallet_train, wallet_test = train_test_split(
        X, y, user_wallets, test_size=0.2, random_state=42
    )

    # Train model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    return model, X_test, y_test, wallet_test




def main(json_file, output_csv="wallet_scores.csv"):
    df = json_to_dataframe(json_file)
    features_df = extract_credit_features(df)

    # Rule-based scoring
    credit_scores = compute_credit_score(features_df)
    score_df = pd.DataFrame(list(credit_scores.items()), columns=["userWallet", "credit_score"])
    score_df.to_csv(output_csv, index=False)
    print(f"✅ Scoring complete. Saved to {output_csv}")

    # ML-based scoring
    model, X_test, y_test, wallet_test = train_credit_score_model(features_df)
    y_pred = model.predict(X_test)
    y_pred = [max(0, min(1000, int(score))) for score in y_pred]

    # Test results
    test_scores_df = pd.DataFrame({
        "userWallet": wallet_test.values,
        "actual_score": y_test.values,
        "predicted_score": y_pred
    })

    # Evaluation metrics
    print("\n📊 ML Model Evaluation:")
    print("MAE:", mean_absolute_error(y_test, y_pred))
    print("R²:", r2_score(y_test, y_pred))
    print("\n🔍 Sample Predictions:")
    print(test_scores_df.head())

    return model, test_scores_df

main("user-wallet-transactions.json")



✅ Scoring complete. Saved to wallet_scores.csv

📊 ML Model Evaluation:
MAE: 2.2671428571428573
R²: 0.9943919436682517

🔍 Sample Predictions:
                                   userWallet  actual_score  predicted_score
0  0x02c3df430a4125a05feeb5fee31c0ee911f952e1           622              623
1  0x043a7a9f331b701828842bac0ee760ea984cf1fd           779              780
2  0x03caaa7a6e8053ca69981975c38c5273e3fc0ebd           700              700
3  0x034b5155a9a8a8513bc27a77e1036bc18f4cf5b6           500              500
4  0x0070bc9033adf1e49517f98304f41f778d370208          1000             1000


(RandomForestRegressor(random_state=42),
                                      userWallet  actual_score  predicted_score
 0    0x02c3df430a4125a05feeb5fee31c0ee911f952e1           622              623
 1    0x043a7a9f331b701828842bac0ee760ea984cf1fd           779              780
 2    0x03caaa7a6e8053ca69981975c38c5273e3fc0ebd           700              700
 3    0x034b5155a9a8a8513bc27a77e1036bc18f4cf5b6           500              500
 4    0x0070bc9033adf1e49517f98304f41f778d370208          1000             1000
 ..                                          ...           ...              ...
 695  0x057313f0809f338763ddfc005e43ca7d9fac0a10           899              898
 696  0x013618ba67d882af8df870d8704e4d317b710dda           500              500
 697  0x00fc9abc1ae9d1b71b89fd285cb6c73129f6da72           700              700
 698  0x029a3d224e8c5daf8d5126f35502d7aecd23fb02           700              700
 699  0x03c1f86a157ef2f7baaa6fa37351833d7833c4e9           631              631