# In [6]:
import json
import os
import zipfile
from collections import defaultdict
from datetime import datetime, timezone

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Unzip and Load JSON
zip_path = "user-wallet-transactions.json.zip"
unzip_dir = "unzipped"
os.makedirs(unzip_dir, exist_ok=True)

# Extract every member of the archive into unzip_dir (existing files are
# overwritten by extractall).
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(unzip_dir)

json_file_path = os.path.join(unzip_dir, "user-wallet-transactions.json")
# Read as UTF-8 explicitly: without `encoding`, open() uses the platform's
# locale encoding, which can mis-decode or fail on non-ASCII JSON content.
with open(json_file_path, 'r', encoding="utf-8") as f:
    data = json.load(f)

# Feature Engineering
def compute_usd_value(amount_str, price_str):
    """Return the USD value of an amount, or 0.0 if either input is unusable.

    Args:
        amount_str: Asset amount; any value accepted by ``float()``.
        price_str: USD price per unit; any value accepted by ``float()``.

    Returns:
        ``float(amount_str) * float(price_str)``, or ``0.0`` when either
        argument cannot be converted to a float.
    """
    try:
        return float(amount_str) * float(price_str)
    except (TypeError, ValueError, OverflowError):
        # Deliberately best-effort: a malformed record contributes nothing.
        # Only conversion failures are swallowed, so KeyboardInterrupt and
        # SystemExit still propagate instead of being turned into 0.0.
        return 0.0

def _blank_wallet_totals():
    """Build the zeroed aggregate record used the first time a wallet is seen."""
    return dict(
        num_deposits=0,
        total_deposit_usd=0.0,
        num_borrows=0,
        total_borrow_usd=0.0,
        num_repays=0,
        total_repay_usd=0.0,
        num_redeems=0,
        total_redeem_usd=0.0,
        num_liquidations=0,
        timestamps=[],
        assets=set(),
    )


# Wallet address -> running totals; records are created lazily on first access.
wallet_features = defaultdict(_blank_wallet_totals)

# Lower-cased action name -> (counter key, USD-total key). Liquidation calls
# are only counted, so they carry no USD-total key.
_ACTION_KEYS = {
    "deposit": ("num_deposits", "total_deposit_usd"),
    "borrow": ("num_borrows", "total_borrow_usd"),
    "repay": ("num_repays", "total_repay_usd"),
    "redeemunderlying": ("num_redeems", "total_redeem_usd"),
    "liquidationcall": ("num_liquidations", None),
}

for tx in data:
    wallet = tx["userWallet"]
    action = tx["action"].lower()
    action_data = tx.get("actionData", {})
    timestamp = tx.get("timestamp")
    usd_value = compute_usd_value(
        action_data.get("amount", "0"),
        action_data.get("assetPriceUSD", "1"),
    )

    # Unrecognised actions update no counters (and do not create a record).
    keys = _ACTION_KEYS.get(action)
    if keys is not None:
        count_key, usd_key = keys
        totals = wallet_features[wallet]
        totals[count_key] += 1
        if usd_key is not None:
            totals[usd_key] += usd_value

    # Falsy timestamps (missing, None, 0) are not recorded.
    if timestamp:
        wallet_features[wallet]["timestamps"].append(timestamp)
    if "assetSymbol" in action_data:
        wallet_features[wallet]["assets"].add(action_data["assetSymbol"])

# Turn each wallet's running totals into one flat feature row.
processed_records = []

for wallet, feats in wallet_features.items():
    timestamps = sorted(feats["timestamps"])
    # With fewer than two timestamps there is no gap to measure, so a single
    # zero interval is used and the average comes out as 0 days.
    tx_intervals = np.diff(timestamps) if len(timestamps) > 1 else [0]
    # Timestamps are treated as epoch seconds; 86400 converts seconds to days.
    avg_txn_interval = np.mean(tx_intervals) / 86400
    # Count distinct UTC calendar days. A timezone-aware conversion replaces
    # datetime.utcfromtimestamp(), which is deprecated since Python 3.12.
    active_days = len({
        datetime.fromtimestamp(ts, tz=timezone.utc).date() for ts in timestamps
    })
    # Wallets that never borrowed get a ratio of 0 instead of dividing by zero.
    repay_borrow_ratio = (feats["total_repay_usd"] / feats["total_borrow_usd"]) if feats["total_borrow_usd"] > 0 else 0

    processed_records.append({
        "wallet": wallet,
        "num_deposits": feats["num_deposits"],
        "total_deposit_usd": feats["total_deposit_usd"],
        "num_borrows": feats["num_borrows"],
        "total_borrow_usd": feats["total_borrow_usd"],
        "num_repays": feats["num_repays"],
        "total_repay_usd": feats["total_repay_usd"],
        "repay_to_borrow_ratio": repay_borrow_ratio,
        "num_liquidations": feats["num_liquidations"],
        "num_redeems": feats["num_redeems"],
        "total_redeem_usd": feats["total_redeem_usd"],
        "avg_txn_interval_days": avg_txn_interval,
        "active_days": active_days,
        "asset_diversity": len(feats["assets"]),
        # Counts recorded timestamps, so transactions that had no timestamp
        # are not included in this total.
        "total_tx_count": len(timestamps)
    })

features_df = pd.DataFrame(processed_records)

# Unsupervised ML Scoring (KMeans Clustering)
# Every column except the wallet identifier is used as a feature, standardised
# so that large USD totals do not dominate the distance computation.
X = features_df.drop(columns=["wallet"])
X_scaled = StandardScaler().fit_transform(X)

# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto', so leaving it unset makes the clustering depend on the installed
# version (and emits a FutureWarning on some releases).
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
clusters = kmeans.fit_predict(X_scaled)
features_df['cluster'] = clusters

# Score mapping from cluster ID to score bucket
# NOTE(review): KMeans cluster IDs are arbitrary labels with no inherent
# ordering, so this mapping does not by itself rank wallets from worst to
# best. Confirm the intended ordering criterion (e.g. ranking clusters by a
# centroid-derived risk measure) before relying on these scores.
cluster_score_map = {
    0: 200,
    1: 400,
    2: 600,
    3: 800,
    4: 1000
}
features_df['credit_score'] = features_df['cluster'].map(cluster_score_map)

# Save to CSV
features_df.to_csv("wallet_credit_scores.csv", index=False)
print("Credit scores with clustering saved to wallet_credit_scores.csv")



# Output: Credit scores with clustering saved to wallet_credit_scores.csv
