In [1]:
pip install gdown




In [2]:
import gdown

# Google Drive shared file ID
file_id = "1ISFbAXxadMrt7Zl96rmzzZmEKZnyW7FS"
url = f"https://drive.google.com/uc?id={file_id}"

# Download
output_file = "user_transactions.json"
gdown.download(url, output_file, quiet=False)


Downloading...
From: https://drive.google.com/uc?id=1ISFbAXxadMrt7Zl96rmzzZmEKZnyW7FS
To: /content/user_transactions.json
100%|██████████| 91.2M/91.2M [00:00<00:00, 99.9MB/s]


'user_transactions.json'

In [3]:
import json

# Load JSON file
with open("user_transactions.json", "r") as f:
    data = json.load(f)

# Print high-level structure
print(f"Type: {type(data)}")
print(f"Total records: {len(data)}")

# Print first item
import pprint
pprint.pprint(data[0], depth=3)


Type: <class 'list'>
Total records: 100000
{'__v': 0,
 '_id': {'$oid': '681d38fed63812d4655f571a'},
 'action': 'deposit',
 'actionData': {'amount': '2000000000',
                'assetPriceUSD': '0.9938318274296357543568636362026045',
                'assetSymbol': 'USDC',
                'poolId': '0x2791bca1f2de4661ed88a30c99a7a9449aa84174',
                'type': 'Deposit',
                'userId': '0x00000000001accfa9cef68cf5371a23025b6d4b6'},
 'blockNumber': 1629178166,
 'createdAt': {'$date': '2025-05-08T23:06:39.465Z'},
 'logId': '0x695c69acf608fbf5d38e48ca5535e118cc213a89e3d6d2e66e6b0e3b2e8d4190_Deposit',
 'network': 'polygon',
 'protocol': 'aave_v2',
 'timestamp': 1629178166,
 'txHash': '0x695c69acf608fbf5d38e48ca5535e118cc213a89e3d6d2e66e6b0e3b2e8d4190',
 'updatedAt': {'$date': '2025-05-08T23:06:39.465Z'},
 'userWallet': '0x00000000001accfa9cef68cf5371a23025b6d4b6'}


In [4]:
import csv
from collections import defaultdict, Counter
from decimal import Decimal
from datetime import datetime

# Step 1: Group transactions by userWallet
wallet_tx_map = defaultdict(list)
wallet_time_map = defaultdict(list)

for tx in data:
    wallet = tx.get("userWallet")
    if wallet:
        wallet_tx_map[wallet].append(tx)
        ts = tx.get("timestamp")
        if ts:
            wallet_time_map[wallet].append(ts)

# Prepare output CSV path
output_csv = "wallet_summary.csv"

# Step 2: Collect all unique action types
action_types = set(tx['action'] for tx in data)

# Step 3: Build the summary per wallet
wallet_summary = []

for wallet, tx_list in wallet_tx_map.items():
    row = {
        'wallet_address': wallet,
        'num_transactions': len(tx_list)
    }

    # Count action types
    action_count = Counter(tx['action'] for tx in tx_list)
    for action in action_types:
        row[f'action_{action}'] = action_count.get(action, 0)

    # Estimate USD per action
    usd_values = defaultdict(float)
    for tx in tx_list:
        action = tx['action']
        try:
            amt = Decimal(tx['actionData']['amount']) / Decimal(1e6)  # USDC 6 decimals
            price = Decimal(tx['actionData']['assetPriceUSD'])
            usd_values[action] += float(amt * price)
        except:
            continue

    for action in action_types:
        row[f'usd_{action}'] = round(usd_values.get(action, 0.0), 2)

    # Compute activity times
    timestamps = wallet_time_map.get(wallet, [])
    if timestamps:
        first_ts = min(timestamps)
        last_ts = max(timestamps)
        active_days = round((last_ts - first_ts) / (60 * 60 * 24), 2)
        row['active_days'] = active_days
        row['first_seen'] = datetime.utcfromtimestamp(first_ts).strftime('%Y-%m-%d %H:%M:%S')
        row['last_seen'] = datetime.utcfromtimestamp(last_ts).strftime('%Y-%m-%d %H:%M:%S')
    else:
        row['active_days'] = 0
        row['first_seen'] = ''
        row['last_seen'] = ''

    wallet_summary.append(row)

# Step 4: Write to CSV
fieldnames = ['wallet_address', 'num_transactions']
fieldnames += [f'action_{a}' for a in sorted(action_types)]
fieldnames += [f'usd_{a}' for a in sorted(action_types)]
fieldnames += ['active_days', 'first_seen', 'last_seen']

with open(output_csv, "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(wallet_summary)

print(f"Saved summary for {len(wallet_summary)} wallets to '{output_csv}'")


Saved summary for 3497 wallets to 'wallet_summary.csv'


In [5]:
from collections import defaultdict, Counter
from decimal import Decimal
from datetime import datetime

# Set the wallet address you want to inspect (case-insensitive)
target_wallet = "0x0000000002032370b971dabd36d72f3e5a7bf1ee".lower()

# Filter transactions for the wallet
wallet_txs = [tx for tx in data if tx.get("userWallet", "").lower() == target_wallet]

if not wallet_txs:
    print(f"No transactions found for wallet {target_wallet}")
else:
    print(f"Wallet Address: {target_wallet}")
    print(f"Total Transactions: {len(wallet_txs)}")

    # Count actions
    action_counts = Counter(tx['action'] for tx in wallet_txs)
    print("\nAction Breakdown:")
    for action, count in action_counts.items():
        print(f"  {action}: {count} txs")

    # Calculate USD per action
    usd_summary = defaultdict(float)

    def calc_usd(tx):
        try:
            amt = Decimal(tx['actionData']['amount']) / Decimal(1e6)
            price = Decimal(tx['actionData']['assetPriceUSD'])
            return float(amt * price)
        except:
            return 0.0

    for tx in wallet_txs:
        action = tx['action']
        usd_summary[action] += calc_usd(tx)

    print("\nEstimated USD Value per Action:")
    for action, total_usd in usd_summary.items():
        print(f"  {action}: ${total_usd:,.2f}")

    # First and last activity timestamps
    timestamps = [tx.get("timestamp") for tx in wallet_txs if "timestamp" in tx]
    if timestamps:
        first_ts = min(timestamps)
        last_ts = max(timestamps)

        first_time = datetime.utcfromtimestamp(first_ts).strftime("%Y-%m-%d %H:%M:%S")
        last_time = datetime.utcfromtimestamp(last_ts).strftime("%Y-%m-%d %H:%M:%S")

        print(f"\nFirst Activity: {first_time} UTC")
        print(f"Last Activity:  {last_time} UTC")


Wallet Address: 0x0000000002032370b971dabd36d72f3e5a7bf1ee
Total Transactions: 399

Action Breakdown:
  redeemunderlying: 130 txs
  deposit: 250 txs
  borrow: 15 txs
  repay: 4 txs

Estimated USD Value per Action:
  redeemunderlying: $112,874,520,279,761,888.00
  deposit: $206,577,330,842,852,352.00
  borrow: $60,297,620,862,140,784.00
  repay: $70,150.26

First Activity: 2021-04-21 21:28:30 UTC
Last Activity:  2021-09-01 18:15:24 UTC


In [6]:
pip install xgboost




In [7]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error

# Load wallet summary
df = pd.read_csv("wallet_summary.csv")

# Fill missing values
df.fillna({
    'usd_repay': 0.0,
    'usd_borrow': 0.0,
    'usd_deposit': 0.0,
    'usd_redeemunderlying': 0.0,
    'action_repay': 0,
    'action_borrow': 0,
    'action_deposit': 0,
    'action_redeemunderlying': 0,
    'active_days': 0.0
}, inplace=True)

# Feature engineering
df['repay_to_borrow'] = df['usd_repay'] / (df['usd_borrow'] + 1e-6)
df['redeem_to_deposit'] = df['usd_redeemunderlying'] / (df['usd_deposit'] + 1e-6)

df['repay_ratio'] = df['repay_to_borrow'].clip(0, 1)
df['redeem_ratio'] = df['redeem_to_deposit'].clip(0, 1)

# Count how many borrows are not repaid at all
df['unrepaid_borrows'] = ((df['usd_borrow'] > 0) & (df['usd_repay'] < 1e-3)).astype(int)

# Score formula
df['credit_score'] = (
    0.4 * (df['usd_deposit'] > 0).astype(float) +                  # reward depositors
    0.2 * df['repay_ratio'] +                                     # reward repayment % if borrowed
    0.15 * (df['active_days'] / 100).clip(0, 1) +                 # reward active wallets
    0.1 * df['redeem_ratio'] -                                    # slight reward for redeem
    0.1 * df['unrepaid_borrows'] -                                # penalize borrow with 0 repay
    0.05 * ((df['usd_borrow'] > 0) & (df['repay_ratio'] < 0.5)).astype(float)  # partial repayment penalty
) * 1000

df['credit_score'] = df['credit_score'].clip(0, 1000)


# Features and labels
features = [
    'num_transactions',
    'action_deposit', 'action_borrow', 'action_repay', 'action_redeemunderlying',
    'usd_deposit', 'usd_borrow', 'usd_repay', 'usd_redeemunderlying',
    'repay_to_borrow', 'redeem_to_deposit', 'active_days'
]

scaler = MinMaxScaler()
X = scaler.fit_transform(df[features])
y = df['credit_score']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train XGBoost model
xgb_model = XGBRegressor(
    n_estimators=300,
    max_depth=8,
    learning_rate=0.05,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=42,
    objective='reg:squarederror',
    verbosity=1
)

xgb_model.fit(X_train, y_train)

# Evaluate
y_pred = xgb_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"\n XGBoost MAE (Mean Absolute Error): {mae:.4f}")



 XGBoost MAE (Mean Absolute Error): 3.6542


In [8]:
import random

# Pick a random index from test set
random_idx = random.randint(0, len(X_test) - 1)

# Get corresponding original DataFrame index (from test set)
test_indices = y_test.index.to_list()
wallet_idx = test_indices[random_idx]

# Extract the wallet details
wallet_row = df.loc[wallet_idx]

print("\n Wallet Address:", wallet_row['wallet_address'])
print("Synthetic (True) Credit Score:", round(wallet_row['credit_score'], 2))

# Prepare the scaled feature row
X_sample = X_test[random_idx].reshape(1, -1)
predicted_score = xgb_model.predict(X_sample)[0]

print("Predicted Score by XGBoost:", round(predicted_score, 2))

# Additional context
print("\nAction Breakdown:")
print(f"  Deposits: {wallet_row['action_deposit']}")
print(f"  Borrows: {wallet_row['action_borrow']}")
print(f"  Repays: {wallet_row['action_repay']}")
print(f"  Redeems: {wallet_row['action_redeemunderlying']}")
print("\nUSD Activity:")
print(f"  Deposit Value: ${wallet_row['usd_deposit']:,.2f}")
print(f"  Borrow Value:  ${wallet_row['usd_borrow']:,.2f}")
print(f"  Repay Value:   ${wallet_row['usd_repay']:,.2f}")
print(f"  Redeem Value:  ${wallet_row['usd_redeemunderlying']:,.2f}")



 Wallet Address: 0x004af1171e5783d1223056a700e992b0eccf6191
Synthetic (True) Credit Score: 400.0
Predicted Score by XGBoost: 399.99

Action Breakdown:
  Deposits: 1
  Borrows: 0
  Repays: 0
  Redeems: 0

USD Activity:
  Deposit Value: $54,810,936,012,301.06
  Borrow Value:  $0.00
  Repay Value:   $0.00
  Redeem Value:  $0.00


In [9]:
# Save wallet addresses and their corresponding scores
score_df = df[['wallet_address', 'credit_score']].copy()
score_df.to_csv("wallet_scores.csv", index=False)
print("Saved credit scores to wallet_scores.csv")


Saved credit scores to wallet_scores.csv


FRONT UI USING GRADIO


In [10]:
!pip install gradio xgboost scikit-learn pandas




In [11]:
import gradio as gr
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.preprocessing import MinMaxScaler

# Load data
df = pd.read_csv("wallet_summary.csv")

# Fill missing values
df.fillna({
    'usd_repay': 0.0,
    'usd_borrow': 0.0,
    'usd_deposit': 0.0,
    'usd_redeemunderlying': 0.0,
    'action_repay': 0,
    'action_borrow': 0,
    'action_deposit': 0,
    'action_redeemunderlying': 0,
    'active_days': 0.0
}, inplace=True)

# Feature engineering
df['repay_to_borrow'] = df['usd_repay'] / (df['usd_borrow'] + 1e-6)
df['redeem_to_deposit'] = df['usd_redeemunderlying'] / (df['usd_deposit'] + 1e-6)
df['repay_ratio'] = df['repay_to_borrow'].clip(0, 1)
df['redeem_ratio'] = df['redeem_to_deposit'].clip(0, 1)
df['unrepaid_borrows'] = ((df['usd_borrow'] > 0) & (df['usd_repay'] < 1e-3)).astype(int)

# Synthetic score
df['credit_score'] = (
    0.4 * (df['usd_deposit'] > 0).astype(float) +
    0.2 * df['repay_ratio'] +
    0.15 * (df['active_days'] / 100).clip(0, 1) +
    0.1 * df['redeem_ratio'] -
    0.1 * df['unrepaid_borrows'] -
    0.05 * ((df['usd_borrow'] > 0) & (df['repay_ratio'] < 0.5)).astype(float)
) * 1000
df['credit_score'] = df['credit_score'].clip(0, 1000)

# ML training
features = [
    'num_transactions',
    'action_deposit', 'action_borrow', 'action_repay', 'action_redeemunderlying',
    'usd_deposit', 'usd_borrow', 'usd_repay', 'usd_redeemunderlying',
    'repay_to_borrow', 'redeem_to_deposit', 'active_days'
]

scaler = MinMaxScaler()
X = scaler.fit_transform(df[features])
y = df['credit_score']

model = XGBRegressor()
model.fit(X, y)

# Prediction function
def predict_score(wallet_address):
    wallet_address = wallet_address.lower()
    row = df[df['wallet_address'].str.lower() == wallet_address]

    if row.empty:
        return ["Not Found"] * 13

    row = row.iloc[0]
    X_input = scaler.transform([row[features]])
    pred = model.predict(X_input)[0]

    return [
        round(row['credit_score'], 2),
        round(pred, 2),
        int(row['action_deposit']),
        int(row['action_borrow']),
        int(row['action_repay']),
        int(row['action_redeemunderlying']),
        f"${row['usd_deposit']:,.2f}",
        f"${row['usd_borrow']:,.2f}",
        f"${row['usd_repay']:,.2f}",
        f"${row['usd_redeemunderlying']:,.2f}",
        round(row['active_days'], 2),
        row.get('first_seen', 'N/A'),
        row.get('last_seen', 'N/A'),
    ]

# Gradio interface
demo = gr.Interface(
    fn=predict_score,
    inputs=gr.Textbox(label="Enter Wallet Address"),
    outputs=[
        gr.Textbox(label="Synthetic Score"),
        gr.Textbox(label="Predicted Score"),
        gr.Textbox(label="Deposits"),
        gr.Textbox(label="Borrows"),
        gr.Textbox(label="Repays"),
        gr.Textbox(label="Redeems"),
        gr.Textbox(label="USD Deposits"),
        gr.Textbox(label="USD Borrows"),
        gr.Textbox(label="USD Repays"),
        gr.Textbox(label="USD Redeems"),
        gr.Textbox(label="Active Days"),
        gr.Textbox(label="First Seen"),
        gr.Textbox(label="Last Seen"),
    ],
    title="🧮 Wallet Credit Score Estimator",
    description="Enter a wallet address to estimate credit score based on on-chain activity"
)

demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f3b2c055f19dc46a50.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


