<a href="https://colab.research.google.com/github/ayush6233/Credit_score_assigner/blob/main/creditscore_for_defi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [43]:
import json
import argparse
from collections import defaultdict

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import IsolationForest


def _decode_chunk(chunk: str) -> list:
    """Decode one buffered text chunk into a list of record dicts.

    Returns an empty list when the chunk holds no decodable JSON object
    (for example a lone ``[`` or ``]`` line of a pretty-printed array).
    """
    text = chunk.strip().rstrip(",")
    candidates = [text]
    # An object that shares its line with the enclosing array bracket
    # ("[{...}" or "{...}]") is not valid JSON on its own; retry without the
    # array delimiters instead of silently losing the record.
    trimmed = text.lstrip("[").rstrip("]").strip().rstrip(",")
    if trimmed and trimmed != text:
        candidates.append(trimmed)

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return [parsed]
        if isinstance(parsed, list):
            # A whole array decoded at once: every dict element is a record.
            return [item for item in parsed if isinstance(item, dict)]
        return []
    return []


def parse_json_transactions(filepath: str) -> pd.DataFrame:
    """Read transaction records from a loosely formatted JSON file.

    The file is scanned line by line; lines are buffered until the number of
    ``{`` and ``}`` characters seen so far balances, then the buffered text
    (minus a trailing comma) is decoded. This accepts JSON-lines files,
    comma-separated objects, and JSON arrays, whether pretty-printed or
    written on a single line. Chunks that cannot be decoded are skipped.

    Note: braces are counted textually, so a ``{`` or ``}`` inside a string
    value can shift chunk boundaries.

    Args:
        filepath: Path of the file to read (decoded as UTF-8).

    Returns:
        A DataFrame built with ``pd.json_normalize``; nested objects become
        dotted column names. Empty when no record could be decoded.
    """
    records = []
    pending = []  # lines of the chunk currently being assembled
    depth = 0
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            depth += line.count("{") - line.count("}")
            pending.append(line)
            if depth != 0:
                continue
            records.extend(_decode_chunk("".join(pending)))
            pending = []
    return pd.json_normalize(records)


def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
    """Build per-wallet behavioural features from a transaction table.

    The input must provide the columns ``userWallet``, ``action``,
    ``timestamp`` (epoch seconds) and ``blockNumber``, plus an amount column
    and an ``assetPriceUSD`` column, either at top level or under an
    ``actionData.`` prefix. The input frame is never modified.

    Args:
        df: One row per transaction.

    Returns:
        A DataFrame indexed by ``userWallet`` with the columns
        ``repay_borrow_ratio``, ``liquidation_ratio``, ``flashloan_rate``,
        ``recency_score`` and ``activity_volume``; missing values are 0.

    Raises:
        IndexError: If no amount or ``assetPriceUSD`` column is present.
    """
    # Flatten actionData by stripping the prefix. Both branches produce a new
    # frame, so the column assignments below cannot leak into the caller's df.
    rename_map = {c: c.split('.', 1)[1]
                  for c in df.columns if c.startswith("actionData.")}
    work = df.rename(columns=rename_map) if rename_map else df.copy()

    amt_col = [c for c in work.columns
               if c == 'amount' or ('amount' in c and 'Price' not in c)][0]
    price_col = [c for c in work.columns if 'assetPriceUSD' in c][0]

    work['amount'] = work[amt_col].astype(float)
    work['assetPriceUSD'] = work[price_col].astype(float)
    work['amountUSD'] = work['amount'] * work['assetPriceUSD']
    work['timestamp'] = pd.to_datetime(work['timestamp'], unit='s')
    # "Now" is the newest transaction in the data set, not the wall clock.
    now = work['timestamp'].max()

    # Pre-mask the per-action quantities once so the aggregation can use the
    # built-in reducers instead of a Python lambda per wallet.
    is_borrow = work['action'] == 'borrow'
    is_repay = work['action'] == 'repay'
    work['_borrow_usd'] = work['amountUSD'].where(is_borrow, 0.0)
    work['_repay_usd'] = work['amountUSD'].where(is_repay, 0.0)
    work['_is_borrow'] = is_borrow.astype(int)
    work['_is_liquidation'] = (work['action'] == 'liquidationcall').astype(int)

    by_wallet = work.groupby('userWallet')
    agg = by_wallet.agg(
        total_borrow_usd=('_borrow_usd', 'sum'),
        total_repay_usd=('_repay_usd', 'sum'),
        borrow_count=('_is_borrow', 'sum'),
        liquidation_count=('_is_liquidation', 'sum'),
        tx_count=('action', 'count'),
        last_ts=('timestamp', 'max'),
    )

    # Ratios; the tiny epsilon avoids division by zero for non-borrowers.
    agg['repay_borrow_ratio'] = agg['total_repay_usd'] / (agg['total_borrow_usd'] + 1e-9)
    agg['liquidation_ratio'] = agg['liquidation_count'] / (agg['borrow_count'] + 1e-9)

    # Flash-loan rate: share of a wallet's borrows whose block number also
    # carries a repay by the same wallet; 0.0 for wallets that never borrowed.
    flash_rates = {}
    for wallet, grp in by_wallet:
        borrow_blocks = grp.loc[grp['action'] == 'borrow', 'blockNumber']
        repay_blocks = grp.loc[grp['action'] == 'repay', 'blockNumber']
        if borrow_blocks.empty:
            flash_rates[wallet] = 0.0
        else:
            flash_rates[wallet] = borrow_blocks.isin(repay_blocks).mean()
    agg['flashloan_rate'] = pd.Series(flash_rates, dtype=float)

    # Recency score: exponential decay on whole days since the last activity.
    days_since = (now - agg['last_ts']).dt.days
    agg['recency_score'] = np.exp(-0.1 * days_since)

    # Activity volume: log-damped transaction count.
    agg['activity_volume'] = np.log1p(agg['tx_count'])

    # Select features for modeling
    features = agg[['repay_borrow_ratio', 'liquidation_ratio',
                    'flashloan_rate', 'recency_score', 'activity_volume']].fillna(0)
    return features

def score_with_isolation_forest(features: pd.DataFrame,
                                 contamination: float = 0.01,
                                 n_estimators: int = 200) -> np.ndarray:
    """
    Trains an Isolation Forest on the feature set (unsupervised) and returns
    a credit score between 0 and 1000 for each row in `features`.

    Features are min-max scaled, the forest's decision function (higher =
    more normal) is rescaled linearly so the most anomalous row scores 0 and
    the most normal row scores 1000, and the result is rounded to integers.

    Args:
        features: One row per wallet, numeric columns only.
        contamination: Passed to ``IsolationForest(contamination=...)``.
        n_estimators: Passed to ``IsolationForest(n_estimators=...)``.

    Returns:
        Integer array of scores aligned with the rows of `features`. Empty
        when `features` has no rows. When every row receives the same anomaly
        score (e.g. a single wallet) there is nothing to rank, so all rows get
        the midpoint score 500.
    """
    # Nothing to fit on: the scaler rejects zero-sample input, so return early.
    if len(features) == 0:
        return np.array([], dtype=int)

    X_scaled = MinMaxScaler().fit_transform(features)

    model = IsolationForest(
        n_estimators=n_estimators,
        max_samples='auto',
        contamination=contamination,
        random_state=42  # fixed seed keeps scores reproducible between runs
    )
    model.fit(X_scaled)

    raw_scores = model.decision_function(X_scaled)  # higher = more normal

    # Normalize raw_scores to [0,1]; guard the degenerate zero-spread case,
    # which would otherwise divide by zero and cast NaN to int.
    min_s, max_s = raw_scores.min(), raw_scores.max()
    spread = max_s - min_s
    if spread > 0:
        norm_scores = (raw_scores - min_s) / spread
    else:
        norm_scores = np.full(raw_scores.shape, 0.5)

    # Scale to [0,1000]
    credit_scores = (norm_scores * 1000).round().astype(int)
    return credit_scores

# Command-line entry point: parse arguments, score every wallet, write JSON.
# NOTE: parse_args() reads sys.argv, so executing this inside a notebook
# kernel (whose argv holds the kernel launcher's own arguments) exits with
# status 2; run the file as a script instead.
parser = argparse.ArgumentParser(
    description='Compute 0–1000 credit scores for Aave v2 wallets from transaction history'
)
parser.add_argument(
    'input_json',
    help='Path to the transactions JSON file, e.g. '
         '/content/drive/MyDrive/user-wallet-transactions.json')
parser.add_argument(
    'output_json',
    help='Path of the JSON file the wallet scores are written to')
parser.add_argument('--contamination', type=float, default=0.01,
                    help='Expected share of anomalous wallets given to the '
                         'Isolation Forest (default: %(default)s)')
# The default is 100 because that is the value every run has effectively used
# so far; keeping it means default invocations produce unchanged scores.
parser.add_argument('--n_estimators', type=int, default=100,
                    help='Number of trees in the Isolation Forest '
                         '(default: %(default)s)')
args = parser.parse_args()
df_raw = parse_json_transactions(args.input_json)
features = engineer_features(df_raw)

# Honour the command-line options instead of hard-coded model settings.
scores = score_with_isolation_forest(
    features,
    contamination=args.contamination,
    n_estimators=args.n_estimators
)

result = pd.DataFrame({
    'userWallet': features.index,
    'credit_score': scores
})
result.to_json(args.output_json, orient='records')
print(f"Scored {len(result)} wallets; output written to {args.output_json}")



usage: colab_kernel_launcher.py [-h] [--contamination CONTAMINATION]
                                [--n_estimators N_ESTIMATORS]
                                input_json output_json
colab_kernel_launcher.py: error: the following arguments are required: output_json


SystemExit: 2