# Task

# Install Requirements

In [15]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from collections import defaultdict, Counter

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.ensemble import IsolationForest
from sklearn.metrics import silhouette_score, silhouette_samples

import pickle
import os
import sys

In [16]:
import warnings
warnings.filterwarnings('ignore')

# Load and Process the Data

In [17]:
def load_data(file_paths):
    """Load and parse the JSON transaction data from multiple files."""
    all_data = {
        'deposits': [],
        'borrows': [],
        'repays': [],
        'withdraws': [],
        'liquidations': []
    }
    
    for file_path in file_paths:
        print(f"Loading {file_path}...")
        with open(file_path, 'r') as f:
            data = json.load(f)
            
        for transaction_type in all_data.keys():
            if transaction_type in data:
                all_data[transaction_type].extend(data[transaction_type])
    
    return all_data

file_paths = [
    'Task Files/compoundV2_transactions_ethereum_chunk_0.json',
    'Task Files/compoundV2_transactions_ethereum_chunk_1.json',
    'Task Files/compoundV2_transactions_ethereum_chunk_2.json'
]

# Load Data
transaction_data = load_data(file_paths)

# Print transaction counts
for tx_type, transactions in transaction_data.items():
    print(f"{tx_type}: {len(transactions)} transactions")


Loading Task Files/compoundV2_transactions_ethereum_chunk_0.json...
Loading Task Files/compoundV2_transactions_ethereum_chunk_1.json...
Loading Task Files/compoundV2_transactions_ethereum_chunk_2.json...
deposits: 30000 transactions
borrows: 30000 transactions
repays: 30000 transactions
withdraws: 30000 transactions
liquidations: 0 transactions


## Convert into Structed DataFrames

In [18]:
def create_transaction_df(transactions, tx_type):
    """Convert transaction list to a DataFrame with consistent structure."""
    df_rows = []
    
    for tx in transactions:
        row = {
            'wallet_id': tx['account']['id'],
            'timestamp': int(tx['timestamp']),
            'datetime': datetime.fromtimestamp(int(tx['timestamp'])),
            'amount': float(tx['amount']),
            'amountUSD': float(tx['amountUSD']) if 'amountUSD' in tx else 0,
            'asset_id': tx['asset']['id'],
            'asset_symbol': tx['asset']['symbol'],
            'hash': tx['hash'],
            'tx_id': tx['id'],
            'tx_type': tx_type
        }
        
        # Add liquidation-specific fields
        if tx_type == 'liquidations':
            row['liquidator'] = tx.get('liquidator', {}).get('id', '')
            row['liquidatee'] = tx.get('liquidatee', {}).get('id', '')
        else:
            # For non-liquidation transactions, set these fields to empty strings
            row['liquidator'] = ''
            row['liquidatee'] = ''
            
        df_rows.append(row)
    
    return pd.DataFrame(df_rows)

In [19]:
# Create DataFrames for each transaction type
dfs = {}
for tx_type, transactions in transaction_data.items():
    dfs[tx_type] = create_transaction_df(transactions, tx_type)
    print(f"{tx_type} DataFrame shape: {dfs[tx_type].shape}")

# Combine all transactions into a single DataFrame
all_transactions = pd.concat(dfs.values(), ignore_index=True)
print(f"Combined DataFrame shape: {all_transactions.shape}")

# Sort by timestamp
all_transactions = all_transactions.sort_values('timestamp')

deposits DataFrame shape: (30000, 12)
borrows DataFrame shape: (30000, 12)
repays DataFrame shape: (30000, 12)
withdraws DataFrame shape: (30000, 12)
liquidations DataFrame shape: (0, 0)
Combined DataFrame shape: (120000, 12)


## Data Exploration (Basic)

In [20]:

print("\nTransaction types:")
print(all_transactions['tx_type'].value_counts())

print("\nAsset types:")
print(all_transactions['asset_symbol'].value_counts())

print("\nTime range:")
print(f"Start: {all_transactions['datetime'].min()}")
print(f"End: {all_transactions['datetime'].max()}")


Transaction types:
tx_type
deposits     30000
borrows      30000
withdraws    30000
repays       30000
Name: count, dtype: int64

Asset types:
asset_symbol
DAI     62721
ETH     23934
USDC    22331
BAT      3601
ZRX      2829
REP      2414
WBTC     2170
Name: count, dtype: int64

Time range:
Start: 2019-05-07 07:11:22
End: 2020-04-15 06:24:24


# Feature Engineering (For each Wallet)

In [21]:
def engineer_wallet_features(transactions_df):
    """Extract wallet-level features from transaction data."""
    # Sort by timestamp
    wallet_groups = transactions_df.groupby('wallet_id')
    wallet_features = []
    
    for wallet_id, wallet_txs in wallet_groups:
        # Skip wallets with very few transactions (likely noise)
        if len(wallet_txs) < 3:
            continue
            
        # Group transactions by type
        deposits = wallet_txs[wallet_txs['tx_type'] == 'deposits']
        borrows = wallet_txs[wallet_txs['tx_type'] == 'borrows']
        repays = wallet_txs[wallet_txs['tx_type'] == 'repays']
        withdraws = wallet_txs[wallet_txs['tx_type'] == 'withdraws']
        
        # For liquidations, we need to check both as liquidator and liquidatee
        liquidations = wallet_txs[wallet_txs['tx_type'] == 'liquidations']
        liquidations_as_liquidatee = liquidations[liquidations['liquidatee'] == wallet_id]
        liquidations_as_liquidator = liquidations[liquidations['liquidator'] == wallet_id]
        
        # Time-based features
        first_tx_time = wallet_txs['timestamp'].min()
        last_tx_time = wallet_txs['timestamp'].max()
        account_age_days = (last_tx_time - first_tx_time) / (60 * 60 * 24)
        
        # Transaction frequency
        if account_age_days > 0:
            tx_frequency = len(wallet_txs) / account_age_days
        else:
            tx_frequency = 0
            
        # Volume metrics
        total_deposit_usd = deposits['amountUSD'].sum()
        total_borrow_usd = borrows['amountUSD'].sum()
        total_repay_usd = repays['amountUSD'].sum()
        total_withdraw_usd = withdraws['amountUSD'].sum()
        total_liquidated_usd = liquidations_as_liquidatee['amountUSD'].sum()
        
        # Repayment behavior
        if total_borrow_usd > 0:
            repay_ratio = total_repay_usd / total_borrow_usd
        else:
            repay_ratio = 0
            
        # Liquidation risk
        if total_borrow_usd > 0:
            liquidation_ratio = total_liquidated_usd / total_borrow_usd
        else:
            liquidation_ratio = 0
            
        # Asset diversity
        unique_assets = wallet_txs['asset_symbol'].nunique()
        
        # Transaction patterns
        borrow_deposit_ratio = total_borrow_usd / (total_deposit_usd + 1)  # Add 1 to avoid division by zero
        withdraw_deposit_ratio = total_withdraw_usd / (total_deposit_usd + 1)
        
        # Time between transactions
        if len(wallet_txs) > 1:
            wallet_txs_sorted = wallet_txs.sort_values('timestamp')
            time_diffs = wallet_txs_sorted['timestamp'].diff().dropna()
            avg_time_between_txs = time_diffs.mean() / (60 * 60)  # in hours
            std_time_between_txs = time_diffs.std() / (60 * 60) if len(time_diffs) > 1 else 0
            
            # Volatility in transaction timing
            time_cv = std_time_between_txs / (avg_time_between_txs + 1)
            
            # Volatility in transaction amounts
            amount_volatility = wallet_txs['amountUSD'].std() / (wallet_txs['amountUSD'].mean() + 1)
        else:
            avg_time_between_txs = 0
            std_time_between_txs = 0
            time_cv = 0
            amount_volatility = 0
            
        # Check for liquidator behavior
        is_liquidator = len(liquidations_as_liquidator) > 0
        
        # Borrowing efficiency
        if len(borrows) > 0:
            repay_to_borrow_ratio = len(repays) / len(borrows)
        else:
            repay_to_borrow_ratio = 0
            
        # Recency feature
        current_time = datetime.now().timestamp()
        days_since_last_tx = (current_time - last_tx_time) / (60 * 60 * 24)
        
        # Collect features
        features = {
            'wallet_id': wallet_id,
            'tx_count': len(wallet_txs),
            'deposit_count': len(deposits),
            'borrow_count': len(borrows),
            'repay_count': len(repays),
            'withdraw_count': len(withdraws),
            'liquidated_count': len(liquidations_as_liquidatee),
            'liquidator_count': len(liquidations_as_liquidator),
            'is_liquidator': is_liquidator,
            'account_age_days': account_age_days,
            'tx_frequency': tx_frequency,
            'total_deposit_usd': total_deposit_usd,
            'total_borrow_usd': total_borrow_usd,
            'total_repay_usd': total_repay_usd,
            'total_withdraw_usd': total_withdraw_usd,
            'total_liquidated_usd': total_liquidated_usd,
            'repay_ratio': repay_ratio,
            'liquidation_ratio': liquidation_ratio,
            'unique_assets': unique_assets,
            'borrow_deposit_ratio': borrow_deposit_ratio,
            'withdraw_deposit_ratio': withdraw_deposit_ratio,
            'avg_time_between_txs': avg_time_between_txs,
            'std_time_between_txs': std_time_between_txs,
            'first_tx_time': first_tx_time,
            'last_tx_time': last_tx_time,
            'time_consistency': time_cv,
            'amount_volatility': amount_volatility,
            'repay_to_borrow_ratio': repay_to_borrow_ratio,
            'days_since_last_tx': days_since_last_tx
        }
        
        # Add asset-specific metrics
        asset_counts = wallet_txs['asset_symbol'].value_counts()
        for asset, count in asset_counts.items():
            features[f'asset_{asset}_count'] = count
            
        # Add transaction type percentages
        for tx_type in ['deposits', 'borrows', 'repays', 'withdraws', 'liquidations']:
            features[f'{tx_type}_pct'] = len(wallet_txs[wallet_txs['tx_type'] == tx_type]) / len(wallet_txs)
            
        wallet_features.append(features)
        
    return pd.DataFrame(wallet_features)

In [22]:
# Generate wallet features
wallet_features_df = engineer_wallet_features(all_transactions)
print(f"Generated features for {len(wallet_features_df)} wallets")
print(f"Feature columns: {wallet_features_df.columns.tolist()}")

# Save the features
wallet_features_df.to_csv("Deliverables/wallet_features.csv", index=False)
print("Wallet features saved to wallet_features.csv")

Generated features for 6607 wallets
Feature columns: ['wallet_id', 'tx_count', 'deposit_count', 'borrow_count', 'repay_count', 'withdraw_count', 'liquidated_count', 'liquidator_count', 'is_liquidator', 'account_age_days', 'tx_frequency', 'total_deposit_usd', 'total_borrow_usd', 'total_repay_usd', 'total_withdraw_usd', 'total_liquidated_usd', 'repay_ratio', 'liquidation_ratio', 'unique_assets', 'borrow_deposit_ratio', 'withdraw_deposit_ratio', 'avg_time_between_txs', 'std_time_between_txs', 'first_tx_time', 'last_tx_time', 'time_consistency', 'amount_volatility', 'repay_to_borrow_ratio', 'days_since_last_tx', 'asset_DAI_count', 'asset_ETH_count', 'asset_USDC_count', 'asset_BAT_count', 'asset_ZRX_count', 'asset_WBTC_count', 'deposits_pct', 'borrows_pct', 'repays_pct', 'withdraws_pct', 'liquidations_pct', 'asset_REP_count']
Wallet features saved to wallet_features.csv


# Detect Anomalied

In [23]:
def detect_anomalies(df, contamination=0.05):
    """Detect anomalous wallets using Isolation Forest."""
    
    anomaly_features = [
        'tx_count', 'deposit_count', 'borrow_count', 'repay_count', 
        'withdraw_count', 'liquidated_count', 'tx_frequency',
        'total_deposit_usd', 'total_borrow_usd', 'total_repay_usd', 
        'total_withdraw_usd', 'repay_ratio', 'borrow_deposit_ratio',
        'withdraw_deposit_ratio', 'avg_time_between_txs', 'amount_volatility'
    ]
    
    # Ensure all features exist
    anomaly_features = [f for f in anomaly_features if f in df.columns]
    
    # Scale the features
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(df[anomaly_features])
    
    # Apply Isolation Forest for anomaly detection
    isoforest = IsolationForest(contamination=contamination, random_state=42, n_estimators=100)
    df['anomaly_score'] = isoforest.fit_predict(scaled_features)
    
    # Add anomaly probability score (higher means more anomalous)
    df['anomaly_probability'] = -isoforest.score_samples(scaled_features)
    
    return df

# Credit Scoring Model

In [24]:
def create_credit_scoring_model(wallet_features):
    """Create a credit scoring model based on wallet features."""

    df = wallet_features.copy()
    
    # Handle missing values
    df = df.fillna(0)
    
    # Detect anomalies
    df = detect_anomalies(df)
    
    # 1. Repayment behavior (30% of score)
    repayment_score = 30 * np.where(df['repay_ratio'] > 0.95, 1, 
                     np.where(df['repay_ratio'] > 0.8, 0.8,
                     np.where(df['repay_ratio'] > 0.6, 0.6,
                     np.where(df['repay_ratio'] > 0.4, 0.4,
                     np.where(df['repay_ratio'] > 0.2, 0.2, 0)))))
    
    # 2. Liquidation risk (25% of score)
    liquidation_score = 25 * (1 - np.clip(df['liquidation_ratio'] * 5, 0, 1))
    
    # 3. Account stability (15% of score)
    stability_score = 15 * np.where(df['account_age_days'] > 180, 1,
                      np.where(df['account_age_days'] > 90, 0.8,
                      np.where(df['account_age_days'] > 30, 0.6,
                      np.where(df['account_age_days'] > 7, 0.4, 0.2))))
    
    # 4. Transaction patterns (15% of score)
    # Identify potential bots or exploitative behavior
    df['potential_bot'] = (df['tx_frequency'] > df['tx_frequency'].quantile(0.95)) & \
                          (df['std_time_between_txs'] < df['std_time_between_txs'].quantile(0.10))
    
    # Identify high-risk behavior
    df['high_borrow_risk'] = df['borrow_deposit_ratio'] > df['borrow_deposit_ratio'].quantile(0.75)
    df['high_withdraw_risk'] = df['withdraw_deposit_ratio'] > df['withdraw_deposit_ratio'].quantile(0.75)
    
    tx_pattern_score = 15 * (1 - 
                           np.where(df['potential_bot'] == True, 0.8, 0) -
                           np.where(df['high_borrow_risk'] == True, 0.4, 0) -
                           np.where(df['high_withdraw_risk'] == True, 0.2, 0))
    
    # 5. Asset diversity (15% of score)
    diversity_score = 15 * np.where(df['unique_assets'] > 4, 1,
                      np.where(df['unique_assets'] > 2, 0.7,
                      np.where(df['unique_assets'] > 1, 0.4, 0.2)))
    
    # Combine all components
    df['credit_score'] = (repayment_score + liquidation_score + stability_score + 
                          tx_pattern_score + diversity_score)
    
    # Apply anomaly detection penalty
    df.loc[df['anomaly_score'] == -1, 'credit_score'] *= 0.5
    
    # Ensure score is between 0 and 100
    df['credit_score'] = df['credit_score'].clip(0, 100).round().astype(int)
    
    return df

In [25]:
wallet_scores = create_credit_scoring_model(wallet_features_df)

In [26]:
# Save the full results
wallet_scores.to_csv("Deliverables/all_wallet_scores.csv", index=False)

# Visualization and Results

In [27]:
def generate_visualizations(wallet_scores):
    """Generate visualizations for the credit scoring analysis."""
    # Score distribution
    plt.figure(figsize=(12, 8))
    
    # Main plot - overall distribution
    plt.subplot(2, 2, 1)
    sns.histplot(wallet_scores['credit_score'], bins=20, kde=True)
    plt.title('Distribution of Wallet Credit Scores')
    plt.xlabel('Credit Score')
    plt.ylabel('Count')
    
    # Feature importance visualization
    plt.subplot(2, 2, 2)
    feature_importance = {
        'Repayment Behavior': 30,
        'Liquidation Risk': 25,
        'Account Stability': 15,
        'Transaction Patterns': 15,
        'Asset Diversity': 15
    }
    plt.pie(feature_importance.values(), labels=feature_importance.keys(), autopct='%1.1f%%')
    plt.title('Credit Score Component Weights')
    
    # Score by transaction count
    plt.subplot(2, 2, 3)
    sns.scatterplot(data=wallet_scores, x='tx_count', y='credit_score', alpha=0.5)
    plt.title('Credit Score vs Transaction Count')
    plt.xlabel('Number of Transactions')
    plt.ylabel('Credit Score')
    
    # Score by account age
    plt.subplot(2, 2, 4)
    sns.scatterplot(data=wallet_scores, x='account_age_days', y='credit_score', alpha=0.5)
    plt.title('Credit Score vs Account Age')
    plt.xlabel('Account Age (days)')
    plt.ylabel('Credit Score')
    
    plt.tight_layout()
    plt.savefig('Deliverables/score_analysis.png')
    plt.close()

In [28]:
generate_visualizations(wallet_scores)

# Analysis of Highest and Lowest Scoring Wallets

In [29]:
# Analysis of Highest and Lowest Scoring Wallets
def get_wallet_strengths(wallet):
    """Identify key strengths of a high-scoring wallet."""
    strengths = []
    
    # Check for healthy repayment behavior
    if wallet['repay_ratio'] > 0.95:
        strengths.append("Excellent repayment behavior")
        
    # Check for no liquidations
    if wallet['liquidated_count'] == 0:
        strengths.append("No liquidation history")
        
    # Check for active user
    if wallet['tx_count'] > 10:
        strengths.append("Active and consistent protocol usage")
        
    # Check for diversified assets
    if wallet['unique_assets'] > 3:
        strengths.append("Diversified asset portfolio")
        
    # Check for long-term participation
    if wallet['account_age_days'] > 90:
        strengths.append("Long-term protocol participant")
        
    # Check for normal transaction patterns
    if wallet['anomaly_score'] == 1:
        strengths.append("Normal transaction patterns")
        
    return strengths

def get_wallet_weaknesses(wallet):
    """Identify key weaknesses of a low-scoring wallet."""
    weaknesses = []
    
    # Check for poor repayment behavior
    if wallet['repay_ratio'] < 0.5:
        weaknesses.append("Poor repayment behavior")
        
    # Check for liquidations
    if wallet['liquidated_count'] > 0:
        weaknesses.append(f"Has been liquidated {wallet['liquidated_count']} times")
        
    # Check for bot-like behavior
    if wallet.get('potential_bot', False):
        weaknesses.append("Shows bot-like transaction patterns")
        
    # Check for high borrowing risk
    if wallet.get('high_borrow_risk', False):
        weaknesses.append("High borrowing relative to deposits")
        
    # Check for high withdrawal risk
    if wallet.get('high_withdraw_risk', False):
        weaknesses.append("Excessive withdrawals relative to deposits")
        
    # Check for liquidator behavior
    if wallet['is_liquidator']:
        weaknesses.append("Acts as a liquidator (potentially predatory)")
        
    # Check for anomalous behavior
    if wallet['anomaly_score'] == -1:
        weaknesses.append("Exhibits anomalous transaction patterns")
        
    return weaknesses

def analyze_wallet_examples(wallet_scores, all_transactions, n=5):
    """Analyze examples of high and low scoring wallets."""
    # Get top and bottom n wallets
    top_wallets = wallet_scores.sort_values('credit_score', ascending=False).head(n)
    bottom_wallets = wallet_scores.sort_values('credit_score').head(n)
    
    analysis = {'high_scoring_wallets': [], 'low_scoring_wallets': []}
    
    # Analyze high scoring wallets
    for _, wallet in top_wallets.iterrows():
        wallet_id = wallet['wallet_id']
        wallet_txs = all_transactions[all_transactions['wallet_id'] == wallet_id]
        
        # Calculate additional metrics for analysis
        asset_distribution = wallet_txs['asset_symbol'].value_counts(normalize=True).to_dict()
        avg_tx_size = wallet_txs['amountUSD'].mean()
        max_tx_size = wallet_txs['amountUSD'].max()
        
        # Time-based patterns
        wallet_txs_sorted = wallet_txs.sort_values('timestamp')
        tx_times = pd.to_datetime(wallet_txs_sorted['timestamp'], unit='s')
        hour_distribution = tx_times.dt.hour.value_counts(normalize=True).to_dict()
        weekday_distribution = tx_times.dt.dayofweek.value_counts(normalize=True).to_dict()
        
        analysis['high_scoring_wallets'].append({
            'wallet_id': wallet_id,
            'credit_score': wallet['credit_score'],
            'transaction_count': len(wallet_txs),
            'unique_assets': wallet_txs['asset_symbol'].nunique(),
            'transaction_types': wallet_txs['tx_type'].value_counts().to_dict(),
            'total_volume_usd': wallet_txs['amountUSD'].sum(),
            'repay_ratio': wallet['repay_ratio'],
            'liquidation_ratio': wallet['liquidation_ratio'],
            'account_age_days': wallet['account_age_days'],
            'is_liquidator': wallet['is_liquidator'],
            'key_strengths': get_wallet_strengths(wallet),
            'asset_distribution': asset_distribution,
            'avg_tx_size': avg_tx_size,
            'max_tx_size': max_tx_size,
            'hour_distribution': hour_distribution,
            'weekday_distribution': weekday_distribution
        })
    
    # Analyze low scoring wallets
    for _, wallet in bottom_wallets.iterrows():
        wallet_id = wallet['wallet_id']
        wallet_txs = all_transactions[all_transactions['wallet_id'] == wallet_id]
        
        # Calculate additional metrics for analysis
        asset_distribution = wallet_txs['asset_symbol'].value_counts(normalize=True).to_dict()
        avg_tx_size = wallet_txs['amountUSD'].mean()
        max_tx_size = wallet_txs['amountUSD'].max()
        
        # Time-based patterns
        wallet_txs_sorted = wallet_txs.sort_values('timestamp')
        tx_times = pd.to_datetime(wallet_txs_sorted['timestamp'], unit='s')
        hour_distribution = tx_times.dt.hour.value_counts(normalize=True).to_dict()
        weekday_distribution = tx_times.dt.dayofweek.value_counts(normalize=True).to_dict()
        
        analysis['low_scoring_wallets'].append({
            'wallet_id': wallet_id,
            'credit_score': wallet['credit_score'],
            'transaction_count': len(wallet_txs),
            'unique_assets': wallet_txs['asset_symbol'].nunique(),
            'transaction_types': wallet_txs['tx_type'].value_counts().to_dict(),
            'total_volume_usd': wallet_txs['amountUSD'].sum(),
            'repay_ratio': wallet['repay_ratio'],
            'liquidation_ratio': wallet['liquidation_ratio'],
            'account_age_days': wallet['account_age_days'],
            'is_liquidator': wallet['is_liquidator'],
            'key_weaknesses': get_wallet_weaknesses(wallet),
            'asset_distribution': asset_distribution,
            'avg_tx_size': avg_tx_size,
            'max_tx_size': max_tx_size,
            'hour_distribution': hour_distribution,
            'weekday_distribution': weekday_distribution
        })
    
    with open('Deliverables/wallet_analysis.json', 'w') as f:
        json.dump(analysis, f, indent=2)
        
    # Print a summary of the analysis
    print("\nScoring Wallet Examples:")
    for wallet in analysis['high_scoring_wallets']:
        print(f"Wallet {wallet['wallet_id'][:10]}... Score: {wallet['credit_score']}")
        print(f"Strengths: {', '.join(wallet['key_strengths'])}")
        
    print("\nScoring Wallet Examples:")
    for wallet in analysis['low_scoring_wallets']:
        print(f"Wallet {wallet['wallet_id'][:10]}... Score: {wallet['credit_score']}")
        print(f"Weaknesses: {', '.join(wallet['key_weaknesses'])}")
        
    return analysis

In [30]:
wallet_analysis = analyze_wallet_examples(wallet_scores, all_transactions)


Scoring Wallet Examples:
Wallet 0x003c52a7... Score: 100
Strengths: Excellent repayment behavior, No liquidation history, Active and consistent protocol usage, Diversified asset portfolio, Long-term protocol participant, Normal transaction patterns
Wallet 0xd19604ef... Score: 100
Strengths: Excellent repayment behavior, No liquidation history, Active and consistent protocol usage, Diversified asset portfolio, Long-term protocol participant, Normal transaction patterns
Wallet 0x869a032b... Score: 100
Strengths: Excellent repayment behavior, No liquidation history, Active and consistent protocol usage, Diversified asset portfolio, Long-term protocol participant, Normal transaction patterns
Wallet 0xc95be285... Score: 100
Strengths: Excellent repayment behavior, No liquidation history, Diversified asset portfolio, Long-term protocol participant, Normal transaction patterns
Wallet 0x5445f987... Score: 100
Strengths: Excellent repayment behavior, No liquidation history, Active and consiste

In [31]:
# Save top 1000 wallets to CSV
top_1000_wallets = wallet_scores.sort_values('credit_score', ascending=False).head(1000)
top_1000_wallets[['wallet_id', 'credit_score']].to_csv("Deliverables/top1000wallets.csv", index=False)
print("Top 1000 wallets saved to top1000wallets.csv")

Top 1000 wallets saved to top1000wallets.csv
