<a href="https://colab.research.google.com/github/daniely67/Credit-Scoring-Task/blob/main/CreditScoring_Autoencoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.utils import set_random_seed


In [4]:
# Read the transaction records into a DataFrame (point the path at your own JSON file if it differs)
df = pd.read_json(path_or_buf='user-wallet-transactions.json')

In [6]:
# Extract the numeric amount from each record's actionData payload.
# A payload that is not a dict, has no 'amount' key, or holds a non-numeric
# value yields NaN, which the per-wallet sums below skip.
df['amount'] = pd.to_numeric(
    df['actionData'].apply(lambda x: x.get('amount', np.nan) if isinstance(x, dict) else np.nan),
    errors='coerce',
)

# Action name as it appears in the 'action' column -> suffix used in the feature column names
ACTION_LABELS = {
    'deposit': 'deposits',
    'redeemunderlying': 'withdrawals',
    'borrow': 'borrows',
    'repay': 'repays',
    'liquidationcall': 'liquidations',
}

# Aggregate per wallet: for each tracked action, the summed amount (total_*) and
# the number of transactions (num_*). Rows of other actions contribute 0 to the
# sum and False to the count, so every wallet gets a row even if it never
# performed a given action.
# NOTE(review): in the saved output at the end of this notebook total_liquidations
# is 0.0 even for wallets with num_liquidations > 0, which suggests liquidation
# records carry no 'amount' key - verify the payload schema.
wallet_key = df['userWallet']
per_wallet = {}
for action, label in ACTION_LABELS.items():
    is_action = df['action'].eq(action)
    per_wallet[f'total_{label}'] = df['amount'].where(is_action, 0).groupby(wallet_key).sum()
    per_wallet[f'num_{label}'] = is_action.groupby(wallet_key).sum()

wallet_features = (
    pd.DataFrame(per_wallet)
    .rename_axis('userWallet')
    .fillna(0)
    .reset_index()
)

# Withdrawals relative to deposits; a zero deposit total is replaced by 1 so the
# division is always defined (the ratio then equals total_withdrawals).
wallet_features['withdrawal_deposit_ratio'] = (
    wallet_features['total_withdrawals'] / wallet_features['total_deposits'].replace(0, 1)
)



In [7]:
# Columns of wallet_features that make up the model input, in this order
feature_columns = [
    'total_deposits',
    'num_deposits',
    'total_withdrawals',
    'num_withdrawals',
    'total_borrows',
    'num_borrows',
    'total_repays',
    'num_repays',
    'total_liquidations',
    'num_liquidations',
    'withdrawal_deposit_ratio',
]

# Pull the selected features out as a plain array ...
X = wallet_features[feature_columns].to_numpy()

# ... then fit the scaler on it and apply it to the same data.
# The fitted scaler is kept so the same transformation can be reused later.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


In [8]:
# Input, Dense, Model, EarlyStopping, train_test_split and set_random_seed all
# come from the import cell at the top of the notebook.

# Seed Python, NumPy and TensorFlow so that weight initialisation and batch
# shuffling - and therefore the resulting scores - are repeatable across runs.
# NOTE(review): bit-exact results on GPU may additionally need
# tf.config.experimental.enable_op_determinism() - confirm if that matters here.
set_random_seed(42)

input_dim = X_scaled.shape[1]

# Symmetric dense autoencoder: input_dim -> 8 -> 4 -> 2 -> 4 -> 8 -> input_dim.
# Hidden layers use ReLU; the output layer is linear so it can reproduce
# negative standardised values.
input_layer = Input(shape=(input_dim,))
encoded = Dense(8, activation='relu')(input_layer)
encoded = Dense(4, activation='relu')(encoded)
bottleneck = Dense(2, activation='relu')(encoded)
decoded = Dense(4, activation='relu')(bottleneck)
decoded = Dense(8, activation='relu')(decoded)
output_layer = Dense(input_dim, activation='linear')(decoded)

autoencoder = Model(inputs=input_layer, outputs=output_layer)
autoencoder.compile(optimizer='adam', loss='mse')

# Hold out 20% of the wallets; they are used only as the validation set that
# drives early stopping.
X_train, X_test = train_test_split(X_scaled, test_size=0.2, random_state=42)

# Stop once val_loss has not improved for 5 epochs and roll back to the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)

# The network is trained to reproduce its own input (inputs == targets).
# The History object is kept so the loss curves can be inspected afterwards,
# and so the cell does not end by printing its repr.
history = autoencoder.fit(X_train, X_train,
                          epochs=100,
                          batch_size=32,
                          shuffle=True,
                          validation_data=(X_test, X_test),
                          callbacks=[early_stop],
                          verbose=1)


Epoch 1/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 0.5257 - val_loss: 1.2384
Epoch 2/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.7222 - val_loss: 1.1937
Epoch 3/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.6335 - val_loss: 1.0957
Epoch 4/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.3173 - val_loss: 1.0685
Epoch 5/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 1.2351 - val_loss: 1.0613
Epoch 6/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.4886 - val_loss: 1.0582
Epoch 7/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.5391 - val_loss: 1.0578
Epoch 8/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.0591 - val_loss: 1.0579
Epoch 9/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x798ee96a1210>

In [11]:
# Per-wallet reconstruction error: mean squared difference between the scaled
# features and the autoencoder's reconstruction of them.
reconstructed = autoencoder.predict(X_scaled)
mse = np.mean(np.square(X_scaled - reconstructed), axis=1)

# Invert and min-max scale the error: smallest error -> 1000, largest error -> 0.
mse_min = mse.min()
mse_range = mse.max() - mse_min
if mse_range == 0:
    # Every wallet has the same error, so min-max scaling would be 0/0 (NaN);
    # treat them all as equally "normal" and give the top score.
    score_norm = np.full_like(mse, 1000.0)
else:
    score_norm = 1000 * (1 - (mse - mse_min) / mse_range)
wallet_features['creditScore_autoencoder'] = score_norm.round(2)


[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


In [12]:
# Write one row per wallet - address and score only - without the DataFrame index
wallet_features.to_csv(
    'wallet_credit_scores_autoencoder.csv',
    columns=['userWallet', 'creditScore_autoencoder'],
    index=False,
)


In [15]:
# Lowest scoring wallets (largest reconstruction error, i.e. least "normal"):
# sorting by score in descending order and taking the tail yields the bottom 20,
# ending with the lowest score.
(
    wallet_features
    .sort_values(by='creditScore_autoencoder', ascending=False)
    .tail(20)
)


Unnamed: 0,userWallet,total_deposits,num_deposits,total_withdrawals,num_withdrawals,total_borrows,num_borrows,total_repays,num_repays,total_liquidations,num_liquidations,withdrawal_deposit_ratio,creditScore_autoencoder
3298,0x05c18ffc1c74cb67cb26bb5222aaf3355b74bbc3,1.026667e+24,328,7.709248e+23,213,6.6562e+22,27,1.807215e+22,15,0.0,1,0.7509004,988.83
2873,0x04fb136989106430e56f24c3d6a473488235480e,4.68292e+22,279,4.096772e+18,15,159001200000.0,160,19103950000.0,20,0.0,0,8.748328e-05,988.2
447,0x00aac20f271c4731591cca07913e994d6f1075c0,1.014584e+21,289,2.307137e+19,79,5.091632e+21,89,1.392786e+21,176,0.0,0,0.02273974,987.64
2936,0x051ba1cf67593ea9e697a1039c549cc94660c4b2,1.54961e+22,9,0.0,0,7.136e+21,8,0.0,0,0.0,7,0.0,985.41
2579,0x047a96ef72d7ee6a3f193bdb92e998fb300265df,2.429375e+22,179,3.434081e+22,234,1.517891e+23,200,1.518574e+23,207,0.0,0,1.413566,984.81
1399,0x0256b3d533b384fd24bfef8f61491ceea5426b5e,2.742031e+22,31,2.101398e+18,8,9.666308e+21,18,1.224329e+21,10,0.0,8,7.663654e-05,981.35
2571,0x0476f3ee277eb20568ee2369b337f3ce55bc558a,3.242516e+24,511,8.629378e+22,62,2.005945e+24,107,381391900000.0,83,0.0,0,0.02661322,980.01
499,0x00c2e5aa91c2b6927a4e8533482c8217b450a6a1,8.894098e+24,83,7.853321e+24,23,7.00335e+24,35,6.170976e+24,15,0.0,0,0.8829812,978.84
1733,0x02eca8cc78b7d30c1ac5e16988ed2c8a9da658d6,1.212572e+25,32,4.428421e+24,43,9.008065e+24,21,2.013961e+24,71,0.0,0,0.3652088,977.76
2793,0x04d9f6ecd792e48a09fa5dc2138baed8e628a7e5,1.052219e+22,411,7.759629e+21,209,5.065979e+21,178,4.899993e+21,291,0.0,0,0.7374535,970.16
