In [2]:
import os, json
import pandas as pd
import numpy as np
from glob import glob
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

In [3]:
# 1) Load & preprocess (same as before)

def load_event_data(data_dir='../data', top_n=3):
    files = glob(os.path.join(data_dir, '*.json'))
    files = sorted(files, key=lambda f: os.path.getsize(f), reverse=True)[:top_n]
    df_list = []
    for fp in files:
        data = json.load(open(fp))
        for k, v in data.items():
            if isinstance(v, list) and v and isinstance(v[0], dict):
                df = pd.json_normalize(v)
                df['event_type'] = k
                df_list.append(df)
    return pd.concat(df_list, ignore_index=True)

In [4]:
def preprocess(df):
    df = df.rename(columns={'account.id':'wallet','amountUSD':'usd'})
    df['usd'] = df['usd'].astype(float)
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
    return df


In [5]:
def feature_eng(df):
    counts = df.pivot_table(index='wallet', columns='event_type',
                             values='id', aggfunc='count', fill_value=0).add_suffix('_cnt')
    sums   = df.pivot_table(index='wallet', columns='event_type',
                             values='usd', aggfunc='sum', fill_value=0).add_suffix('_usd')
    stats  = df.groupby('wallet')['usd'] \
               .agg(total_usd='sum', avg_usd='mean', std_usd='std').fillna(0)
    g      = df.sort_values(['wallet','timestamp']).groupby('wallet')
    first  = g['timestamp'].first().rename('first')
    last   = g['timestamp'].last().rename('last')
    tx_cnt = g.size().rename('tx_cnt')
    days   = g['timestamp'].apply(lambda x: x.dt.date.nunique()).rename('days')
    dt_avg = g['timestamp'].apply(lambda x: x.diff().dt.total_seconds().mean()) \
              .fillna(0).rename('dt_avg')
    diversity = df.groupby('wallet')['asset.symbol'].nunique().rename('asset_div')

    feat = pd.concat([counts, sums, stats, first, last,
                      tx_cnt, days, dt_avg, diversity], axis=1).fillna(0)
    # derived
    feat['borrow_repay']   = feat.get('borrow_usd',0) / (feat.get('repay_usd',0)+1e-6)
    feat['withdraw_deposit']= feat.get('withdraw_usd',0)/(feat.get('deposit_usd',0)+1e-6)
    feat['liquidation_rate']= feat.get('liquidation_cnt',0)/(feat['tx_cnt']+1e-6)
    feat['age_days']       = (feat['last'] - feat['first']).dt.total_seconds()/86400
    return feat


In [6]:
def train_autoencoder(X_scaled, encoding_dim=16, epochs=50, batch_size=32):
    input_dim = X_scaled.shape[1]
    model = Sequential([
        Dense(64, activation='relu', input_shape=(input_dim,)),
        Dense(encoding_dim, activation='relu'),
        Dense(64, activation='relu'),
        Dense(input_dim, activation='linear')
    ])
    model.compile(optimizer=Adam(1e-3), loss='mse')
    model.fit(X_scaled, X_scaled,
              epochs=epochs, batch_size=batch_size,
              validation_split=0.1, verbose=1)
    return model


In [None]:
raw = load_event_data('../data', top_n=3)
df  = preprocess(raw)
feat= feature_eng(df)

scaler = StandardScaler()
X = scaler.fit_transform(feat.select_dtypes(include=[np.number]))

ae = train_autoencoder(X, encoding_dim=16, epochs=30)

X_rec = ae.predict(X)
mse   = np.mean(np.square(X - X_rec), axis=1)
err_norm = (mse - mse.min()) / (mse.max() - mse.min() + 1e-8)
scores = (1 - err_norm) * 100

out = pd.DataFrame({
    'wallet': feat.index,
    'credit_score': scores
}).sort_values('credit_score', ascending=False).head(1000)

out.to_csv('wallet_scores_dl.csv', index=False)


  df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1746392949.878593   28407 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1746392950.266539   28407 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1746392950.266704   28407 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1746392950.286136   28407 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1746392950.286353   28407 cuda_executor.cc:1001] could not ope

Epoch 1/30


I0000 00:00:1746392952.843190   28779 service.cc:146] XLA service 0x7f22d00173f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1746392952.843258   28779 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 6GB Laptop GPU, Compute Capability 8.6
2025-05-04 21:09:12.870603: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-05-04 21:09:13.029479: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 90101


[1m 39/380[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 4ms/step - loss: 0.4029

I0000 00:00:1746392954.986421   28779 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m370/380[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - loss: 0.3904

In [8]:
main()

ValueError: No objects to concatenate