In [1]:
import os, json
import pandas as pd
import numpy as np
from glob import glob
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

ModuleNotFoundError: No module named 'tensorflow'

In [3]:
# 1) Load & preprocess (same as before)

def load_event_data(data_dir='data', top_n=3):
    """Load event records from the largest JSON files in a directory.

    The ``*.json`` files directly inside ``data_dir`` are ranked by size on
    disk (largest first) and only the first ``top_n`` are read. Within each
    file, every top-level key whose value is a non-empty list starting with a
    dict is flattened with ``pd.json_normalize`` and tagged with that key in
    an ``event_type`` column. All other keys are ignored.

    Parameters
    ----------
    data_dir : str
        Directory searched for ``*.json`` files.
    top_n : int
        How many of the largest files to load.

    Returns
    -------
    pandas.DataFrame
        Every flattened record stacked into one frame with a fresh
        integer index.

    Raises
    ------
    ValueError
        If no selected file contributes any records (for example the
        directory holds no JSON files).
    """
    files = glob(os.path.join(data_dir, '*.json'))
    files = sorted(files, key=os.path.getsize, reverse=True)[:top_n]

    df_list = []
    for fp in files:
        # The context manager closes the handle as soon as parsing finishes;
        # JSON is decoded as UTF-8 rather than the platform's locale default.
        with open(fp, encoding='utf-8') as fh:
            data = json.load(fh)
        for k, v in data.items():
            # Only non-empty lists of dicts are treated as event collections.
            if isinstance(v, list) and v and isinstance(v[0], dict):
                df = pd.json_normalize(v)
                df['event_type'] = k
                df_list.append(df)

    if not df_list:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(
            f"No event records found in JSON files under {data_dir!r}"
        )
    return pd.concat(df_list, ignore_index=True)

In [4]:
def preprocess(df):
    """Normalize column names and dtypes of the raw event frame.

    Renames ``account.id`` to ``wallet`` and ``amountUSD`` to ``usd``, casts
    ``usd`` to float, and converts ``timestamp`` from epoch seconds to
    datetimes. The caller's frame is left untouched; a renamed copy is
    returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw events; must contain ``amountUSD`` (or ``usd``) and ``timestamp``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with ``wallet``/``usd`` column names, float ``usd``
        and datetime ``timestamp``.
    """
    df = df.rename(columns={'account.id': 'wallet', 'amountUSD': 'usd'})
    df['usd'] = df['usd'].astype(float)

    stamps = df['timestamp']
    # An already-converted column is kept as is, so re-running is a no-op.
    if not pd.api.types.is_datetime64_any_dtype(stamps):
        # Go through numbers explicitly: epoch seconds stored as strings
        # would otherwise depend on pandas' deprecated string-with-unit
        # parsing in to_datetime.
        stamps = pd.to_datetime(pd.to_numeric(stamps), unit='s')
    df['timestamp'] = stamps
    return df


In [5]:
def feature_eng(df):
    """Aggregate event-level rows into one feature row per wallet.

    Expects the columns ``wallet``, ``event_type``, ``id``, ``usd``,
    ``timestamp`` (datetime) and ``asset.symbol``.

    The returned frame is indexed by wallet and holds, in this order:

    * ``<event_type>_cnt`` / ``<event_type>_usd`` -- count of ``id`` values
      and sum of ``usd`` per event type,
    * ``total_usd``, ``avg_usd``, ``std_usd`` -- USD statistics over all of
      the wallet's events,
    * ``first``, ``last`` -- earliest and latest timestamp,
    * ``tx_cnt`` -- number of rows, ``days`` -- distinct calendar dates,
      ``dt_avg`` -- mean gap between consecutive events in seconds,
    * ``asset_div`` -- number of distinct asset symbols,
    * ``borrow_repay``, ``withdraw_deposit``, ``liquidation_rate`` -- ratios
      whose inputs default to 0 when the corresponding column is absent,
    * ``age_days`` -- span between ``first`` and ``last`` in days.

    Missing values are replaced by 0.
    """
    eps = 1e-6  # keeps the ratio denominators away from zero

    def _active_days(stamps):
        # Distinct calendar dates on which the wallet has an event.
        return stamps.dt.date.nunique()

    def _mean_gap_seconds(stamps):
        # Average spacing between consecutive events, in seconds.
        return stamps.diff().dt.total_seconds().mean()

    # Wallet x event-type matrices.
    per_event_cnt = df.pivot_table(
        index='wallet', columns='event_type', values='id',
        aggfunc='count', fill_value=0,
    ).add_suffix('_cnt')
    per_event_usd = df.pivot_table(
        index='wallet', columns='event_type', values='usd',
        aggfunc='sum', fill_value=0,
    ).add_suffix('_usd')

    # Overall USD statistics; std is NaN for single-event wallets, hence fillna.
    usd_stats = df.groupby('wallet')['usd'].agg(
        total_usd='sum', avg_usd='mean', std_usd='std'
    )
    usd_stats = usd_stats.fillna(0)

    asset_div = df.groupby('wallet')['asset.symbol'].nunique().rename('asset_div')

    # Time-based features need each wallet's events in chronological order.
    by_wallet = df.sort_values(['wallet', 'timestamp']).groupby('wallet')
    stamps_by_wallet = by_wallet['timestamp']
    first_seen = stamps_by_wallet.first().rename('first')
    last_seen = stamps_by_wallet.last().rename('last')
    n_events = by_wallet.size().rename('tx_cnt')
    active_days = stamps_by_wallet.apply(_active_days).rename('days')
    mean_gap = stamps_by_wallet.apply(_mean_gap_seconds).fillna(0).rename('dt_avg')

    pieces = [
        per_event_cnt, per_event_usd, usd_stats, first_seen, last_seen,
        n_events, active_days, mean_gap, asset_div,
    ]
    feat = pd.concat(pieces, axis=1).fillna(0)

    # Derived ratios; a missing event-type column counts as 0.
    borrowed = feat.get('borrow_usd', 0)
    repaid = feat.get('repay_usd', 0)
    feat['borrow_repay'] = borrowed / (repaid + eps)

    withdrawn = feat.get('withdraw_usd', 0)
    deposited = feat.get('deposit_usd', 0)
    feat['withdraw_deposit'] = withdrawn / (deposited + eps)

    liquidations = feat.get('liquidation_cnt', 0)
    feat['liquidation_rate'] = liquidations / (feat['tx_cnt'] + eps)

    lifespan = feat['last'] - feat['first']
    feat['age_days'] = lifespan.dt.total_seconds() / 86400
    return feat


In [None]:
def train_autoencoder(X_scaled, encoding_dim=16, epochs=50, batch_size=32):
    """Fit a dense autoencoder that reconstructs ``X_scaled`` from itself.

    The network is ``input -> 64 -> encoding_dim -> 64 -> input``: ReLU on
    the three hidden layers and a linear output layer. It is compiled with
    mean-squared-error loss and an Adam optimizer constructed with ``1e-3``,
    then trained with ``X_scaled`` as both input and target.

    Parameters
    ----------
    X_scaled : array-like with a 2-D ``.shape``
        Training matrix; ``shape[1]`` sets the input and output width.
    encoding_dim : int
        Width of the bottleneck layer.
    epochs : int
        Number of training epochs passed to ``fit``.
    batch_size : int
        Mini-batch size passed to ``fit``.

    Returns
    -------
    The fitted ``Sequential`` model.
    """
    n_features = X_scaled.shape[1]

    layers = [
        # encoder
        Dense(64, activation='relu', input_shape=(n_features,)),
        Dense(encoding_dim, activation='relu'),
        # decoder
        Dense(64, activation='relu'),
        Dense(n_features, activation='linear'),
    ]
    autoencoder = Sequential(layers)
    autoencoder.compile(optimizer=Adam(1e-3), loss='mse')

    # Input and target are the same matrix; validation_split=0.1 asks Keras
    # to hold out a tenth of the rows for validation.
    autoencoder.fit(
        X_scaled,
        X_scaled,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.1,
        verbose=1,
    )
    return autoencoder
