In [1]:
import pandas as pd
import numpy as np
import joblib
import os
import json
from sklearn.preprocessing import StandardScaler
import keras
from keras import layers

def build_autoencoder(input_dim):
    model = keras.Sequential([
        layers.Dense(8, activation='relu', input_shape=(input_dim,)),
        layers.Dense(4, activation='relu'),
        layers.Dense(2, activation='relu'), 
        layers.Dense(4, activation='relu'),
        layers.Dense(8, activation='relu'),
        layers.Dense(input_dim, activation='linear')
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

  if not hasattr(np, "object"):


In [2]:
def train_anomaly_dl(mode='expense'):
    dataset_path = 'personal_finance_tracker_dataset.csv'
    if not os.path.exists(dataset_path):
        print("Dataset not found!")
        return
        
    df = pd.read_csv(dataset_path)
    col_cat = 'category' if mode == 'expense' else 'income_type'
    col_amt = 'monthly_expense_total' if mode == 'expense' else 'monthly_income'
    
    # 1. Prepare Data
    X_raw = df[[col_cat, col_amt]].copy()
    unique_cats = X_raw[col_cat].unique().tolist()
    cat_to_id = {cat: i for i, cat in enumerate(unique_cats)}
    X_raw['cat_label'] = X_raw[col_cat].map(cat_to_id)
    
    X = X_raw[['cat_label', col_amt]].values
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # 2. Train
    model = build_autoencoder(X_scaled.shape[1])
    model.fit(X_scaled, X_scaled, epochs=50, batch_size=32, verbose=1)
    
    # 3. Save
    model.save(f'dl_anomaly_{mode}_model.keras')
    joblib.dump(scaler, f'scaler_anomaly_{mode}.pkl')
    
    # Save threshold
    reconstructions = model.predict(X_scaled)
    mse = np.mean(np.power(X_scaled - reconstructions, 2), axis=1)
    threshold = np.percentile(mse, 99.5)
    with open(f'threshold_{mode}.json', 'w') as f:
        json.dump({"threshold": float(threshold)}, f)
        
    print(f"{mode.upper()} training complete. Threshold: {threshold}")

train_anomaly_dl('expense')
train_anomaly_dl('income')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.9020
Epoch 2/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.7071
Epoch 3/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.6267
Epoch 4/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.5844
Epoch 5/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.5560
Epoch 6/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.5381
Epoch 7/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.5259
Epoch 8/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 0.5184
Epoch 9/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.5133
Epoch 10/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.5100
Epoch 11

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.8268
Epoch 2/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.6732
Epoch 3/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.6151
Epoch 4/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.5212
Epoch 5/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2506
Epoch 6/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0870
Epoch 7/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0588
Epoch 8/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0434
Epoch 9/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0333
Epoch 10/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0261
Epoch 11/50
[1m94/9

In [7]:
import json, joblib, numpy as np, pandas as pd, tensorflow as tf
from sklearn.preprocessing import StandardScaler

mode = 'income'   # or 'income'

df = pd.read_csv('personal_finance_tracker_dataset.csv')

col_cat = 'category' if mode == 'expense' else 'income_type'
col_amt = 'monthly_expense_total' if mode == 'expense' else 'monthly_income'

X_raw = df[[col_cat, col_amt]].copy()

# recreate category mapping (must match training)
unique_cats = X_raw[col_cat].unique().tolist()
cat_to_id = {cat: i for i, cat in enumerate(unique_cats)}
X_raw['cat_label'] = X_raw[col_cat].map(cat_to_id)

X = X_raw[['cat_label', col_amt]].values

scaler = joblib.load(f'scaler_anomaly_{mode}.pkl')
X_scaled = scaler.transform(X)

model = tf.keras.models.load_model(f'dl_anomaly_{mode}_model.keras')

with open(f'threshold_{mode}.json') as f:
    threshold = json.load(f)['threshold']


In [8]:
reconstructions = model.predict(X_scaled)
mse = np.mean(np.power(X_scaled - reconstructions, 2), axis=1)


[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [9]:
anomalies = mse > threshold
print(f"{mode.upper()} anomaly %:", anomalies.mean() * 100)
print("Total anomalies:", anomalies.sum())


INCOME anomaly %: 0.5
Total anomalies: 15
