In [1]:

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
import joblib
import os

  if not hasattr(np, "object"):


In [2]:
# 1. Load the dataset
df = pd.read_csv('personal_finance_tracker_dataset.csv')
df.head()

Unnamed: 0,date,user_id,monthly_income,monthly_expense_total,savings_rate,budget_goal,financial_scenario,credit_score,debt_to_income_ratio,loan_payment,...,discretionary_spending,essential_spending,income_type,rent_or_mortgage,category,cash_flow_status,financial_advice_score,financial_stress_level,actual_savings,savings_goal_met
0,2019-01-01,1584,3119.58,3212.07,0.38,3676.11,inflation,721.0,0.56,125.77,...,857.55,1910.85,Freelance,1501.65,Investments,Positive,8.3,Low,0.0,0
1,2019-01-31,1045,3262.44,3732.81,0.1,2607.17,inflation,670.0,0.42,454.19,...,534.51,3165.2,Salary,1603.17,Investments,Positive,22.6,Low,0.0,0
2,2019-03-02,1756,2931.2,3335.58,0.15,3004.14,inflation,691.0,0.24,971.82,...,353.67,1504.56,Freelance,1097.82,Healthcare,Positive,58.8,Low,0.0,0
3,2019-04-01,1724,3506.79,2327.59,0.17,3346.97,normal,717.0,0.16,482.76,...,594.08,1450.72,Freelance,1155.64,Groceries,Positive,74.5,Low,1179.2,0
4,2019-05-01,1600,4606.87,2182.58,0.34,2670.09,inflation,795.0,0.25,263.74,...,556.86,1000.0,Salary,1170.86,Utilities,Negative,38.7,High,2424.29,0


In [3]:
# 2. Prepare Data (Aggregation and Sequence Creation)
# We group by user and month to get monthly totals, then create a sliding window of 3 months.

df['date'] = pd.to_datetime(df['date'])
df['month'] = df['date'].dt.to_period('M')

monthly_data = df.groupby(['user_id', 'month'])[['monthly_expense_total', 'monthly_income']].sum().reset_index()
monthly_data = monthly_data.sort_values(['user_id', 'month'])
monthly_data.head(10)

Unnamed: 0,user_id,month,monthly_expense_total,monthly_income
0,1000,2019-09,4718.17,3374.92
1,1000,2020-02,2088.04,3381.06
2,1000,2022-09,2400.73,2532.54
3,1001,2019-05,1671.02,4301.38
4,1001,2019-12,4082.16,5343.85
5,1001,2020-07,2119.47,3542.3
6,1002,2020-04,2307.9,4712.63
7,1003,2021-11,3552.34,3465.95
8,1003,2021-12,3222.0,4757.29
9,1003,2022-01,2797.07,4873.11


In [4]:
def create_sequences(data, target_col, window_size=3):
    X, y = [], []
    for user in data['user_id'].unique():
        user_data = data[data['user_id'] == user][target_col].values
        if len(user_data) > window_size:
            for i in range(len(user_data) - window_size):
                X.append(user_data[i:i+window_size])
                y.append(user_data[i+window_size])
    return np.array(X), np.array(y)

X_exp, y_exp = create_sequences(monthly_data, 'monthly_expense_total')
X_inc, y_inc = create_sequences(monthly_data, 'monthly_income')

print(f"Expense sequences: {len(X_exp)}")
print(f"Income sequences: {len(X_inc)}")

Expense sequences: 601
Income sequences: 601


In [6]:
# 3. Scaling Data (Critical for Deep Learning)
scaler_exp = StandardScaler()
X_exp_scaled = scaler_exp.fit_transform(X_exp)

scaler_inc = StandardScaler()
X_inc_scaled = scaler_inc.fit_transform(X_inc)

# Save scalers for prediction
joblib.dump(scaler_exp, 'scaler_expense.pkl')
joblib.dump(scaler_inc, 'scaler_income.pkl')

print("Scalers saved successfully.")

Scalers saved successfully.


In [7]:
# 4. Define and Train Expense Model
def build_model(input_shape):
    model = Sequential([
        Dense(32, activation='relu', input_shape=(input_shape,)),
        Dropout(0.2),
        Dense(16, activation='relu'),
        Dropout(0.1),
        Dense(8, activation='relu'),
        Dense(1) # Linear output for regression
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

expense_model = build_model(X_exp_scaled.shape[1])
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

print("Training Expense Model...")
expense_model.fit(X_exp_scaled, y_exp, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stop], verbose=1)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training Expense Model...
Epoch 1/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 63ms/step - loss: 10549877.0000 - mae: 3099.8259 - val_loss: 11293501.0000 - val_mae: 3179.1538
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 10548478.0000 - mae: 3099.6003 - val_loss: 11291922.0000 - val_mae: 3178.9021
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 10546630.0000 - mae: 3099.3032 - val_loss: 11289138.0000 - val_mae: 3178.4512
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 10543627.0000 - mae: 3098.8196 - val_loss: 11285577.0000 - val_mae: 3177.8743
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 10539824.0000 - mae: 3098.2109 - val_loss: 11280846.0000 - val_mae: 3177.1101
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 10535165.0000 - mae:

<keras.src.callbacks.history.History at 0x2230846f4d0>

In [8]:
# 5. Define and Train Income Model
income_model = build_model(X_inc_scaled.shape[1])

print("\nTraining Income Model...")
income_model.fit(X_inc_scaled, y_inc, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stop], verbose=1)


Training Income Model...
Epoch 1/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 44ms/step - loss: 18507336.0000 - mae: 4134.0972 - val_loss: 18368282.0000 - val_mae: 4101.2842
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 18504418.0000 - mae: 4133.7466 - val_loss: 18364868.0000 - val_mae: 4100.8672
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 18500710.0000 - mae: 4133.3047 - val_loss: 18360130.0000 - val_mae: 4100.2852
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 18495832.0000 - mae: 4132.7041 - val_loss: 18353360.0000 - val_mae: 4099.4536
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 18488522.0000 - mae: 4131.8120 - val_loss: 18343532.0000 - val_mae: 4098.2456
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 18476936.0000 - mae:

<keras.src.callbacks.history.History at 0x22309510550>

In [9]:
# 6. Save Models
expense_model.save('dl_expense_model.keras')
income_model.save('dl_income_model.keras')

print("Models saved as dl_expense_model.keras and dl_income_model.keras")

Models saved as dl_expense_model.keras and dl_income_model.keras
