# Personal Expense Pattern Analyzer

I developed a Personal Expense Pattern Analyzer using machine learning. The system classifies expense categories, predicts monthly spending with linear regression, and detects abnormal transactions with an Isolation Forest. I also performed feature engineering and model evaluation to check performance and reliability.

# Import Libraries

In [32]:
import pandas as pd
import numpy as np
import pickle

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import IsolationForest

In [33]:
# Read the raw expense transactions from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="expense_data.csv")
# Preview the first three rows to sanity-check the parsed columns.
df.head(n=3)

Unnamed: 0,Date,Category,Amount,Payment_Mode,Description
0,2024-02-09,Bills,3451,Debit Card,Expense
1,2024-04-16,Food,653,UPI,Expense
2,2024-05-15,Transport,281,Credit Card,Expense


# Data Preprocessing 

In [34]:

# Convert the 'Date' column to datetimes, then derive the calendar month
# number from it as a new 'Month' column.
df = df.assign(Date=pd.to_datetime(df['Date']))
df = df.assign(Month=df['Date'].dt.month)


# Basic Dataset Check

In [35]:
# Count missing entries per column once, then report it next to the
# (rows, columns) shape of the frame.
missing_per_column = df.isna().sum()
print("Shape:", df.shape)
print("\nMissing values:\n", missing_per_column)

Shape: (1000, 6)

Missing values:
 Date            0
Category        0
Amount          0
Payment_Mode    0
Description     0
Month           0
dtype: int64


# Monthly Expense Aggregation

In [36]:

# Total spend per month number. 'Month' holds only the month number taken from
# 'Date', so rows from the same month of different years would be summed together.
amount_by_month = df.groupby('Month')['Amount']
monthly_data = amount_by_month.sum().reset_index()
# Show the aggregated table (one row per month).
monthly_data

Unnamed: 0,Month,Amount
0,1,276781
1,2,212301
2,3,252979
3,4,254076
4,5,237273
5,6,200000



# Prepare Data for Regression

In [37]:
# Features are kept as a one-column DataFrame (2-D); the target is the
# matching Series of monthly totals.
X = monthly_data.loc[:, ['Month']]
y = monthly_data.loc[:, 'Amount']

# Train Monthly Expense Prediction Model

In [38]:
# Ordinary least-squares fit of monthly total spend against the month number.
regressor = LinearRegression()
# Leaving the fit call as the last expression displays the fitted estimator.
regressor.fit(X=X, y=y)


# Test Monthly Prediction

In [39]:

# Query the regressor with a one-row DataFrame that has the same 'Month'
# column it was fitted on. Passing a bare NumPy array to an estimator fitted
# on a DataFrame makes scikit-learn warn that the input lacks feature names.
# NOTE: month 5 is part of the fitted data, so this is an in-sample sanity
# check rather than a forecast for an unseen month.
test_month = pd.DataFrame({'Month': [5]})   # Example: May
predicted = regressor.predict(test_month)
print("Predicted expense for May:", predicted[0])

Predicted expense for May: 225706.29523809525




# Train Anomaly Detection Model

In [40]:
# The detector looks only at the transaction amount, passed as a one-column frame.
amount_features = df[['Amount']]

# contamination=0.05 sets the expected share of outliers to 5%;
# random_state pins the forest's randomness so re-runs give the same model.
anomaly_model = IsolationForest(random_state=42, contamination=0.05)

# Leaving the fit call as the last expression displays the fitted estimator.
anomaly_model.fit(amount_features)

# Test Anomaly Detection

In [41]:
# Score every transaction with the fitted detector. Per the scikit-learn
# IsolationForest documentation, predict() returns 1 for inliers and -1 for outliers.
anomaly_flags = anomaly_model.predict(df[['Amount']])
df['Anomaly'] = anomaly_flags
# Preview the amounts next to their flags.
df.loc[:, ['Amount', 'Anomaly']].head()

Unnamed: 0,Amount,Anomaly
0,3451,1
1,653,1
2,281,1
3,2286,1
4,240,1


# SAVE TRAINED MODELS

In [42]:
# Persist both fitted models. The `with` blocks close each file handle (and
# flush its contents) as soon as the dump finishes, instead of leaving the
# handles open until garbage collection.
with open("monthly_expense_regressor.pkl", "wb") as regressor_file:
    pickle.dump(regressor, regressor_file)

with open("anomaly_detector.pkl", "wb") as detector_file:
    pickle.dump(anomaly_model, detector_file)

# FINAL CONFIRMATION

In [43]:
# Report completion and list the pickle files written by the save step.
# One print call; sep="\n" puts each message on its own line.
print(
    "✅ Models trained and saved successfully!",
    "Files created:",
    "- monthly_expense_regressor.pkl",
    "- anomaly_detector.pkl",
    sep="\n",
)

✅ Models trained and saved successfully!
Files created:
- monthly_expense_regressor.pkl
- anomaly_detector.pkl
