In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [4]:
# Read the raw finance records from the CSV file into a DataFrame.
csv_path = '/content/Finance_data.csv'
df = pd.read_csv(csv_path)

In [5]:
# Preview the first five rows (five is the default for head()).
df.head()

Unnamed: 0,Income,Fixed Expenses,Discretionary Expenses,Savings,Risk Tolerance,Financial Goal
0,3000,1200,800,1000,Low (1),Emergency Fund
1,4000,1500,1000,1500,Medium (2),Vacation Savings
2,5000,2000,1200,1800,High (3),Retirement Savings
3,4500,1800,1100,1600,Medium (2),Down Payment
4,3500,1400,900,1200,Low (1),Emergency Fund


In [6]:
# Count missing values per column.
print(df.isna().sum())


Income                    0
Fixed Expenses            0
Discretionary Expenses    0
Savings                   0
Risk Tolerance            0
Financial Goal            0
dtype: int64


In [7]:
# Turn the risk-tolerance text labels into ordinal integer codes.
# NOTE: this cell is meant to run once on the freshly loaded frame. Running it
# again would fail: the integer codes match none of the string keys below, and
# the 'Financial Goal' column no longer exists after get_dummies.
risk_levels = {'Low (1)': 1, 'Medium (2)': 2, 'High (3)': 3}
df['Risk Tolerance'] = df['Risk Tolerance'].map(risk_levels)

# Expand Financial Goal into indicator columns; drop_first=True omits the
# first category's indicator column.
df = pd.get_dummies(df, columns=['Financial Goal'], drop_first=True)


In [8]:
# Total spending per row: fixed plus discretionary expenses.
df['Total Expenses'] = df['Fixed Expenses'].add(df['Discretionary Expenses'])


In [9]:
# Share of income that is spent.
df['Spending Ratio'] = df['Total Expenses'].div(df['Income'])


In [10]:
# Share of income that is saved.
df['Savings Rate'] = df['Savings'].div(df['Income'])


In [12]:
# Recompute both ratios straight from the raw columns (same values as the
# 'Spending Ratio' and 'Savings Rate' cells above).
combined_expenses = df['Fixed Expenses'].add(df['Discretionary Expenses'])
df['Spending Ratio'] = combined_expenses.div(df['Income'])
df['Savings Rate'] = df['Savings'].div(df['Income'])


In [23]:
# Predictors (X) and regression target (y).
# NOTE(review): in the rows shown by df.head(), Savings equals Income minus
# both expense columns exactly, so a near-perfect linear fit may simply
# reflect that identity -- confirm against the full dataset.
feature_columns = ['Income', 'Fixed Expenses', 'Discretionary Expenses']
X = df[feature_columns]
y = df['Savings']


In [24]:


# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [26]:


# Build an ordinary least-squares regressor and fit it on the training split
# (fit() returns the fitted estimator itself).
model = LinearRegression().fit(X_train, y_train)


In [27]:
# Generate savings predictions for the held-out rows.
y_pred = model.predict(X=X_test)


In [28]:
from sklearn.metrics import mean_absolute_error, r2_score

# Score the held-out predictions: mean absolute error (in the units of
# 'Savings') and the coefficient of determination.
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error: {mae}", f"R² Score: {r2}", sep="\n")


Mean Absolute Error: 9.178297836939443
R² Score: 0.9980556265006728


In [29]:
# Target shares of income: 50% fixed costs, 30% discretionary, 20% savings.
fixed_expense_ratio, discretionary_ratio, savings_ratio = 0.50, 0.30, 0.20

# One suggested-amount column per budget category, each a fixed share of income.
suggested_shares = {
    'Suggested Fixed Expenses': fixed_expense_ratio,
    'Suggested Discretionary Expenses': discretionary_ratio,
    'Suggested Savings': savings_ratio,
}
for suggested_column, share in suggested_shares.items():
    df[suggested_column] = df['Income'] * share


In [30]:
# Actual minus suggested amount for each category; a positive value means the
# actual figure is above the suggestion.
df['Fixed Expense Deviation'] = df['Fixed Expenses'].sub(df['Suggested Fixed Expenses'])
df['Discretionary Expense Deviation'] = df['Discretionary Expenses'].sub(
    df['Suggested Discretionary Expenses']
)
df['Savings Deviation'] = df['Savings'].sub(df['Suggested Savings'])


In [31]:
# Flag rows whose combined expenses exceed income.
df['Total Expenses'] = df['Fixed Expenses'].add(df['Discretionary Expenses'])
df['Overspending Alert'] = df['Total Expenses'].gt(df['Income'])


In [32]:
def recommend_investments(risk_tolerance):
    """Return an investment suggestion for a risk-tolerance level.

    The notebook converts the 'Risk Tolerance' column from the text labels
    'Low (1)' / 'Medium (2)' / 'High (3)' to the integer codes 1 / 2 / 3 in
    the encoding cell, so both spellings are accepted here.

    Parameters
    ----------
    risk_tolerance : int or str
        Either an integer code (1, 2, 3) or one of the original text labels.

    Returns
    -------
    str or None
        The recommendation text, or None when the value is not a recognised
        level (including NaN and other unexpected inputs).
    """
    # Original text labels, translated to the integer codes used after encoding.
    label_to_level = {'Low (1)': 1, 'Medium (2)': 2, 'High (3)': 3}
    recommendations = {
        1: "Low-risk investments: Bonds, Savings Accounts",
        2: "Moderate-risk investments: Balanced ETFs, Mutual Funds",
        3: "High-risk investments: Stocks, Real Estate",
    }

    # True == 1 in Python; do not let a boolean masquerade as a risk level.
    if isinstance(risk_tolerance, bool):
        return None

    try:
        level = label_to_level.get(risk_tolerance, risk_tolerance)
        return recommendations.get(level)
    except TypeError:
        # Unhashable input (e.g. a list) cannot be a valid level.
        return None

# 'Risk Tolerance' was converted from the text labels to the integer codes
# 1 / 2 / 3 by the earlier encoding cell. A mapping keyed on the text labels
# therefore matches nothing and fills the column with NaN, so the lookup is
# keyed on the integer codes instead.
investment_labels = {
    1: 'Low-risk investments',
    2: 'Moderate-risk investments',
    3: 'High-risk investments',
}
df['Investment Recommendation'] = df['Risk Tolerance'].map(investment_labels)
