In [10]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import joblib

In [11]:
# Read the financial dataset from the working directory and preview
# its first five rows as plain text.
df = pd.read_csv('dummy_financial_data.csv')
print(df.head(n=5))


   income  essential_spending  discretionary_spending  savings  \
0  141958               64268                   33289    36909   
1   35795               28141                   33024    24803   
2   20860               66044                    6777    20870   
3  123694               77214                   31749    35578   
4  148106               43827                   17338     7588   

  financial_health  
0          Healthy  
1          Healthy  
2          Healthy  
3          Healthy  
4             Poor  


In [12]:
# Target: the categorical financial-health label for each row.
y = df['financial_health']

# Features: the four numeric columns the classifier is trained on.
X = df[[
    'income',
    'essential_spending',
    'discretionary_spending',
    'savings',
]]

# Fit a label encoder on the target and map each class name to an integer
# code; `le` is kept so predictions can be decoded back to class names.
le = LabelEncoder()
y_encoded = le.fit_transform(y)


In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the
# partition reproducible across runs.
# NOTE(review): the split is not stratified on the label - consider
# `stratify=y_encoded` if class balance in the test set matters.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)


In [14]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Build a seeded 100-tree random forest and fit it on the training split
# (`fit` returns the estimator itself, so the calls can be chained).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows and print per-class precision/recall/F1,
# labelling each row of the report with the encoder's class names.
y_pred = model.predict(X_test)
print(
    classification_report(
        y_test,
        y_pred,
        target_names=le.classes_,
    )
)


              precision    recall  f1-score   support

     Healthy       0.95      0.95      0.95        21
    Moderate       0.92      0.92      0.92        12
        Poor       1.00      1.00      1.00         7

    accuracy                           0.95        40
   macro avg       0.96      0.96      0.96        40
weighted avg       0.95      0.95      0.95        40



In [16]:
# Write the fitted classifier to disk.
joblib.dump(value=model, filename='model.pkl')

# Write the label encoder next to it; the model outputs integer codes, so
# the encoder is what turns them back into class names.
joblib.dump(value=le, filename='label_encoder.pkl')

['label_encoder.pkl']

In [17]:
def predict_financial_health(income, essential_spending, discretionary_spending, savings,
                             *, classifier=None, encoder=None):
    """Classify one record's financial health and attach a short recommendation.

    Parameters
    ----------
    income, essential_spending, discretionary_spending, savings
        Feature values for a single record. They are placed in a one-row
        DataFrame whose columns use the same names and order as the
        training features.
    classifier : optional
        Object exposing ``predict(DataFrame)``. When omitted, the
        notebook-level ``model`` is used (the original behaviour).
    encoder : optional
        Object exposing ``inverse_transform(list_of_codes)``. When omitted,
        the notebook-level ``le`` is used (the original behaviour).

    Returns
    -------
    str
        Two lines: ``"Prediction: <label>"`` followed by ``"Advice: <message>"``.
    """
    # Resolve collaborators lazily so the function still works unchanged in
    # the notebook, but can also be driven by artefacts reloaded from disk
    # (or by stubs) without relying on hidden kernel state.
    active_model = model if classifier is None else classifier
    active_encoder = le if encoder is None else encoder

    # Column names and order must match the frame the model was fitted on.
    input_df = pd.DataFrame([{
        'income': income,
        'essential_spending': essential_spending,
        'discretionary_spending': discretionary_spending,
        'savings': savings
    }])

    # The model emits an encoded class; decode it back to its string label.
    pred = active_model.predict(input_df)[0]
    label = active_encoder.inverse_transform([pred])[0]

    # Optional recommendation message. Any label other than 'Healthy' or
    # 'Moderate' falls through to the "poor" advice.
    if label == 'Healthy':
        advice = "✅ You're financially healthy. Keep it up and start investing."
    elif label == 'Moderate':
        advice = "⚠️ Fair condition. Try to increase your monthly savings."
    else:
        advice = "🚨 Financial health is poor. Cut expenses and save more urgently."

    return f"Prediction: {label}\nAdvice: {advice}"


In [18]:
# Smoke-test the helper on one hand-written record.
print(
    predict_financial_health(
        income=60000,
        essential_spending=25000,
        discretionary_spending=20000,
        savings=5000,
    )
)


Prediction: Poor
Advice: 🚨 Financial health is poor. Cut expenses and save more urgently.
