In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import json
import os
from datetime import datetime

In [None]:
# Read the finance records from disk into the notebook-wide DataFrame `df`
df = pd.read_csv('finance_data.csv')
print(f"Dataset loaded: {len(df)} records")

In [None]:
# Integer-encode the categorical columns.
# Each column gets its own encoder instance, kept as a notebook-level name.
le_occ, le_city = LabelEncoder(), LabelEncoder()
df['Occupation_encoded'] = le_occ.fit_transform(df.loc[:, 'Occupation'])
df['City_Tier_encoded'] = le_city.fit_transform(df.loc[:, 'City_Tier'])

In [None]:
# Expense categories present in the dataset; their row-wise sum becomes Total_Expenses
expense_cols = [
    'Rent',
    'Loan_Repayment',
    'Insurance',
    'Groceries',
    'Transport',
    'Eating_Out',
    'Entertainment',
    'Utilities',
    'Healthcare',
    'Education',
    'Miscellaneous',
]
df['Total_Expenses'] = df.loc[:, expense_cols].sum(axis='columns')

In [None]:
# Fit the savings regressor on the training part of an 80/20 split
features = [
    'Income',
    'Age',
    'Dependents',
    'Occupation_encoded',
    'City_Tier_encoded',
    'Total_Expenses',
    'Desired_Savings_Percentage',
    'Disposable_Income',
]
X, y = df[features], df['Desired_Savings']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# fit() returns the estimator itself, so construction and fitting can be chained
model_savings = GradientBoostingRegressor(random_state=42).fit(X_train, y_train)
print("Savings model trained")

In [None]:
# Fit the overspending-risk classifier on the full dataset.
# A record is labelled as overspending (1) when expenses plus desired savings exceed income.
df['Required_Total'] = df['Total_Expenses'].add(df['Desired_Savings'])
df['Overspend'] = df['Required_Total'].gt(df['Income']).astype(int)
X_risk = df[[
    'Income',
    'Age',
    'Dependents',
    'Occupation_encoded',
    'City_Tier_encoded',
    'Total_Expenses',
    'Desired_Savings_Percentage',
]]
y_risk = df['Overspend']
# fit() returns the estimator itself, so construction and fitting can be chained
clf_risk = RandomForestClassifier(random_state=42).fit(X_risk, y_risk)
print("Risk model trained")

In [None]:
# User prediction function
_default_payload = {
    "Income": 60000.0,
    "Age": 30.0,
    "Dependents": 1.0,
    "Occupation_encoded": 1.0,
    "City_Tier_encoded": 1.0,
    "Total_Expenses": 7800.0,
    "Desired_Savings_Percentage": 15.0,
    "Disposable_Income": 35000.0,
    "Rent": 0.0,
    "Loan_Repayment": 0.0,
    "Insurance": 0.0,
    "Groceries": 0.0,
    "Transport": 0.0,
    "Eating_Out": 0.0,
    "Entertainment": 0.0,
    "Utilities": 0.0,
    "Healthcare": 0.0,
    "Education": 0.0,
    "Miscellaneous": 0.0,
}
_default_labels = {
    "Occupation": "Salaried",
    "City_Tier": "Tier 2",
}

def _normalize_payload(payload: dict) -> dict:
    values = _default_payload.copy()
    for key in values:
        try:
            values[key] = float(payload.get(key, values[key]))
        except (TypeError, ValueError):
            values[key] = values[key]
    return values

def generate_chart_data(user_values):
    """Generate updated chart data based on user input and dataset.

    Reads the notebook-level ``df`` and ``expense_cols``.

    Args:
        user_values: Mapping; only ``'predicted_savings'`` is read (the
            per-month amount used for the projection). Missing, ``None`` or
            negative values are projected as 0.

    Returns:
        Dict with five entries:
        ``scatter``    - income / total expenses / city tier / savings % for a
                         fixed-seed sample of at most 10 dataset rows;
        ``pie``        - mean of each expense column over the whole dataset;
        ``bar``        - mean total expenses per occupation, highest first;
        ``projection`` - balance after each of 12 monthly deposits;
        ``heatmap``    - correlation matrix of five numeric columns.
    """
    # Sample 10 records from dataset for scatter plot
    sample_df = df.sample(n=min(10, len(df)), random_state=42)

    # Scatter: Income vs Total Expenses
    scatter_data = {
        "income": sample_df['Income'].tolist(),
        "totalExpenses": sample_df['Total_Expenses'].tolist(),
        "cityTier": sample_df['City_Tier'].tolist(),
        "savingsPct": sample_df['Desired_Savings_Percentage'].tolist()
    }

    # Pie: Average expense breakdown
    pie_data = {
        "labels": expense_cols,
        "values": [round(df[col].mean(), 2) for col in expense_cols]
    }

    # Bar: Average expenses by occupation
    occupation_stats = df.groupby('Occupation')['Total_Expenses'].mean().sort_values(ascending=False)
    bar_data = {
        "labels": occupation_stats.index.tolist(),
        "values": [round(v, 2) for v in occupation_stats.values.tolist()]
    }

    # Projection: 12-month savings based on user's predicted savings
    # `or 0` guards against an explicit None; max() keeps negative savings out.
    predicted_monthly = max(user_values.get('predicted_savings') or 0, 0)
    interest_rate = 0.06  # 6% annual
    monthly_rate = interest_rate / 12
    months = list(range(1, 13))
    projection_values = []
    balance = 0
    for _ in months:
        if predicted_monthly > 0:
            # Deposit at the start of the month, then credit one month of
            # interest on the whole balance, so earlier deposits keep
            # compounding instead of earning a single month of interest.
            balance = (balance + predicted_monthly) * (1 + monthly_rate)
        projection_values.append(round(balance, 2))

    projection_data = {
        "months": months,
        "values": projection_values
    }

    # Heatmap: Correlation matrix
    corr_cols = ['Income', 'Total_Expenses', 'Desired_Savings_Percentage', 'Disposable_Income', 'Dependents']
    corr_matrix = df[corr_cols].corr()
    heatmap_data = {
        "labels": corr_cols,
        "matrix": corr_matrix.values.tolist()
    }

    return {
        "scatter": scatter_data,
        "pie": pie_data,
        "bar": bar_data,
        "projection": projection_data,
        "heatmap": heatmap_data
    }

def get_city_tier_adjustments(city_tier):
    """Look up the recommended expense percentages for a city tier.

    Returns a dict with one percentage per tier-sensitive category
    ('Rent', 'Transport', 'Eating_Out', 'Groceries', 'Utilities',
    'Entertainment') plus a human-readable 'context' label. Any tier other
    than 'Tier 1', 'Tier 2' or 'Tier 3' gets the 'Tier 2' entry.
    """
    fields = ('Rent', 'Transport', 'Eating_Out', 'Groceries', 'Utilities', 'Entertainment', 'context')

    # One row per tier, values in the same order as `fields`.
    rows = {
        # Metro: every tier-sensitive category is priced highest here
        'Tier 1': (35.0, 12.0, 6.0, 12.0, 6.0, 6.0, 'metro city (Mumbai, Delhi, Bangalore, etc.)'),
        # Moderate costs
        'Tier 2': (28.0, 10.0, 5.0, 10.0, 5.0, 5.0, 'Tier 2 city (Pune, Jaipur, Chandigarh, etc.)'),
        # Lowest costs
        'Tier 3': (20.0, 7.0, 4.0, 8.0, 4.0, 4.0, 'Tier 3 city'),
    }
    table = {tier: dict(zip(fields, row)) for tier, row in rows.items()}

    # Unknown tiers fall back to the middle tier
    if city_tier in table:
        return table[city_tier]
    return table['Tier 2']

def analyze_expense_categories(user_values, income, city_tier='Tier 2'):
    """Analyze each expense category and provide detailed breakdown with savings opportunities.

    Reads the notebook-level ``df`` and ``expense_cols`` and calls
    ``get_city_tier_adjustments``.

    Each non-zero category is compared against (a) a recommended share of
    ``income`` (city-adjusted for rent, groceries, transport, eating out,
    entertainment and utilities; fixed for the rest) and (b) the dataset
    average for the same city tier (whole-dataset average when the tier does
    not occur in ``df``).

    Args:
        user_values: Mapping from expense-category name (the entries of
            ``expense_cols``) to the user's amount. Missing keys count as 0.
        income: Income covering the same period as the expense amounts.
        city_tier: Tier label; tiers unknown to ``get_city_tier_adjustments``
            use its fallback percentages.

    Returns:
        Dict with:
        ``categories`` - one entry per non-zero category (amounts, percentages,
            status label, potential saving, advice list), sorted by potential
            saving, highest first;
        ``total_potential_savings`` - sum of the per-category savings;
        ``city_tier`` - the tier that was passed in;
        ``summary`` - one-line human-readable total.
    """
    # NOTE: emoji/symbols in user-facing text are written as \u / \U escapes,
    # like the status labels below; the previous literals were mis-decoded bytes.

    # Get city tier adjustments
    city_adjustments = get_city_tier_adjustments(city_tier)
    city_context = city_adjustments['context']

    # Filter dataset by city tier for more accurate comparisons
    if city_tier in df['City_Tier'].values:
        city_df = df[df['City_Tier'] == city_tier]
        avg_expenses = {col: city_df[col].mean() for col in expense_cols}
    else:
        avg_expenses = {col: df[col].mean() for col in expense_cols}

    # City-adjusted recommended percentages
    recommended_pct = {
        'Rent': city_adjustments.get('Rent', 30.0),
        'Loan_Repayment': 10.0,
        'Insurance': 5.0,
        'Groceries': city_adjustments.get('Groceries', 10.0),
        'Transport': city_adjustments.get('Transport', 10.0),
        'Eating_Out': city_adjustments.get('Eating_Out', 5.0),
        'Entertainment': city_adjustments.get('Entertainment', 5.0),
        'Utilities': city_adjustments.get('Utilities', 5.0),
        'Healthcare': 5.0,
        'Education': 5.0,
        'Miscellaneous': 5.0,
    }

    category_analysis = []
    total_potential_savings = 0

    for category in expense_cols:
        # Categories the user did not fill in (or set to 0) are left out of the report.
        amount = user_values.get(category, 0)
        if amount == 0:
            continue

        avg_amount = avg_expenses.get(category, 0)
        rec_pct = recommended_pct.get(category, 5)
        recommended_amount = income * (rec_pct / 100)
        user_pct = (amount / income) * 100 if income > 0 else 0

        status = "\U0001F7E2 Good"  # Green circle
        potential_saving = 0
        advice = []

        # Determine status and potential savings
        if amount > recommended_amount * 1.3:  # 30% above recommended
            status = "\U0001F534 High"  # Red circle
            potential_saving = amount - recommended_amount
            advice.append(f"Reduce to recommended \u20B9{recommended_amount:,.0f} ({rec_pct:.0f}% of income for {city_context})")
        elif amount > recommended_amount * 1.1:  # 10% above recommended
            status = "\U0001F7E1 Moderate"  # Yellow circle
            potential_saving = amount - recommended_amount
            advice.append(f"Consider reducing to \u20B9{recommended_amount:,.0f} (typical for {city_context})")
        elif amount > avg_amount * 1.2:  # 20% above dataset average
            status = "\U0001F7E1 Above Average"
            potential_saving = amount - avg_amount
            advice.append(f"{city_tier} average is \u20B9{avg_amount:,.0f}")
        else:
            advice.append(f"Spending is within healthy range for {city_context}")

        # Enhanced category-specific tips with city context and detailed advice.
        # Tips are only added once the amount exceeds the recommended share.
        if category == 'Rent' and amount > income * recommended_pct.get('Rent', 30) / 100:
            if city_tier == 'Tier 1':
                advice.append("\u27A4 Metro rent is high - consider suburbs (save 30-40%), co-living spaces, or roommates")
            else:
                advice.append("\u27A4 Consider relocating to cheaper areas or finding roommates to split costs")

        elif category == 'Loan_Repayment' and amount > income * 0.10:
            advice.append("\u27A4 High debt burden - consider debt consolidation, balance transfer to lower rate, or longer tenure")
            if amount > income * 0.30:
                advice.append("\u26A0\uFE0F CRITICAL: Debt >30% of income. Contact lender for restructuring immediately")

        elif category == 'Insurance' and amount > income * 0.05:
            advice.append("\u27A4 Review policies - consider term insurance over ULIPs. Health: \u20B95-10L family floater adequate")
            advice.append("\u27A4 Compare premiums across providers annually. Online policies 20-30% cheaper")

        elif category == 'Groceries' and amount > income * recommended_pct.get('Groceries', 10) / 100:
            if city_tier == 'Tier 1':
                advice.append("\u27A4 Shop at wholesale stores (D-Mart, Metro, BigBasket), buy in bulk. Save \u20B92000-4000/month")
                advice.append("\u27A4 Reduce packaged/processed foods, plan weekly menus, freeze leftovers to prevent waste")
            else:
                advice.append("\u27A4 Buy seasonal produce from local mandis/markets (30-50% cheaper than supermarkets)")
                advice.append("\u27A4 Avoid food waste - plan meals, use leftovers creatively, buy only what you need")

        elif category == 'Transport' and amount > income * recommended_pct.get('Transport', 10) / 100:
            if city_tier == 'Tier 1':
                advice.append("\u27A4 Metro/local trains 70% cheaper than cabs. Monthly pass: \u20B9800 vs \u20B96000 in cabs")
                advice.append("\u27A4 Carpool apps (Quick Ride, sRide) save 50%. Bike pooling for <5km trips")
            else:
                advice.append("\u27A4 Public bus passes \u20B9500-800/month vs \u20B93000+ in auto/cabs. Bike for short distances")
                advice.append("\u27A4 Maintain vehicle regularly to prevent costly repairs. Proper tire pressure saves 10% fuel")

        elif category == 'Eating_Out' and amount > income * recommended_pct.get('Eating_Out', 5) / 100:
            if city_tier == 'Tier 1':
                advice.append("\u27A4 Office lunch \u20B9200-300/day = \u20B96000/month waste. Meal prep saves 60-70%")
                advice.append("\u27A4 Cook bulk on weekends, freeze portions. Tiffin services \u20B92500/month vs \u20B99000 eating out")
            else:
                advice.append("\u27A4 Home cooking saves 70-80%. Pack office lunch (\u20B950 vs \u20B9150-200 outside)")
                advice.append("\u27A4 Limit restaurants to 1-2x/month for special occasions only")

        elif category == 'Entertainment' and amount > income * recommended_pct.get('Entertainment', 5) / 100:
            if city_tier == 'Tier 1':
                advice.append("\u27A4 Free city events, parks, museums on free days. Share OTT subscriptions (\u20B9300 saved)")
                advice.append("\u27A4 Cancel unused gym (home workouts free), review all subscriptions quarterly")
            else:
                advice.append("\u27A4 Community events, libraries, local parks free. Cancel Netflix if not watching 10+ hrs/month")
                advice.append("\u27A4 Download movies on OTT for offline viewing. Reduces repeated subscription renewals")

        elif category == 'Utilities' and amount > income * recommended_pct.get('Utilities', 5) / 100:
            if city_tier == 'Tier 1':
                advice.append("\u27A4 AC at 24\u00B0C saves \u20B91000-2000/month. LED bulbs save 75% electricity")
                advice.append("\u27A4 5-star appliances save 30-40% power. Unplug devices, solar heater saves \u20B9500/month")
            else:
                advice.append("\u27A4 LED bulbs \u20B9200 investment saves \u20B9100/month. Optimize geyser timing (15 min enough)")
                advice.append("\u27A4 Unplug phone chargers (vampire power). Fix water leaks immediately")

        elif category == 'Healthcare' and amount > income * 0.05:
            advice.append("\u27A4 Check health insurance coverage (\u20B95-10L family floater). Preventive care saves lakhs later")
            advice.append("\u27A4 Generic medicines 50-80% cheaper (same composition). Annual checkup catches issues early")
            advice.append("\u27A4 Dental/eye checkups prevent expensive treatments. Many company insurance covers preventive care")

        elif category == 'Education' and amount > income * 0.05:
            advice.append("\u27A4 Online courses: Coursera/Udemy 90% off sales (\u20B9400 vs \u20B94000). YouTube tutorials free")
            advice.append("\u27A4 Check employer learning reimbursement programs. Many offer \u20B910-50K/year")
            advice.append("\u27A4 Public libraries, free certification programs, NPTEL courses at zero cost")

        elif category == 'Miscellaneous' and amount > income * 0.05:
            advice.append("\u27A4 Track EVERY expense for 30 days - you'll find \u20B92000-5000 in hidden leaks")
            advice.append("\u27A4 24-hour rule: Wait 1 day before any purchase >\u20B91000. Cancel unused subscriptions")
            advice.append("\u27A4 Use cash-back apps, credit card rewards. Can save \u20B9500-1000/month on regular spends")

        total_potential_savings += potential_saving

        category_analysis.append({
            "category": category.replace('_', ' '),
            "current_amount": round(amount, 2),
            "current_percentage": round(user_pct, 2),
            "recommended_amount": round(recommended_amount, 2),
            "recommended_percentage": rec_pct,
            "city_tier_average": round(avg_amount, 2),
            "status": status,
            "potential_saving": round(potential_saving, 2),
            "advice": advice
        })

    # Sort by potential savings (highest first)
    category_analysis.sort(key=lambda x: x['potential_saving'], reverse=True)

    return {
        "categories": category_analysis,
        "total_potential_savings": round(total_potential_savings, 2),
        "city_tier": city_tier,
        "summary": f"Total potential savings across all categories: \u20B9{total_potential_savings:,.2f}/month (based on {city_context} benchmarks)"
    }

def generate_comprehensive_recommendations(user_values, predicted_savings, overspend_prob, category_breakdown):
    """Generate comprehensive recommendations covering investments, insurance, debt, and savings.

    Builds a flat list of display lines organised in sections: expense
    optimization, investments, insurance, debt, emergency fund, tax, quick
    wins and (only when ``overspend_prob`` exceeds 0.5) risk mitigation.

    Args:
        user_values: Mapping; the keys read are 'Income', 'Age',
            'Total_Expenses', 'Desired_Savings_Percentage', 'Loan_Repayment'
            and 'Insurance'. Missing keys count as 0. 'Income' is treated as a
            monthly figure (it is multiplied by 12 for annual amounts).
        predicted_savings: Predicted monthly savings; ``None`` or negative
            values mean nothing is available to invest.
        overspend_prob: Probability in [0, 1], or ``None``/0 to skip the risk
            section.
        category_breakdown: Dict with a 'categories' list (entries providing
            'category', 'current_amount', 'recommended_amount',
            'potential_saving' and 'advice') and an optional 'city_tier'
            (defaults to 'Tier 2').

    Returns:
        List of strings, one per output line (empty strings are spacers).
    """
    # NOTE: emoji/symbols in user-facing text are written as \u / \U escapes;
    # the previous literals were mis-decoded bytes.
    recommendations = []

    income = user_values.get('Income', 0)
    age = user_values.get('Age', 0)
    expenses = user_values.get('Total_Expenses', 0)
    savings_pct = user_values.get('Desired_Savings_Percentage', 0)
    desired_savings_amount = income * (savings_pct / 100)
    available_after_expenses = income - expenses
    city_tier = category_breakdown.get('city_tier', 'Tier 2')

    # Current expense analysis
    loan_amount = user_values.get('Loan_Repayment', 0)
    insurance_amount = user_values.get('Insurance', 0)

    # === SECTION 1: EXPENSE OPTIMIZATION ===
    recommendations.append("")
    recommendations.append("\U0001F4CB EXPENSE OPTIMIZATION STRATEGY")
    recommendations.append("")

    if available_after_expenses < desired_savings_amount:
        shortfall = desired_savings_amount - available_after_expenses
        recommendations.append(f"\U0001F4B0 Primary Goal: Reduce expenses by \u20B9{shortfall:,.0f}/month to meet your {savings_pct}% savings target")
        recommendations.append("")

        # Highlight top savings opportunities: the first three entries (in the
        # order given) that have any saving potential.
        high_priority = [cat for cat in category_breakdown['categories'] if cat['potential_saving'] > 0][:3]

        if high_priority:
            recommendations.append("\U0001F4CA TOP 3 IMMEDIATE ACTIONS:")
            cumulative_savings = 0
            for idx, cat in enumerate(high_priority, 1):
                cumulative_savings += cat['potential_saving']
                recommendations.append(f"   {idx}. {cat['category']}: \u20B9{cat['current_amount']:,.0f} \u27A4 \u20B9{cat['recommended_amount']:,.0f}")
                recommendations.append(f"      \u27A4 Save: \u20B9{cat['potential_saving']:,.0f}/month (\u20B9{cat['potential_saving']*12:,.0f}/year)")
                if cat['advice']:
                    recommendations.append(f"      \u2022 {cat['advice'][0]}")

            recommendations.append("")
            if cumulative_savings >= shortfall:
                recommendations.append(f"\u2705 SUCCESS: These 3 actions would save \u20B9{cumulative_savings:,.0f}/month, covering your \u20B9{shortfall:,.0f} shortfall!")
            else:
                remaining = shortfall - cumulative_savings
                recommendations.append(f"\u26A0\uFE0F PROGRESS: \u20B9{cumulative_savings:,.0f} saved, \u20B9{remaining:,.0f} more needed")
    else:
        surplus = available_after_expenses - desired_savings_amount
        recommendations.append(f"\u2705 EXCELLENT FINANCES: \u20B9{surplus:,.0f}/month surplus after meeting {savings_pct}% savings goal")
        recommendations.append("")

    # === SECTION 2: INVESTMENT STRATEGY ===
    recommendations.append("")
    recommendations.append("\U0001F4CB INVESTMENT & WEALTH BUILDING")
    recommendations.append("")

    # `or 0` tolerates a missing (None) prediction; max() keeps negatives out.
    investable_amount = max(predicted_savings or 0, 0)

    if investable_amount > 0:
        recommendations.append(f"\U0001F4B0 Available for Investments: \u20B9{investable_amount:,.0f}/month (\u20B9{investable_amount*12:,.0f}/year)")
        recommendations.append("")

        # Age-based investment strategy
        if age < 30:
            recommendations.append("\U0001F4CA AGGRESSIVE GROWTH PORTFOLIO (Age < 30):")
            equity_amt = investable_amount * 0.70
            debt_amt = investable_amount * 0.20
            liquid_amt = investable_amount * 0.10

            recommendations.append(f"   \u2022 Equity Mutual Funds/Index Funds (70%): \u20B9{equity_amt:,.0f}/month")
            recommendations.append(f"     - Nifty 50 Index Fund: \u20B9{equity_amt*0.4:,.0f}")
            recommendations.append(f"     - Mid-Cap Funds: \u20B9{equity_amt*0.3:,.0f}")
            recommendations.append(f"     - International Funds (US): \u20B9{equity_amt*0.3:,.0f}")
            recommendations.append(f"   \u2022 Debt Funds/PPF (20%): \u20B9{debt_amt:,.0f}/month for stability")
            recommendations.append(f"   \u2022 Liquid Emergency Fund (10%): \u20B9{liquid_amt:,.0f}/month")

        elif age < 45:
            recommendations.append("\U0001F4CA BALANCED GROWTH PORTFOLIO (Age 30-45):")
            equity_amt = investable_amount * 0.60
            debt_amt = investable_amount * 0.30
            liquid_amt = investable_amount * 0.10

            recommendations.append(f"   \u2022 Equity Funds (60%): \u20B9{equity_amt:,.0f}/month")
            recommendations.append(f"     - Large-Cap Index Funds: \u20B9{equity_amt*0.5:,.0f}")
            recommendations.append(f"     - Diversified Equity Funds: \u20B9{equity_amt*0.5:,.0f}")
            recommendations.append(f"   \u2022 Debt Instruments (30%): \u20B9{debt_amt:,.0f}/month")
            recommendations.append(f"     - PPF/EPF: \u20B9{debt_amt*0.5:,.0f}")
            recommendations.append(f"     - Corporate Bonds/Debt Funds: \u20B9{debt_amt*0.5:,.0f}")
            recommendations.append(f"   \u2022 Liquid Fund (10%): \u20B9{liquid_amt:,.0f}/month")

        else:
            recommendations.append("\U0001F4CA CONSERVATIVE PORTFOLIO (Age 45+):")
            equity_amt = investable_amount * 0.40
            debt_amt = investable_amount * 0.50
            liquid_amt = investable_amount * 0.10

            recommendations.append(f"   \u2022 Equity Funds (40%): \u20B9{equity_amt:,.0f}/month")
            recommendations.append(f"     - Large-Cap/Blue-Chip Funds: \u20B9{equity_amt:,.0f}")
            recommendations.append(f"   \u2022 Fixed Income (50%): \u20B9{debt_amt:,.0f}/month")
            recommendations.append(f"     - Senior Citizen Savings Scheme: \u20B9{debt_amt*0.4:,.0f}")
            recommendations.append(f"     - Fixed Deposits: \u20B9{debt_amt*0.3:,.0f}")
            recommendations.append(f"     - Government Bonds: \u20B9{debt_amt*0.3:,.0f}")
            recommendations.append(f"   \u2022 Liquid Funds (10%): \u20B9{liquid_amt:,.0f}/month")

        recommendations.append("")

        # City-specific investment opportunities
        if city_tier == 'Tier 1':
            recommendations.append("\U0001F4CA METRO CITY OPPORTUNITIES:")
            recommendations.append("   \u2022 REITs (Real Estate Investment Trusts) - High rental yields")
            recommendations.append("   \u2022 Corporate Bond Funds - Better access to quality instruments")
            recommendations.append("   \u2022 Gold ETFs - Hedge against metro inflation")
        else:
            recommendations.append("\U0001F4CA NON-METRO ADVANTAGES:")
            recommendations.append("   \u2022 Lower cost of living = Higher savings rate")
            recommendations.append("   \u2022 Consider purchasing property with surplus")
            recommendations.append("   \u2022 Sovereign Gold Bonds - Tax-efficient gold investment")

        # Long-term projections: yearly contributions (12 x monthly amount)
        # compounded once a year at 12%.
        recommendations.append("")
        recommendations.append("\U0001F4B0 WEALTH PROJECTIONS (12% annual returns):")
        for years in [5, 10, 20]:
            future_value = investable_amount * 12 * (((1 + 0.12)**(years) - 1) / 0.12)
            recommendations.append(f"   \u2022 After {years} years: \u20B9{future_value:,.0f}")
    else:
        recommendations.append("\u26A0\uFE0F Currently no surplus for investments. Focus on expense reduction first.")

    # === SECTION 3: INSURANCE COVERAGE ===
    recommendations.append("")
    recommendations.append("\U0001F4CB INSURANCE & RISK PROTECTION")
    recommendations.append("")

    # Life insurance rule: 10-15x annual income
    required_life_cover = income * 12 * 10

    # Health insurance: \u20B95L minimum, \u20B910L recommended for Tier 1
    if city_tier == 'Tier 1':
        required_health_cover = 1000000  # 10 Lakhs
    else:
        required_health_cover = 500000  # 5 Lakhs

    current_insurance_monthly = insurance_amount
    recommended_insurance_monthly = income * 0.05  # 5% of income

    recommendations.append("\U0001F4CA LIFE INSURANCE:")
    recommendations.append(f"   \u2022 Recommended Cover: \u20B9{required_life_cover/100000:.1f} Lakhs")
    recommendations.append("   \u2022 Best Option: Term Insurance (Pure protection, low premium)")
    recommendations.append(f"   \u2022 Estimated Premium: \u20B9{required_life_cover*0.0005/12:,.0f}/month")
    recommendations.append("   \u26A0\uFE0F AVOID: ULIPs and endowment plans (high charges, poor returns)")

    recommendations.append("")
    recommendations.append("\U0001F4CA HEALTH INSURANCE:")
    recommendations.append(f"   \u2022 Recommended Cover: \u20B9{required_health_cover/100000:.0f} Lakhs (family floater)")
    recommendations.append(f"   \u2022 Current Premium: \u20B9{current_insurance_monthly:,.0f}/month")

    if current_insurance_monthly < recommended_insurance_monthly * 0.7:
        gap = recommended_insurance_monthly - current_insurance_monthly
        recommendations.append(f"   \u26A0\uFE0F INCREASE: Add \u20B9{gap:,.0f}/month for adequate coverage")
    elif current_insurance_monthly > recommended_insurance_monthly * 1.5:
        excess = current_insurance_monthly - recommended_insurance_monthly
        recommendations.append(f"   \u27A4 OPTIMIZE: You may be over-insured. Review policies to save \u20B9{excess:,.0f}/month")
    else:
        recommendations.append("   \u2705 ADEQUATE: Insurance spending is appropriate")

    recommendations.append("")
    recommendations.append("\U0001F4B0 ADDITIONAL COVERAGE:")
    recommendations.append("   \u2022 Critical Illness Rider: \u20B9500-1000/month (covers 30+ diseases)")
    recommendations.append("   \u2022 Accident Cover: \u20B9200-400/month (\u20B950L-\u20B91Cr cover)")
    if city_tier != 'Tier 1':
        recommendations.append("   \u2022 Property Insurance: \u20B9300-600/month (home + contents)")

    # === SECTION 4: DEBT MANAGEMENT ===
    recommendations.append("")
    recommendations.append("\U0001F4CB DEBT OPTIMIZATION STRATEGY")
    recommendations.append("")

    if loan_amount > 0:
        if income > 0:
            loan_to_income = (loan_amount / income) * 100
            recommendations.append(f"\U0001F4B0 Current Debt Burden: \u20B9{loan_amount:,.0f}/month ({loan_to_income:.1f}% of income)")
        else:
            # No income to measure against: avoid dividing by zero and treat
            # the burden as unbounded so the critical branch below is taken.
            loan_to_income = float('inf')
            recommendations.append(f"\U0001F4B0 Current Debt Burden: \u20B9{loan_amount:,.0f}/month (no income reported)")
        recommendations.append("")

        if loan_to_income > 40:
            recommendations.append("\u26A0\uFE0F CRITICAL: Debt exceeds 40% of income - High risk!")
            recommendations.append("   \U0001F4CA PRIORITY ACTIONS:")
            recommendations.append("   1. Pause all new debt/credit card spending")
            recommendations.append("   2. Contact lender for restructuring/longer tenure")
            recommendations.append("   3. Use debt snowball method: Pay off smallest loan first")
            recommendations.append("   4. Consider debt consolidation at lower interest rate")
        elif loan_to_income > 30:
            recommendations.append("\u26A0\uFE0F MODERATE RISK: Debt at 30-40% of income")
            recommendations.append("   \U0001F4CA OPTIMIZATION TIPS:")
            recommendations.append("   \u2022 Avalanche method: Target highest interest rate debt first")
            recommendations.append("   \u2022 Make bi-weekly payments instead of monthly (saves interest)")
            recommendations.append("   \u2022 Round up payments: Pay \u20B911,000 instead of \u20B910,500")
        else:
            recommendations.append("\u2705 HEALTHY: Debt below 30% threshold")
            recommendations.append("   \U0001F4CA ACCELERATE PAYOFF:")
            recommendations.append("   \u2022 Add \u20B91000-2000 extra to principal each month")
            recommendations.append(f"   \u2022 Potential interest saved: \u20B9{loan_amount*0.1*2:,.0f} over loan tenure")

        recommendations.append("")
        recommendations.append("\U0001F4B0 DEBT PRIORITIZATION (Highest to Lowest Interest):")
        recommendations.append("   1. Credit Card Debt (18-36% APR) - Pay ASAP")
        recommendations.append("   2. Personal Loans (10-18% APR) - Prepay when possible")
        recommendations.append("   3. Car Loans (8-12% APR) - Standard EMI + occasional lump sum")
        recommendations.append("   4. Home Loans (7-9% APR) - Don't rush, invest surplus instead")

    else:
        recommendations.append("\u2705 DEBT-FREE: Excellent position for wealth building!")
        recommendations.append("")
        recommendations.append("\U0001F4CA MAINTAIN DISCIPLINE:")
        recommendations.append("   \u2022 Use credit cards for rewards, pay in full each month")
        recommendations.append("   \u2022 Avoid personal loans unless absolutely necessary")
        recommendations.append("   \u2022 Build 6-month emergency fund before any major purchases")
        recommendations.append("   \u2022 If buying home: Keep EMI below 30% of income")

    # === SECTION 5: EMERGENCY FUND ===
    recommendations.append("")
    recommendations.append("\U0001F4CB EMERGENCY FUND & LIQUIDITY")
    recommendations.append("")

    emergency_target = expenses * 6  # 6 months expenses

    recommendations.append(f"\U0001F4B0 Target Emergency Fund: \u20B9{emergency_target:,.0f} (6 months expenses)")
    recommendations.append("")
    recommendations.append("\U0001F4CA BUILD STRATEGY:")

    if investable_amount > 0:
        emergency_monthly = min(investable_amount * 0.2, emergency_target / 12)
        # With no reported expenses the target (and so the monthly allocation)
        # is 0; skip the lines instead of dividing by zero for the timeline.
        if emergency_monthly > 0:
            recommendations.append(f"   \u2022 Allocate: \u20B9{emergency_monthly:,.0f}/month to emergency fund")
            recommendations.append(f"   \u2022 Timeline: Build in {emergency_target/emergency_monthly:.0f} months")

    recommendations.append("")
    recommendations.append("\U0001F4CA WHERE TO PARK:")
    recommendations.append("   \u2022 Liquid Funds (50%): Instant redemption, 6-7% returns")
    recommendations.append("   \u2022 High-interest Savings (30%): Immediate access")
    recommendations.append("   \u2022 Fixed Deposits with sweep-in (20%): Better rates, some liquidity")
    recommendations.append("   \u274C AVOID: Stocks, real estate, or locked-in investments")

    # === SECTION 6: TAX OPTIMIZATION ===
    recommendations.append("")
    recommendations.append("\U0001F4CB TAX SAVING OPPORTUNITIES")
    recommendations.append("")

    annual_income = income * 12

    # bracket_rate mirrors the bracket named in the heading so the savings
    # figure below agrees with it.
    if annual_income > 1000000:  # > 10L
        bracket_rate = 0.30
        recommendations.append("\U0001F4CA HIGH INCOME TAX SAVER (30% bracket):")
    elif annual_income > 500000:  # 5-10L
        bracket_rate = 0.20
        recommendations.append("\U0001F4CA MODERATE INCOME TAX SAVER (20% bracket):")
    else:
        bracket_rate = 0.0
        recommendations.append("\U0001F4CA BASIC TAX PLANNING:")

    recommendations.append("")
    recommendations.append("\U0001F4B0 Section 80C (\u20B91.5L limit):")
    recommendations.append("   \u2022 ELSS Funds: \u20B912,500/month (3-year lock, equity returns)")
    recommendations.append("   \u2022 PPF: \u20B912,500/month (15-year lock, 7.1% safe returns)")
    recommendations.append("   \u2022 Life Insurance Premium: Already counting towards 80C")
    recommendations.append("   \u2022 Home Loan Principal: Auto-qualifies if you have home loan")

    recommendations.append("")
    recommendations.append("\U0001F4B0 Additional Deductions:")
    recommendations.append("   \u2022 80D: Health insurance premium (\u20B925K-\u20B950K)")
    recommendations.append("   \u2022 80CCD(1B): NPS contribution (\u20B950K over 80C limit)")
    recommendations.append("   \u2022 80E: Education loan interest (No upper limit)")
    recommendations.append("   \u2022 HRA: If paying rent and not owning house")

    if annual_income > 500000:
        # Saving on the full 1.5L Section 80C limit at the bracket's rate
        tax_saved = 150000 * bracket_rate
        recommendations.append("")
        recommendations.append(f"\U0001F4B0 POTENTIAL TAX SAVINGS: \u20B9{tax_saved:,.0f}/year with proper planning")

    # === SECTION 7: QUICK WINS ===
    recommendations.append("")
    recommendations.append(f"\U0001F4CB QUICK WINS FOR {city_tier} CITIES")
    recommendations.append("")

    # Discretionary categories with more than 500 of monthly saving potential
    quick_wins = []
    for cat in category_breakdown['categories']:
        if cat['category'] in ['Eating Out', 'Entertainment', 'Miscellaneous', 'Groceries'] and cat['potential_saving'] > 500:
            quick_wins.append({
                'category': cat['category'],
                'saving': cat['potential_saving'],
                'advice': cat['advice'][0] if cat['advice'] else 'Reduce spending'
            })

    if quick_wins:
        for idx, win in enumerate(quick_wins[:5], 1):
            recommendations.append(f"{idx}. {win['category']}: Save \u20B9{win['saving']:,.0f}/month")
            recommendations.append(f"   {win['advice']}")
    else:
        recommendations.append("\u2705 Your discretionary spending is already optimized!")
        recommendations.append("\u27A4 Focus on building investments and emergency fund")

    # === SECTION 8: RISK MITIGATION ===
    if overspend_prob and overspend_prob > 0.5:
        recommendations.append("")
        recommendations.append("\U0001F4CB HIGH RISK MITIGATION")
        recommendations.append("")
        recommendations.append(f"\u26A0\uFE0F Overspend Probability: {overspend_prob*100:.1f}%")
        recommendations.append("")
        recommendations.append("\U0001F4CA IMMEDIATE ACTIONS:")
        recommendations.append("   1. Set up auto-transfer: Move savings on salary day itself")
        recommendations.append("   2. Separate accounts: One for fixed expenses, one for discretionary")
        recommendations.append("   3. Spending alerts: Set up SMS alerts at 50%, 75%, 90% of budget")
        recommendations.append("   4. Weekly reviews: Check expenses every Sunday")
        recommendations.append("   5. 24-hour rule: Wait 24 hours before any purchase > \u20B91000")

    return recommendations

def generate_insights(user_values, predicted_savings, overspend_prob):
    """Generate personalized insights based on user data and predictions.

    Args:
        user_values: Mapping read with ``.get`` for the keys ``'Income'``,
            ``'Total_Expenses'`` and ``'Desired_Savings_Percentage'``; a
            missing key is treated as 0.
        predicted_savings: Numeric savings figure. The messages present it as
            a per-month amount and multiply it by 12 for the yearly figure.
        overspend_prob: Overspend probability as a fraction (it is compared
            against 0.3 / 0.6 and multiplied by 100 for display), or ``None``
            to skip the risk insight.

    Returns:
        list[str]: Insight messages in a fixed order: expense ratio and
        reality check, savings-rate assessment, overspending risk, and the
        predicted-savings summary. Sections whose preconditions are not met
        contribute nothing.
    """
    insights = []
    
    income = user_values.get('Income', 0)
    expenses = user_values.get('Total_Expenses', 0)
    savings_pct = user_values.get('Desired_Savings_Percentage', 0)
    desired_savings_amount = income * (savings_pct / 100)
    available_after_expenses = income - expenses
    
    # Insight 1: Expense ratio and reality check
    if income > 0:
        expense_ratio = (expenses / income) * 100
        insights.append(f"Your expenses are {expense_ratio:.1f}% of your income (\u20B9{expenses:,.0f} / \u20B9{income:,.0f}).")
        
        # Reality check: Can they actually save?
        if available_after_expenses <= 0:
            insights.append("‚ö†Ô∏è Critical: Expenses equal or exceed income. No funds available for savings.")
        elif available_after_expenses < desired_savings_amount:
            shortfall = desired_savings_amount - available_after_expenses
            insights.append(f"‚ö†Ô∏è Gap: Only \u20B9{available_after_expenses:,.0f} available, but \u20B9{desired_savings_amount:,.0f} needed for {savings_pct}% savings goal (shortfall: \u20B9{shortfall:,.0f}).")
        else:
            surplus = available_after_expenses - desired_savings_amount
            insights.append(f"‚úÖ Healthy surplus: \u20B9{available_after_expenses:,.0f} available after expenses. Your {savings_pct}% goal (\u20B9{desired_savings_amount:,.0f}) leaves \u20B9{surplus:,.0f} buffer.")
    
    # Insight 2: Savings rate assessment - compare available vs target
    if available_after_expenses > 0:
        # A positive surplus does not imply a positive income (Total_Expenses
        # may be negative), so apply the same income guard as Insight 1 before
        # dividing; without it an income of 0 raises ZeroDivisionError.
        if income > 0:
            actual_savings_pct = (available_after_expenses / income) * 100
            
            # Compare against user's goal first
            if actual_savings_pct >= savings_pct:
                # Can meet or exceed goal
                if actual_savings_pct >= savings_pct * 1.1:  # 10% above goal
                    insights.append(f"‚úÖ Excellent: {actual_savings_pct:.1f}% available, exceeding your {savings_pct}% goal!")
                else:
                    insights.append(f"‚úÖ Good: {actual_savings_pct:.1f}% available, meeting your {savings_pct}% savings goal.")
            else:
                # Cannot meet goal
                gap_pct = savings_pct - actual_savings_pct
                insights.append(f"‚ö†Ô∏è Below Target: Only {actual_savings_pct:.1f}% available, falling short of your {savings_pct}% goal by {gap_pct:.1f}%.")
    elif predicted_savings < 0:
        insights.append(f"‚ùå Deficit situation: Your {savings_pct}% target is unachievable with current expenses. Reduce expenses or adjust savings goal.")
    
    # Insight 3: Overspending risk
    if overspend_prob is not None:
        if overspend_prob < 0.3:
            if available_after_expenses >= desired_savings_amount:
                insights.append(f"‚úÖ Low overspending risk ({overspend_prob*100:.1f}%). Income comfortably covers expenses + savings goals.")
            else:
                insights.append(f"‚ö†Ô∏è Low overspending risk ({overspend_prob*100:.1f}%), but savings target exceeds available funds.")
        elif overspend_prob < 0.6:
            insights.append(f"‚ö†Ô∏è Moderate risk ({overspend_prob*100:.1f}%). Expenses + savings goals strain your income.")
        else:
            insights.append(f"‚ùå High risk ({overspend_prob*100:.1f}%). Expenses + savings goals exceed income. Budget adjustment critical.")
    
    # Insight 4: Predicted savings context - clearly show what's achievable vs desired
    if predicted_savings > 0:
        monthly_savings = predicted_savings
        annual_savings = monthly_savings * 12
        
        # Check if there's a shortfall
        if available_after_expenses < desired_savings_amount:
            # User CANNOT meet their goal
            shortfall = desired_savings_amount - available_after_expenses
            insights.append(f"üí∞ Achievable: \u20B9{monthly_savings:,.2f}/month (\u20B9{annual_savings:,.2f}/year). Shortfall: \u20B9{shortfall:,.2f}/month (\u20B9{shortfall * 12:,.2f}/year) below your {savings_pct}% goal of \u20B9{desired_savings_amount:,.2f}/month.")
        else:
            # User CAN meet or exceed their goal
            surplus = available_after_expenses - desired_savings_amount
            if surplus > 100:  # Meaningful surplus
                insights.append(f"üí∞ Target achieved: \u20B9{monthly_savings:,.2f}/month (\u20B9{annual_savings:,.2f}/year) meets your {savings_pct}% goal. Additional \u20B9{surplus:,.2f}/month available for extra savings or investments.")
            else:
                insights.append(f"üí∞ Target achieved: \u20B9{monthly_savings:,.2f}/month (\u20B9{annual_savings:,.2f}/year) meets your {savings_pct}% savings goal.")
    elif predicted_savings < 0:
        deficit = abs(predicted_savings)
        annual_deficit = deficit * 12
        insights.append(f"üí∞ Monthly shortfall: -\u20B9{deficit:,.2f} (-\u20B9{annual_deficit:,.2f} annually). Immediate action required.")
    else:
        # Reached whenever predicted_savings is exactly 0 (or not comparable, e.g. NaN).
        insights.append("üí∞ Breakeven: Expenses consume all available income after meeting savings target.")
    
    return insights

def run_user_prediction(payload: dict) -> dict:
    """Run the savings and overspend-risk prediction for one user payload.

    Steps, in order:
      1. Normalize ``payload`` through ``_normalize_payload``.
      2. Encode the ``"Occupation"`` / ``"City_Tier"`` labels (falling back to
         ``_default_labels``) with ``le_occ`` / ``le_city``.
      3. Compute savings arithmetically: income minus expenses, capped at the
         desired savings amount (income * Desired_Savings_Percentage / 100).
      4. Ask ``clf_risk`` for the overspend probability.
      5. Build the expense breakdown, chart data, insights and
         recommendations, then write everything to ``user_prediction.json``
         in the current working directory.

    Label encoding and the risk prediction are best-effort: a failure does not
    abort the run, but it now emits a ``RuntimeWarning`` instead of being
    silently discarded.

    Args:
        payload: Raw user input. ``"Occupation"`` and ``"City_Tier"`` are read
            directly from it; everything else goes through
            ``_normalize_payload``.

    Returns:
        dict with the keys ``predicted_desired_savings``,
        ``desired_savings_amount``, ``shortfall``, ``overspend_probability``
        (``None`` if the classifier call failed), ``input``, ``generated_at``,
        ``charts``, ``insights``, ``recommendations`` and
        ``expense_breakdown``. The same dict is what gets written to disk.

    Raises:
        KeyError: if the normalized values lack one of the feature keys fed to
            the risk model (``Income``, ``Age``, ``Dependents``, ...).
    """
    # Local imports: only this function needs these names.
    import warnings
    from datetime import timezone

    values = _normalize_payload(payload)
    occupation_label = payload.get("Occupation", _default_labels["Occupation"])
    city_label = payload.get("City_Tier", _default_labels["City_Tier"])
    
    # Encode categorical values. On failure the entry in `values` is left
    # untouched, but the caller is told that the label was not applied.
    if occupation_label:
        try:
            values["Occupation_encoded"] = float(le_occ.transform([occupation_label])[0])
        except Exception as exc:
            warnings.warn(
                f"Could not encode Occupation {occupation_label!r} ({exc}); "
                "leaving Occupation_encoded unchanged.",
                RuntimeWarning,
                stacklevel=2,
            )
    if city_label:
        try:
            values["City_Tier_encoded"] = float(le_city.transform([city_label])[0])
        except Exception as exc:
            warnings.warn(
                f"Could not encode City_Tier {city_label!r} ({exc}); "
                "leaving City_Tier_encoded unchanged.",
                RuntimeWarning,
                stacklevel=2,
            )

    # Calculate savings mathematically
    income = values.get("Income", 0)
    expenses = values.get("Total_Expenses", 0)
    savings_pct = values.get("Desired_Savings_Percentage", 0)
    
    available_after_expenses = income - expenses
    desired_savings_amount = income * (savings_pct / 100)
    
    # Predicted savings is what's left after expenses, capped by desired target
    predicted = (
        desired_savings_amount
        if available_after_expenses >= desired_savings_amount
        else available_after_expenses
    )

    # Predict overspend probability
    overspend_payload = pd.DataFrame([{
        "Income": values["Income"],
        "Age": values["Age"],
        "Dependents": values["Dependents"],
        "Occupation_encoded": values["Occupation_encoded"],
        "City_Tier_encoded": values["City_Tier_encoded"],
        "Total_Expenses": values["Total_Expenses"],
        "Desired_Savings_Percentage": values["Desired_Savings_Percentage"]
    }])
    overspend_prob = None
    try:
        # Second predict_proba column of the single input row.
        overspend_prob = float(clf_risk.predict_proba(overspend_payload)[0][1])
    except Exception as exc:
        warnings.warn(
            f"Overspend probability could not be computed ({exc}); reporting None.",
            RuntimeWarning,
            stacklevel=2,
        )
        overspend_prob = None

    # Generate detailed category breakdown with city tier context
    category_breakdown = analyze_expense_categories(values, income, city_label)
    
    # Generate chart data, insights, and comprehensive recommendations.
    # `predicted_savings` is stored on `values` first, so it is visible to the
    # helpers below and also ends up under "input" in the result.
    values['predicted_savings'] = predicted
    chart_data = generate_chart_data(values)
    insights = generate_insights(values, predicted, overspend_prob)
    recommendations = generate_comprehensive_recommendations(values, predicted, overspend_prob, category_breakdown)

    # datetime.utcnow() is deprecated; take an aware UTC timestamp and strip
    # the tzinfo so the serialized form keeps its "...Z" suffix.
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    result = {
        "predicted_desired_savings": round(predicted, 2),
        "desired_savings_amount": round(desired_savings_amount, 2),
        "shortfall": round(max(0, desired_savings_amount - available_after_expenses), 2),
        "overspend_probability": overspend_prob,
        "input": values,
        "generated_at": generated_at,
        "charts": chart_data,
        "insights": insights,
        "recommendations": recommendations,
        "expense_breakdown": category_breakdown
    }
    
    # Write result to file
    with open("user_prediction.json", "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    
    return result


In [None]:
# Run the user prediction only when a payload was passed via the environment
if not os.environ.get("USER_INPUT_PAYLOAD"):
    # Variable unset or empty: nothing to predict
    print("No user input payload provided")
else:
    # The variable carries a JSON document; decode it and run the prediction
    payload = json.loads(os.environ["USER_INPUT_PAYLOAD"])
    user_result = run_user_prediction(payload)
    # Print the serialized result after the USER_RESULT tag
    print("USER_RESULT", json.dumps(user_result))