# BudgetGuard: Cashflow Forecasting & Goal Risk

**Bayesian Expense Modeling with Monte Carlo Simulation**

This notebook demonstrates:
- 30/60/90-day cashflow forecasts with uncertainty quantification
- Overdraft and savings-goal risk analysis
- Scenario analysis with spending caps
- Variance decomposition to identify risk drivers
- Backtesting and calibration analysis

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

from budgetguard import (
    ExpenseCategory,
    BayesianExpenseModel,
    CashflowSimulator,
    ScenarioAnalyzer,
    ForecastBacktester,
    create_example_model
)

# Global plot styling: seaborn's white-grid theme and a wide default figure
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

print("BudgetGuard modules loaded successfully")

## 1. Setup: Define Expense Model

Create a Bayesian expense model with categories based on your spending patterns.

In [None]:
# Spending categories as (name, monthly mean $, monthly std $) rows.
# Every category is created with the same alpha and beta arguments.
PRIOR_ALPHA = 2.0
PRIOR_BETA = 2.0

category_specs = [
    ('housing', 1500.0, 50.0),         # $1,500/month average; low variance (fixed rent)
    ('groceries', 400.0, 100.0),       # moderate variance
    ('dining', 300.0, 150.0),          # high variance (discretionary)
    ('transportation', 200.0, 80.0),
    ('subscriptions', 100.0, 20.0),    # very low variance (fixed)
    ('entertainment', 200.0, 120.0),   # high variance (discretionary)
]

categories = {
    name: ExpenseCategory(
        name=name,
        mean=mean,
        std=std,
        alpha=PRIOR_ALPHA,
        beta=PRIOR_BETA,
    )
    for name, mean, std in category_specs
}

expense_model = BayesianExpenseModel(categories)

# Summary table: one row per category, with the coefficient of variation
# (std / mean) expressed as a percentage.
summary_rows = []
for cat in categories.values():
    cv_pct = cat.std / cat.mean * 100
    summary_rows.append({
        'Category': cat.name,
        'Monthly Mean': f'${cat.mean:.0f}',
        'Std Dev': f'${cat.std:.0f}',
        'CV (%)': f'{cv_pct:.1f}',
    })
cat_df = pd.DataFrame(summary_rows)

total_monthly_spending = sum(cat.mean for cat in categories.values())

print("Expense Categories:")
print(cat_df.to_string(index=False))
print(f"\nTotal Monthly Spending: ${total_monthly_spending:.0f}")

## 2. Multi-Horizon Cashflow Forecasts (30/60/90 Days)

Generate probabilistic forecasts with uncertainty quantification.

In [None]:
# Simulator initialised with the starting balance, expense model and income
simulator = CashflowSimulator(
    initial_balance=5000.0,
    expense_model=expense_model,
    monthly_income=4500.0,
)

horizons = [30, 60, 90]   # forecast horizons, in days
savings_goal = 3000.0
forecasts = {}            # horizon (days) -> forecast returned by simulate()

print("\nCASHFLOW FORECASTS")
print("=" * 80)

# Run one simulation per horizon and print its summary statistics
for days in horizons:
    forecast = simulator.simulate(
        days=days,
        n_simulations=10000,
        savings_goal=savings_goal,
    )
    forecasts[days] = forecast

    report_lines = [
        f"\n{days}-Day Forecast:",
        f"  Mean Balance:        ${forecast.mean_balance:>10,.2f}",
        f"  Median (P50):        ${forecast.percentile_50:>10,.2f}",
        f"  5th Percentile:      ${forecast.percentile_5:>10,.2f}",
        f"  95th Percentile:     ${forecast.percentile_95:>10,.2f}",
        f"  Overdraft Risk:      {forecast.overdraft_probability:>10.2%}",
        f"  Goal Miss Risk:      {forecast.goal_miss_probability:>10.2%}",
    ]
    print("\n".join(report_lines))

### Visualize Forecast Distributions

In [None]:
# One histogram panel per forecast horizon. The grid is sized from `forecasts`
# rather than hard-coded to three panels, and squeeze=False keeps `axes`
# two-dimensional even when there is only a single horizon, so this cell keeps
# working if the list of horizons is edited.
n_panels = len(forecasts)
fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 4), squeeze=False)

for ax, (days, forecast) in zip(axes[0], forecasts.items()):
    # Histogram of the simulated outcomes for this horizon
    # (presumably final balances, per the x-axis label -- confirm)
    ax.hist(forecast.simulations, bins=50, alpha=0.7, edgecolor='black')

    # Reference lines: central estimates, the savings goal, and the
    # zero-balance (overdraft) threshold
    ax.axvline(forecast.mean_balance, color='red', linestyle='--',
               linewidth=2, label='Mean')
    ax.axvline(forecast.percentile_50, color='green', linestyle='--',
               linewidth=2, label='Median')
    ax.axvline(savings_goal, color='orange', linestyle=':',
               linewidth=2, label='Goal')
    ax.axvline(0, color='darkred', linestyle=':',
               linewidth=2, label='Overdraft')

    ax.set_title(f'{days}-Day Forecast', fontsize=12, fontweight='bold')
    ax.set_xlabel('Final Balance ($)', fontsize=10)
    ax.set_ylabel('Frequency', fontsize=10)
    ax.legend()
    ax.grid(alpha=0.3)

plt.tight_layout()
plt.savefig('forecast_distributions.png', dpi=300, bbox_inches='tight')
plt.show()

print("Distribution plots saved as 'forecast_distributions.png'")

## 3. Scenario Analysis: Impact of Spending Caps

Analyze how category-specific spending caps reduce overdraft risk.

In [None]:
analyzer = ScenarioAnalyzer(simulator)

# Candidate cap amounts ($) to evaluate for each category
candidate_caps = {
    'dining': [200, 250, 300],
    'entertainment': [100, 150, 200],
    'subscriptions': [75, 90],
}
scenario_results = analyzer.analyze_cap_impact(
    days=60,
    category_caps=candidate_caps,
)

print("\nSCENARIO ANALYSIS: Spending Cap Impact")
print("=" * 80)

# The row labelled 'baseline' carries the no-cap overdraft probability
is_baseline = scenario_results['scenario'] == 'baseline'
baseline_risk = scenario_results.loc[is_baseline, 'overdraft_prob'].values[0]
print(f"\nBaseline Overdraft Risk: {baseline_risk:.2%}")
print("\nRisk Reduction by Category Cap:")

# All capped scenarios, largest risk reduction first
cap_results = scenario_results[~is_baseline].sort_values(
    'risk_reduction', ascending=False
)
for _, row in cap_results.iterrows():
    if pd.isna(row['risk_reduction']):
        continue  # nothing to report for scenarios without a reduction value
    print(f"  {row['category']:<15} cap ${row['cap']:>6,.0f}: "
          f"Risk {row['overdraft_prob']:>6.2%} "
          f"(reduction: {row['risk_reduction']:>6.2%})")

### Visualize Scenario Impact

In [None]:
# Bar chart of overdraft-risk reduction for every cap scenario that has a value
def _scenario_label(row):
    """Build a two-line tick label: category name above the cap amount."""
    return f"{row['category']}\n${row['cap']:.0f}"


plot_data = cap_results.dropna(subset=['risk_reduction']).copy()
plot_data['scenario_label'] = plot_data.apply(_scenario_label, axis=1)

fig, ax = plt.subplots(figsize=(12, 6))
positions = range(len(plot_data))
bars = ax.bar(positions, plot_data['risk_reduction'] * 100)
ax.set_xticks(positions)
ax.set_xticklabels(plot_data['scenario_label'], rotation=45, ha='right')
ax.set_ylabel('Risk Reduction (%)', fontsize=12)
ax.set_title('Overdraft Risk Reduction by Spending Cap Scenario',
             fontsize=14, fontweight='bold')
ax.grid(axis='y', alpha=0.3)

# One colour per category; categories without an entry fall back to gray
category_colors = {
    'dining': 'coral',
    'entertainment': 'skyblue',
    'subscriptions': 'lightgreen',
}
for bar, category in zip(bars, plot_data['category']):
    bar.set_color(category_colors.get(category, 'gray'))

fig.tight_layout()
fig.savefig('scenario_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

print("Scenario analysis plot saved as 'scenario_analysis.png'")

## 4. Variance Decomposition: Identify Risk Drivers

Determine which spending categories contribute most to cashflow uncertainty.

In [None]:
# Per-category variance breakdown for the 60-day horizon
variance_results = analyzer.variance_decomposition(days=60)

report_lines = [
    "\nVARIANCE DECOMPOSITION (60-Day Horizon)",
    "=" * 80,
    "\nContribution to Cashflow Uncertainty:",
    variance_results.to_string(index=False),
]
print("\n".join(report_lines))

### Visualize Variance Contributions

In [None]:
# Side-by-side view: share of variance (pie) and per-category std dev (bars)
fig, (pie_ax, bar_ax) = plt.subplots(ncols=2, figsize=(14, 5))
category_labels = variance_results['category']

# Left panel: each category's share of the total variance
pie_ax.pie(
    variance_results['variance_contribution_pct'],
    labels=category_labels,
    autopct='%1.1f%%',
    startangle=90,
)
pie_ax.set_title('Variance Contribution by Category', fontsize=12, fontweight='bold')

# Right panel: standard deviation per category as horizontal bars
bar_ax.barh(category_labels, variance_results['std_dev'])
bar_ax.set_xlabel('Standard Deviation ($)', fontsize=11)
bar_ax.set_title('Spending Uncertainty by Category', fontsize=12, fontweight='bold')
bar_ax.grid(axis='x', alpha=0.3)

fig.tight_layout()
fig.savefig('variance_decomposition.png', dpi=300, bbox_inches='tight')
plt.show()

print("Variance decomposition plots saved as 'variance_decomposition.png'")

## 5. Backtesting: Forecast Accuracy & Calibration

Validate model performance against historical data. (Synthetic data is generated below for demonstration purposes.)

In [None]:
# Synthetic history for demonstration: a daily random walk that starts at
# 5000 and adds normally distributed daily changes (mean 10, std 100).
np.random.seed(42)  # fixed seed so the demo data is reproducible
dates = pd.date_range(start='2024-01-01', end='2025-01-01', freq='D')
n_days = len(dates)
daily_changes = np.random.normal(10, 100, n_days)
balances = 5000 + np.cumsum(daily_changes)
amounts = np.random.normal(100, 30, n_days)  # drawn independently of the balances

historical_data = pd.DataFrame({
    'date': dates,
    'balance': balances,
    'category': 'general',
    'amount': amounts,
})

backtester = ForecastBacktester(historical_data)

# Test periods: a 30-day horizon starting on the 1st of each month,
# January through October 2024.
HORIZON_DAYS = 30
test_periods = []
for month in range(1, 11):
    test_periods.append((datetime(2024, month, 1), HORIZON_DAYS))

# Run the backtest, then summarise accuracy and interval calibration
backtest_results = backtester.backtest_accuracy(simulator, test_periods)
calibration = backtester.calibration_analysis(backtest_results)

report_lines = [
    "\nBACKTEST RESULTS",
    "=" * 80,
    f"\nMean Absolute Error:     ${calibration['mean_absolute_error']:,.2f}",
    f"Mean % Error:            {calibration['mean_pct_error']:.2f}%",
    f"RMSE:                    ${calibration['rmse']:,.2f}",
    f"90% CI Coverage:         {calibration['confidence_coverage']:.1%}",
    f"Expected Coverage:       {calibration['expected_coverage']:.1%}",
]
print("\n".join(report_lines))

# Treated as well calibrated when observed coverage is within 5 percentage
# points of the expected coverage.
coverage_gap = abs(calibration['confidence_coverage'] - calibration['expected_coverage'])
well_calibrated = coverage_gap < 0.05
status = '✓ Well-calibrated' if well_calibrated else '✗ Needs adjustment'
print(f"\nCalibration Status:      {status}")

### Visualize Forecast Accuracy

In [None]:
# 2x2 diagnostic grid for the backtest results
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(scatter_ax, hist_ax), (series_ax, coverage_ax) = axes

actual = backtest_results['actual_balance']
errors = backtest_results['error']

# Top-left: forecast mean against actual balance; the dashed diagonal
# spans the observed range and marks a perfect forecast
scatter_ax.scatter(actual, backtest_results['forecast_mean'], alpha=0.6)
balance_range = [actual.min(), actual.max()]
scatter_ax.plot(balance_range, balance_range, 'r--', label='Perfect Forecast')
scatter_ax.set_xlabel('Actual Balance ($)')
scatter_ax.set_ylabel('Forecast Mean ($)')
scatter_ax.set_title('Forecast vs Actual')
scatter_ax.legend()
scatter_ax.grid(alpha=0.3)

# Top-right: histogram of forecast errors with a zero-error reference line
hist_ax.hist(errors, bins=15, edgecolor='black', alpha=0.7)
hist_ax.axvline(0, color='red', linestyle='--', linewidth=2)
hist_ax.set_xlabel('Forecast Error ($)')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('Error Distribution')
hist_ax.grid(alpha=0.3)

# Bottom-left: forecast error by test-period start date
series_ax.plot(backtest_results['start_date'], errors, marker='o')
series_ax.axhline(0, color='red', linestyle='--', linewidth=1)
series_ax.set_xlabel('Date')
series_ax.set_ylabel('Forecast Error ($)')
series_ax.set_title('Error Time Series')
series_ax.grid(alpha=0.3)
series_ax.tick_params(axis='x', rotation=45)

# Bottom-right: number of periods outside vs inside the 90% interval;
# .get(..., 0) covers the case where one of the two outcomes never occurs
coverage = backtest_results['in_90_ci'].value_counts()
n_outside = coverage.get(False, 0)
n_inside = coverage.get(True, 0)
coverage_ax.bar(['Outside CI', 'Inside CI'],
                [n_outside, n_inside],
                color=['red', 'green'], alpha=0.7)
coverage_ax.set_ylabel('Count')
coverage_ax.set_title('90% Confidence Interval Coverage')
coverage_ax.grid(axis='y', alpha=0.3)

fig.tight_layout()
fig.savefig('backtest_results.png', dpi=300, bbox_inches='tight')
plt.show()

print("Backtest plots saved as 'backtest_results.png'")

## 6. Summary Report

Comprehensive summary of all analyses.

In [None]:
# Final report: pulls together the results computed in the earlier cells
# (simulator, categories, forecasts, variance_results, cap_results, calibration).
print("\n" + "=" * 80)
print("BUDGETGUARD ANALYSIS SUMMARY")
print("=" * 80)

print("\n1. FINANCIAL POSITION")
total_monthly_expenses = sum(cat.mean for cat in categories.values())
print(f"   Current Balance:      ${simulator.initial_balance:,.2f}")
print(f"   Monthly Income:       ${simulator.monthly_income:,.2f}")
print(f"   Monthly Expenses:     ${total_monthly_expenses:,.2f}")
print(f"   Savings Goal:         ${savings_goal:,.2f}")

print("\n2. RISK ASSESSMENT (60-Day Horizon)")
forecast_60 = forecasts[60]
print(f"   Expected Balance:     ${forecast_60.mean_balance:,.2f}")
print(f"   Overdraft Risk:       {forecast_60.overdraft_probability:.2%}")
print(f"   Goal Miss Risk:       {forecast_60.goal_miss_probability:.2%}")

print("\n3. TOP RISK DRIVERS")
# Select the three largest contributors explicitly instead of relying on the
# row order of variance_results, and number them with enumerate: the DataFrame
# index label is not a rank (it keeps its original value after any sort).
top_drivers = variance_results.nlargest(3, 'variance_contribution_pct')
for rank, row in enumerate(top_drivers.itertuples(index=False), 1):
    print(f"   {rank}. {row.category:<15} {row.variance_contribution_pct:.1f}% of variance")

print("\n4. RECOMMENDED ACTIONS")
# Drop scenarios without a risk-reduction value first, so that nlargest cannot
# pad the list with NaN rows when fewer than three valid scenarios exist.
top_caps = cap_results.dropna(subset=['risk_reduction']).nlargest(3, 'risk_reduction')
for rank, row in enumerate(top_caps.itertuples(), 1):
    print(f"   {rank}. Cap {row.category} at ${row.cap:.0f}/month "
          f"→ {row.risk_reduction:.2%} risk reduction")

print("\n5. MODEL PERFORMANCE")
# NOTE(review): "Forecast Accuracy" is derived from the mean % error; if that
# metric is a signed mean, offsetting errors would inflate it -- confirm.
print(f"   Mean Absolute Error:  ${calibration['mean_absolute_error']:,.2f}")
print(f"   Forecast Accuracy:    {100 - abs(calibration['mean_pct_error']):.1f}%")
print(f"   Probability Calibr.:  {calibration['confidence_coverage']:.1%} coverage")

print("\n" + "=" * 80)
print("Analysis complete. All visualizations saved.")
print("=" * 80)