# 05 - Forecasting
## Nusantara Food Watch - Time Series Forecasting

**Purpose:** Build predictive models for food price trends

**Input:** Processed CSV from `data/processed/`

**Output:** 
- Forecast results in `data/processed/`
- Model performance charts in `reports/figures/`

---

## Setup

In [None]:
# Add the project root to the Python path so that the `src` package
# imported by the following cells can be resolved.
import sys
from pathlib import Path

# Assumes the notebook's working directory sits one level below the
# project root (the root is taken to be the parent of the cwd).
project_root = Path.cwd().parent

# Re-running this cell must not stack duplicate entries on sys.path.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

print(f"📁 Project root: {project_root}")

In [None]:
# Standard imports
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Time series / Forecasting
# Uncomment the libraries you need:
# from statsmodels.tsa.seasonal import seasonal_decompose
# from statsmodels.tsa.stattools import adfuller, acf, pacf
# from statsmodels.tsa.arima.model import ARIMA
# from statsmodels.tsa.holtwinters import ExponentialSmoothing
# from prophet import Prophet

# Metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Our custom utilities
from src.data_analysis.utils import (
    DataSaver,
    save_csv,
    setup_plot_style
)

from src.data_analysis.config import (
    PROCESSED_DIR, 
    FIGURES_DIR
)

# Setup plotting style
setup_plot_style()
%matplotlib inline

print("‚úÖ Imports complete!")
print(f"\nüìÅ Working directories:")
print(f"   Input/Output (Processed): {PROCESSED_DIR}")
print(f"   Figures: {FIGURES_DIR}")

## Configuration

In [None]:
# Input: file name inside the processed-data folder, read by the Load Data cell.
INPUT_FILE = 'cleaned_data.csv'  # Change this to your processed file

# Outputs: file names used when saving the forecast and its metrics.
FORECAST_OUTPUT = 'forecast_results.csv'
METRICS_OUTPUT = 'forecast_metrics.csv'

# Forecasting parameters
FORECAST_PERIODS = 30   # Number of future steps to forecast, in the data's own frequency (e.g. days or months)
TRAIN_TEST_SPLIT = 0.8  # Fraction of rows used for training (0.8 = 80% train, 20% test)

# Model selection
MODEL_TYPE = 'arima'  # 'arima', 'prophet', 'exponential_smoothing', etc.

# Echo the active configuration so each run records its settings.
print(f"📥 Input: {INPUT_FILE}")
print(f"📤 Forecast output: {FORECAST_OUTPUT}")
print(f"📤 Metrics output: {METRICS_OUTPUT}")
print(f"🔮 Forecast periods: {FORECAST_PERIODS}")
print(f"📊 Train/Test split: {TRAIN_TEST_SPLIT}")
print(f"🤖 Model: {MODEL_TYPE}")

## Load Data

In [None]:
# Load the dataset selected by INPUT_FILE from the processed folder.
df = pd.read_csv(PROCESSED_DIR / INPUT_FILE)

# Parse the date column (when present) and put the rows in chronological order.
has_dates = 'tanggal' in df.columns
if has_dates:
    df['tanggal'] = pd.to_datetime(df['tanggal'])
    df = df.sort_values('tanggal')

print(f"✅ Loaded {len(df):,} records")
print(f"📊 Shape: {df.shape}")

# Only report the date range when the column exists; reading it
# unconditionally would raise KeyError for files without 'tanggal'.
if has_dates:
    print(f"📅 Date range: {df['tanggal'].min()} to {df['tanggal'].max()}")
else:
    print("⚠️ Column 'tanggal' not found - rows were not sorted by date")

In [None]:
# Show the first five rows as a quick check of the loaded data
df.head(n=5)

---
## Your Analysis Here

Use the cells below for your forecasting logic.

In [None]:
# Example: Train/test split
# split_point = int(len(df) * TRAIN_TEST_SPLIT)
# train = df[:split_point]
# test = df[split_point:]
# 
# print(f"Train: {len(train)} records")
# print(f"Test: {len(test)} records")

In [None]:
# Example: Build and fit model
# (requires the ARIMA import in the Setup cell to be uncommented)
# model = ARIMA(train['harga'], order=(1, 1, 1))
# fitted_model = model.fit()
# print(fitted_model.summary())

In [None]:
# Example: Generate forecast
# forecast = fitted_model.forecast(steps=FORECAST_PERIODS)
# print(forecast)

---
## Model Evaluation

In [None]:
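# Example: Calculate metrics
# from sklearn.metrics import mean_absolute_error, mean_squared_error
# 
# # `predictions` needs one value per row of `test`. Converting to a plain
# # array keeps the arithmetic below positional instead of index-aligned.
# predictions = np.asarray(fitted_model.forecast(steps=len(test)))
# 
# mae = mean_absolute_error(test['harga'], predictions)
# rmse = np.sqrt(mean_squared_error(test['harga'], predictions))
# # Note: MAPE is undefined if test['harga'] contains zeros.
# mape = np.mean(np.abs((test['harga'] - predictions) / test['harga'])) * 100
# 
# print(f"MAE: {mae:.2f}")
# print(f"RMSE: {rmse:.2f}")
# print(f"MAPE: {mape:.2f}%")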

---
## Save Results

In [None]:
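# Example: Save forecast results
# NOTE: forecast_dates, forecast_values, lower_ci and upper_ci are placeholders;
# define them from your fitted model's output before running this cell.
# df_forecast = pd.DataFrame({
#     'date': forecast_dates,
#     'forecast': forecast_values,
#     'lower_bound': lower_ci,
#     'upper_bound': upper_ci
# })
# 
# save_csv(df_forecast, FORECAST_OUTPUT, processed=True)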

In [None]:
# Example: Save metrics
# df_metrics = pd.DataFrame({
#     'model': [MODEL_TYPE],
#     'mae': [mae],
#     'rmse': [rmse],
#     'mape': [mape]
# })
# 
# save_csv(df_metrics, METRICS_OUTPUT, processed=True)

In [None]:
# Example: Save forecast plot
# NOTE: `fig` is a placeholder for the figure object you want to save.
# saver = DataSaver()
# saver.save_figure(fig, 'forecast_plot.png', dpi=300)