# In [None]:
# === Imports ===
import pandas as pd
import numpy as np
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
import optuna
import matplotlib.pyplot as plt
import seaborn as sns

# Display settings: never truncate the column list when a DataFrame is printed
pd.options.display.max_columns = None


# === Load and preprocess data ===
df = pd.read_csv("final_df_binary.csv")

# Parse the date column, then derive the calendar year used for the split
df["date"] = pd.to_datetime(df["date"])
df["year"] = df["date"].dt.year

# A working day is a day flagged neither as a holiday nor as a weekend (1 = yes, 0 = no)
df["is_working_day"] = (df["is_holiday"].eq(0) & df["is_weekend"].eq(0)).astype(int)

# Training set: working days from years up to and including 2023.
# .copy() detaches the slice from df so later edits do not touch the source frame.
train_data = df.loc[df["year"].le(2023) & df["is_working_day"].eq(1)].copy()

print("Data shape:", train_data.shape)
train_data.head()



# === Initialize containers ===
# Outputs filled by the tuning loop:
#   best_params_dict -- section id -> best hyperparameter dict found for it
#   results          -- one flat record per tuned section
results, best_params_dict = [], {}

# Distinct section ids present in the training data, in order of appearance
sections = pd.unique(train_data["section_id"])

print(f"Found {len(sections)} sections for tuning.")


# === Section-wise tuning with Optuna ===
# Cross-validation layout shared by every section and every trial.
CV_INITIAL = "730 days"
CV_PERIOD = "30 days"
CV_HORIZON = "90 days"

# Prophet's cross_validation() raises ValueError when the history does not
# span at least one initial window plus one horizon, so shorter sections are
# skipped up front instead of aborting the whole run.
MIN_HISTORY_SPAN = pd.Timedelta(CV_INITIAL) + pd.Timedelta(CV_HORIZON)

for section in sections:
    section_data = train_data[train_data["section_id"] == section]

    # Skip sections with insufficient data
    if len(section_data) < 50:
        print(f"Skipping section {section}: not enough data ({len(section_data)} rows).")
        continue

    # Prepare Prophet input: Prophet expects the columns "ds" (date) and "y" (target)
    prophet_df = (
        section_data[["date", "total_task_time_minutes"]]
        .rename(columns={"date": "ds", "total_task_time_minutes": "y"})
        .sort_values("ds")
    )

    # Skip sections whose date range cannot hold a single CV fold.
    # This check runs before the study is created so that `study` always
    # belongs to the most recent entry of best_params_dict.
    history_span = prophet_df["ds"].max() - prophet_df["ds"].min()
    if history_span < MIN_HISTORY_SPAN:
        print(
            f"Skipping section {section}: history spans {history_span.days} days, "
            f"cross-validation needs at least {MIN_HISTORY_SPAN.days}."
        )
        continue

    # Logistic growth needs a 'cap' column; the value (20% above the max
    # observed y) is the same for every trial, so compute it once per section.
    logistic_cap = prophet_df["y"].max() * 1.2

    # Define Optuna objective.
    # The closure reads prophet_df / logistic_cap of the current iteration; it
    # is consumed by study.optimize() below before the loop rebinds them.
    def objective(trial):
        """Fit Prophet with one sampled configuration and return its CV MAE.

        Args:
            trial: Optuna trial used to sample the hyperparameters.

        Returns:
            Mean absolute error aggregated over the full cross-validation
            horizon (lower is better).
        """
        changepoint_prior_scale = trial.suggest_float("changepoint_prior_scale", 0.001, 0.5, log=True)
        seasonality_prior_scale = trial.suggest_float("seasonality_prior_scale", 0.1, 10.0, log=True)
        # NOTE(review): no holidays are registered on the model below (no
        # `holidays=` argument), so this parameter presumably has no effect
        # on the fit. It is kept so the "best_hps" result column stays
        # available -- confirm whether holidays should be added or this
        # dimension dropped.
        holidays_prior_scale = trial.suggest_float("holidays_prior_scale", 0.1, 10.0, log=True)
        changepoint_range = trial.suggest_float("changepoint_range", 0.6, 0.95)
        seasonality_mode = trial.suggest_categorical("seasonality_mode", ["additive", "multiplicative"])
        growth = trial.suggest_categorical("growth", ["linear", "logistic"])

        model = Prophet(
            yearly_seasonality=True,
            weekly_seasonality=False,
            daily_seasonality=False,
            changepoint_prior_scale=changepoint_prior_scale,
            seasonality_prior_scale=seasonality_prior_scale,
            holidays_prior_scale=holidays_prior_scale,
            changepoint_range=changepoint_range,
            seasonality_mode=seasonality_mode,
            growth=growth,
        )

        # Work on a copy so a trial never mutates the shared section frame
        prophet_df_train = prophet_df.copy()
        if growth == "logistic":
            prophet_df_train["cap"] = logistic_cap

        model.fit(prophet_df_train)

        df_cv = cross_validation(
            model,
            horizon=CV_HORIZON,
            period=CV_PERIOD,
            initial=CV_INITIAL,
            parallel=None,
        )

        # rolling_window=1 averages the error over ALL cross-validation
        # predictions and yields a single row. With a smaller window the
        # first row only covers the shortest horizons, so the objective
        # would ignore most of the 90-day forecast range.
        df_p = performance_metrics(df_cv, metrics=["mae"], rolling_window=1)
        return df_p["mae"].values[0]

    # Run Optuna
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=30, show_progress_bar=True)

    # Best parameters
    best_params = study.best_params
    best_params_dict[section] = best_params

    # One flat record per tuned section; feeds the results table
    results.append({
        "section_id": section,
        "best_cps": best_params["changepoint_prior_scale"],
        "best_sps": best_params["seasonality_prior_scale"],
        "best_hps": best_params["holidays_prior_scale"],
        "best_cpr": best_params["changepoint_range"],
        "best_mode": best_params["seasonality_mode"],
        "best_growth": best_params["growth"],
        "best_mae": study.best_value,
    })

    print(
        f"Section {section}: "
        f"CPS={best_params['changepoint_prior_scale']:.4f}, "
        f"SPS={best_params['seasonality_prior_scale']:.4f}, "
        f"HPS={best_params['holidays_prior_scale']:.4f}, "
        f"CR={best_params['changepoint_range']:.2f}, "
        f"Mode={best_params['seasonality_mode']}, "
        f"Growth={best_params['growth']}, "
        f"MAE={study.best_value:.3f}"
    )



# === Save results ===
# Build a table with one row per tuned section (skipped sections never reach
# `results`) and persist it without the positional index.
results_df = pd.DataFrame(data=results)
results_df.to_csv(path_or_buf="prophet_tuning_results.csv", index=False)

print("Tuning results saved to prophet_tuning_results.csv")
results_df.head()



# === Visualization: Best MAE by Section ===
# When no section was tuned, results_df has no rows (and no columns), and
# sns.barplot would raise because "section_id" / "best_mae" cannot be
# resolved. Only draw the chart when there is something to show.
if results_df.empty:
    print("No tuning results to plot.")
else:
    plt.figure(figsize=(10, 6))
    sns.barplot(x="section_id", y="best_mae", data=results_df)
    plt.title("Best MAE by Section (Prophet Tuning)")
    plt.xlabel("Section")
    plt.ylabel("Best MAE")
    plt.xticks(rotation=45)  # tilt the section labels
    plt.show()


# === Extra: Optuna study visualization for last section ===
from optuna.visualization import plot_param_importances, plot_optimization_history

# `study` is only (re)bound for sections that were actually tuned, whereas the
# loop variable `section` also advances over skipped sections. The label is
# therefore taken from best_params_dict: dicts keep insertion order, so its
# last key is the section that the current `study` belongs to.
if best_params_dict:
    last_tuned_section = list(best_params_dict)[-1]
    print(f"Showing Optuna plots for the last section: {last_tuned_section}")

    plot_optimization_history(study).show()
    plot_param_importances(study).show()
else:
    # No section was tuned, so `study` was never created.
    print("No section was tuned; skipping Optuna plots.")
