In [1]:
import pandas as pd
import numpy as np
from typing import List
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
from statsmodels.tsa.api import ExponentialSmoothing
from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA
from datetime import datetime
import tempfile

# -----------------------------
# 模型定义
# -----------------------------
class HistoricAverageModel:
    """Baseline forecaster that predicts the historical mean at every step."""

    def fit(self, series: pd.Series):
        """Compute the mean of ``series`` and store it as ``self.mean``.

        Returns ``None``; the fitted state lives on the instance.
        """
        average = series.mean()
        self.mean = average

    def forecast(self, horizon: int) -> np.ndarray:
        """Return an array of length ``horizon`` filled with the stored mean.

        ``fit`` must be called first, otherwise ``self.mean`` does not exist.
        """
        return np.full(shape=horizon, fill_value=self.mean)

# -----------------------------
# 各种模型的预测函数
# -----------------------------
def naive_forecast(series: pd.Series, horizon: int) -> np.ndarray:
    """Naive forecast: repeat the last observation ``horizon`` times.

    Raises:
        IndexError: if ``series`` is empty (there is no last observation).
    """
    last_observation = series.iloc[-1]
    return np.full(shape=horizon, fill_value=last_observation)

def drift_forecast(series: pd.Series, horizon: int) -> np.ndarray:
    """Drift forecast: extrapolate the straight line through the first and last points.

    The slope is ``(last - first) / (n - 1)`` and step ``k`` (1-based) of the
    forecast is ``last + slope * k``.

    Args:
        series: Observed values, oldest first.
        horizon: Number of future steps to produce.

    Returns:
        Array of length ``horizon``.

    Raises:
        IndexError: if ``series`` is empty.
    """
    last_value = series.iloc[-1]
    n_obs = len(series)
    if n_obs > 1:
        slope = (last_value - series.iloc[0]) / (n_obs - 1)
    else:
        # A single observation carries no trend information; dividing by
        # (n - 1) == 0 would produce NaN, so fall back to a flat (naive) line.
        slope = 0.0
    steps_ahead = np.arange(1, horizon + 1)
    return np.asarray(last_value + slope * steps_ahead)

def seasonal_naive_forecast(series: pd.Series, horizon: int, season_length: int = 12) -> np.ndarray:
    """Seasonal-naive forecast: repeat the most recent season cyclically.

    Step ``k`` (1-based) of the forecast equals the observation one season
    before it, so the first forecast value is the *first* value of the last
    observed season, and the season is tiled when ``horizon`` exceeds
    ``season_length``.

    Args:
        series: Observed values, oldest first.
        horizon: Number of future steps to produce.
        season_length: Number of observations per seasonal cycle.

    Returns:
        Array of exactly ``horizon`` values.

    Raises:
        ValueError: if ``season_length`` is not positive or ``series`` is empty.
    """
    if season_length < 1:
        raise ValueError("season_length must be a positive integer")

    # If the series is shorter than one season, the slice is simply the whole
    # series and that shorter cycle is what gets repeated.
    last_season = series.iloc[-season_length:].to_numpy()
    if last_season.size == 0:
        raise ValueError("cannot build a seasonal naive forecast from an empty series")

    # Walk through the last season from its start, wrapping around as needed.
    positions = np.arange(horizon) % last_season.size
    return last_season[positions]

def ets_forecast(series: pd.Series, horizon: int, season_length: int = 12) -> np.ndarray:
    """Holt-Winters forecast with additive trend and seasonal component.

    Multiplicative seasonality is tried first; if building or fitting that
    model raises, an additive-seasonality model is fitted instead.

    Args:
        series: Observed values, oldest first.
        horizon: Number of future steps to produce.
        season_length: Seasonal period passed to ``ExponentialSmoothing``.

    Returns:
        Plain array of ``horizon`` forecast values. The index statsmodels
        attaches to its forecast is dropped on purpose: callers that wrap the
        result in ``pd.Series(values, index=...)`` would otherwise trigger a
        label-based reindex instead of a positional assignment.

    Raises:
        Whatever the additive fallback raises if it fails as well.
    """
    try:
        fitted = ExponentialSmoothing(
            series, trend="add", seasonal="mul", seasonal_periods=season_length
        ).fit()
    except Exception:
        # Deliberate best-effort fallback: the multiplicative form is not
        # applicable to every series (e.g. data that is not strictly positive).
        fitted = ExponentialSmoothing(
            series, trend="add", seasonal="add", seasonal_periods=season_length
        ).fit()
    return np.asarray(fitted.forecast(horizon))

def arima_forecast(series: pd.Series, horizon: int) -> np.ndarray:
    """Forecast ``horizon`` steps with statsforecast's ``AutoARIMA``.

    The series is converted to the long format statsforecast works with by
    default: one row per observation with ``unique_id``, ``ds`` and ``y``
    columns. A monthly-start frequency (``'MS'``) and a seasonal period of 12
    are assumed.

    Args:
        series: Observed values indexed by timestamp, oldest first.
        horizon: Number of future steps to produce.

    Returns:
        Array with the values of the ``AutoARIMA`` forecast column.
    """
    # Local import: Naive is needed only here, as the fallback model.
    from statsforecast.models import Naive

    frame = pd.DataFrame({
        "unique_id": "series",  # single-series id; scalar is broadcast to all rows
        "ds": series.index,
        "y": series.values,
    })
    sf = StatsForecast(
        models=[AutoARIMA(season_length=12)],
        freq='MS',
        n_jobs=1,
        # fallback_model must be a model object; it is used when AutoARIMA
        # fails, and its output is reported under the AutoARIMA column.
        fallback_model=Naive(),
    )
    # Default column names are used, so no id/time/target overrides are needed.
    forecast_df = sf.forecast(df=frame, h=horizon)
    return forecast_df['AutoARIMA'].to_numpy()

# -----------------------------
# 模型字典统一管理
# -----------------------------
def _mean_forecast(series: pd.Series, horizon: int) -> np.ndarray:
    """Fit a HistoricAverageModel on ``series`` and forecast ``horizon`` steps."""
    # fit() returns None, so the same instance must be kept for forecast();
    # forecasting from a second, unfitted instance would fail on self.mean.
    model = HistoricAverageModel()
    model.fit(series)
    return model.forecast(horizon)


# Registry mapping a model name to a callable ``(series, horizon) -> forecast``.
model_functions = {
    "naive": naive_forecast,
    "drift": drift_forecast,
    "seasonal_naive": seasonal_naive_forecast,
    "ETS": ets_forecast,
    "ARIMA": arima_forecast,
    "mean": _mean_forecast,
}

# -----------------------------
# 交叉验证评估函数
# -----------------------------
def rolling_cv_evaluation(series: pd.Series, model_name: str, forecast_horizon: int, metric: str, step: int = 6) -> float:
    """Score one model on one series with rolling-window cross-validation.

    Each fold trains on ``forecast_horizon`` consecutive observations and is
    tested on the ``forecast_horizon`` observations that follow; the window
    start advances by ``step`` between folds.

    Args:
        series: Observed values, oldest first.
        model_name: ``"mean"`` or a key of ``model_functions``.
        forecast_horizon: Length of both the training and the test window.
        metric: ``"mape"`` or ``"mse"``.
        step: Number of observations the window advances per fold.

    Returns:
        Mean error over all folds. ``nan`` if the series is too short for a
        single fold, or if ``metric == "mape"`` and any test window contains a
        zero (a warning is printed in that case).

    Raises:
        ValueError: for an unsupported metric, or a non-positive
            ``forecast_horizon`` or ``step``.
        KeyError: if ``model_name`` is not a known model.
    """
    # Validate up front so bad arguments fail before any model is fitted.
    if metric not in ("mape", "mse"):
        raise ValueError("Unsupported metric")
    if forecast_horizon < 1:
        raise ValueError("forecast_horizon must be a positive integer")
    if step < 1:
        # A non-positive step would never advance the window.
        raise ValueError("step must be a positive integer")

    score = mean_absolute_percentage_error if metric == "mape" else mean_squared_error

    errors = []
    # Last admissible start satisfies start + 2 * forecast_horizon <= len(series).
    for start in range(0, len(series) - 2 * forecast_horizon + 1, step):
        split = start + forecast_horizon
        train = series.iloc[start:split]
        test = series.iloc[split:split + forecast_horizon]

        # MAPE is undefined for zero actuals; check before paying for a fit.
        if metric == "mape" and np.any(test.values == 0):
            print(f"Warning: Zero value in test set for MAPE in model {model_name}")
            return np.nan

        # Pick the model and produce the forecast for this fold.
        if model_name == "mean":
            model = HistoricAverageModel()
            model.fit(train)
            pred = model.forecast(forecast_horizon)
        else:
            pred = model_functions[model_name](train, forecast_horizon)

        # Compare positionally, independent of any index the model attached.
        errors.append(score(test.to_numpy(), np.asarray(pred)))

    if not errors:
        # Series shorter than 2 * forecast_horizon: nothing to average.
        return np.nan
    return float(np.mean(errors))

# -----------------------------
# 批量模型评估
# -----------------------------
def evaluate_models(series_dict: dict, forecast_horizon: int, metric: str, categories: dict) -> pd.DataFrame:
    """Cross-validate every candidate model for every series.

    Candidate models for a series are looked up in ``categories`` by the
    series' ``name`` attribute (not by its key in ``series_dict``); a series
    whose name is missing from ``categories`` contributes no rows.

    Args:
        series_dict: Mapping of time-series id to ``pd.Series``.
        forecast_horizon: Horizon passed to ``rolling_cv_evaluation``.
        metric: Metric name passed to ``rolling_cv_evaluation``; also used as
            the name of the score column.
        categories: Mapping of series name to a list of model names.

    Returns:
        DataFrame with columns ``ts_id``, ``model`` and ``<metric>``. The
        columns are present even when no model was evaluated, so downstream
        column lookups do not fail on an empty result.
    """
    results = [
        {
            "ts_id": ts_id,
            "model": model,
            metric: rolling_cv_evaluation(series, model, forecast_horizon, metric),
        }
        for ts_id, series in series_dict.items()
        for model in categories.get(series.name, [])
    ]
    return pd.DataFrame(results, columns=["ts_id", "model", metric])

# -----------------------------
# 找出每个时间序列的最佳模型
# -----------------------------
def select_best_models(results_df: pd.DataFrame, metric: str) -> pd.DataFrame:
    """Pick, for each ``ts_id``, the row with the lowest ``metric`` value.

    Rows whose score is NaN are ignored. A series whose scores are all NaN has
    no best model and is therefore absent from the result.

    Args:
        results_df: Frame with at least ``ts_id`` and ``<metric>`` columns.
        metric: Name of the score column to minimise.

    Returns:
        One row per scored ``ts_id``, with the score column renamed to
        ``avg_<metric>`` and a fresh 0..n-1 index.
    """
    # Drop NaN scores first: a group made up solely of NaN has no minimum,
    # and the resulting label cannot be looked up with .loc.
    scored = results_df.dropna(subset=[metric])
    best_labels = scored.groupby("ts_id")[metric].idxmin()
    return (
        scored.loc[best_labels]
        .reset_index(drop=True)
        .rename(columns={metric: f"avg_{metric}"})
    )

# -----------------------------
# 对比 Naive 模型与最佳模型
# -----------------------------
def compare_with_naive(best_models_df: pd.DataFrame, results_df: pd.DataFrame, metric: str):
    """Print each series' best model score next to its naive-benchmark score.

    Args:
        best_models_df: Frame with ``ts_id``, ``model`` and ``avg_<metric>``
            columns.
        results_df: Frame with ``ts_id``, ``model`` and ``<metric>`` columns.
        metric: Metric name; shown upper-cased in the printed line.
    """
    print("\nComparison with Naive benchmark:")
    score_column = f"avg_{metric}"
    for ts_id in best_models_df['ts_id']:
        # Mean over the naive rows of this series (NaN when there are none).
        is_naive_row = (results_df['ts_id'] == ts_id) & (results_df['model'] == 'naive')
        naive_score = results_df.loc[is_naive_row, metric].mean()

        # First matching row in the best-models frame supplies model and score.
        is_this_series = best_models_df['ts_id'] == ts_id
        best_model = best_models_df.loc[is_this_series, 'model'].iloc[0]
        best_score = best_models_df.loc[is_this_series, score_column].iloc[0]

        print(f"{ts_id}: Best = {best_model}, {metric.upper()} = {best_score:.4f}, Naive = {naive_score:.4f}")

# -----------------------------
# 示例分类模型清单
# -----------------------------
# Example mapping of series category -> candidate model names to evaluate.
# Every model name must be "mean" or a key of ``model_functions``.
categories = {
    "Category 1": ["naive", "ETS"],
    "Category 2": ["naive", "drift", "ARIMA"],
    "Category 3": ["naive", "seasonal_naive", "ETS"],
    "Category 4": ["naive", "ARIMA", "drift", "mean"],
    "Category 5": ["naive", "ETS", "ARIMA"],
}



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def forecast_with_best_models(series_dict: dict, best_models_df: pd.DataFrame, forecast_horizon: int) -> pd.DataFrame:
    """Forecast every series with the model selected for it.

    Args:
        series_dict: Mapping of time-series id to ``pd.Series`` with a
            datetime index.
        best_models_df: Frame with ``ts_id`` and ``model`` columns, one row
            per series to forecast.
        forecast_horizon: Number of future steps to produce per series.

    Returns:
        Long-format frame with columns ``date``, ``forecast``, ``ts_id`` and
        ``model``. It is empty, but has those columns, when
        ``best_models_df`` has no rows.

    Raises:
        KeyError: if a ``ts_id`` is missing from ``series_dict`` or a model
            name is unknown.
    """
    output_columns = ['date', 'forecast', 'ts_id', 'model']
    all_forecasts = []

    for _, row in best_models_df.iterrows():
        ts_id = row['ts_id']
        model_name = row['model']
        series = series_dict[ts_id]

        # Pick the model and produce the forecast.
        if model_name == "mean":
            model = HistoricAverageModel()
            model.fit(series)
            forecast = model.forecast(forecast_horizon)
        else:
            forecast = model_functions[model_name](series, forecast_horizon)

        # Keep only the values: if a model returns a pandas object, pairing it
        # with a new index would align by label instead of by position.
        forecast_values = np.asarray(forecast)

        # Continue the time index one period after the last observation,
        # assuming monthly-start data when the frequency cannot be inferred.
        last_date = series.index[-1]
        freq = pd.infer_freq(series.index) or 'MS'
        forecast_index = pd.date_range(
            start=last_date + pd.tseries.frequencies.to_offset(freq),
            periods=forecast_horizon,
            freq=freq,
        )

        all_forecasts.append(pd.DataFrame({
            'date': forecast_index,
            'forecast': forecast_values,
            'ts_id': ts_id,
            'model': model_name,
        }))

    if not all_forecasts:
        # pd.concat rejects an empty list; return an empty, well-formed frame.
        return pd.DataFrame(columns=output_columns)
    return pd.concat(all_forecasts, ignore_index=True)

# 可选：保存到 CSV
def save_forecasts_to_csv(forecast_df: pd.DataFrame, filename: str = "final_forecasts.csv"):
    """Write ``forecast_df`` to ``filename`` as CSV and print a confirmation.

    The DataFrame index is not written to the file.
    """
    forecast_df.to_csv(path_or_buf=filename, index=False)
    print(f"✅ Forecasts saved to {filename}")
