# Time Series Forecasting with Mixture of Experts

This notebook demonstrates how to fit several models (ARIMA, ETS, Random Forest) to each time series (SKU) and then combine their forecasts using a Mixture of Experts model.

In [None]:

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
from sklearn.linear_model import LinearRegression

# Load the data. Each CSV is indexed by its parsed DATE column.
inventory_data = pd.read_csv('Domestic Auto Inventories.csv', index_col='DATE', parse_dates=True)
production_data = pd.read_csv('Domestic Auto Production.csv', index_col='DATE', parse_dates=True)
sales_data = pd.read_csv('Total Vehicle Sales.csv', index_col='DATE', parse_dates=True)

# Combine the data into a single DataFrame (columns side by side, rows
# aligned on the DATE index).
df = pd.concat([inventory_data, production_data, sales_data], axis=1)

# Fill missing values with forward fill first, then backward fill for any
# gaps left at the start of a column.
# `fillna(method=...)` is deprecated in recent pandas releases; `ffill()` and
# `bfill()` are the supported equivalents.
df = df.ffill().bfill()

# Define functions to train models and calculate metrics
def train_models_for_sku(sku_data):
    """Fit ARIMA, ETS and Random Forest on one series and score each forecast.

    The series is split on its index at 2024-01-01: earlier observations are
    used for fitting, observations on or after that date are the hold-out
    period that every model forecasts.

    Parameters
    ----------
    sku_data : pandas.Series
        A single series whose index can be compared against the string
        '2024-01-01'.

    Returns
    -------
    dict
        Maps 'ARIMA', 'ETS' and 'Random Forest' to the value returned by
        ``calculate_metrics`` for that model's hold-out forecast.
    """
    cutoff = '2024-01-01'
    history = sku_data[sku_data.index < cutoff]
    holdout = sku_data[sku_data.index >= cutoff]
    n_history = len(history)
    horizon = len(holdout)

    # Fit the three base models on the history only.
    fitted_arima = ARIMA(history, order=(5, 1, 0)).fit()
    fitted_ets = ExponentialSmoothing(history, trend='add', seasonal=None).fit()
    # The Random Forest's only feature is the integer position in time.
    history_steps = np.arange(n_history).reshape(-1, 1)
    fitted_rf = RandomForestRegressor(n_estimators=100).fit(history_steps, history.values)

    # Forecast the hold-out period with each model.
    future_steps = np.arange(n_history, n_history + horizon).reshape(-1, 1)
    forecasts = {
        'ARIMA': fitted_arima.forecast(steps=horizon),
        'ETS': fitted_ets.forecast(steps=horizon),
        'Random Forest': fitted_rf.predict(future_steps),
    }

    return {
        model_name: calculate_metrics(holdout, forecast)
        for model_name, forecast in forecasts.items()
    }

def calculate_metrics(true_values, predicted_values):
    """Return (MAE, MAPE, WAPE) for a forecast against the observed values.

    The two inputs are compared position by position. Both are converted to
    NumPy arrays up front so that pandas objects carrying different indexes
    (for example a forecast whose index does not match the hold-out dates)
    are not aligned by label: label alignment would turn every unmatched
    position into NaN, and those NaNs would then be skipped silently when
    the WAPE sums are taken.

    Parameters
    ----------
    true_values : array-like
        Observed values.
    predicted_values : array-like
        Forecast values, in the same order and of the same length as
        ``true_values``.

    Returns
    -------
    tuple
        ``(mae, mape, wape)`` where ``mae`` and ``mape`` come from
        scikit-learn's ``mean_absolute_error`` and
        ``mean_absolute_percentage_error``, and ``wape`` is the sum of
        absolute errors divided by the sum of absolute observed values.
    """
    actual = np.asarray(true_values, dtype=float)
    predicted = np.asarray(predicted_values, dtype=float)
    mae = mean_absolute_error(actual, predicted)
    mape = mean_absolute_percentage_error(actual, predicted)
    # Plain ndarray arithmetic: purely positional, no index alignment.
    wape = np.sum(np.abs(actual - predicted)) / np.sum(np.abs(actual))
    return mae, mape, wape

def _forecast_with_experts(history, horizon):
    """Fit the three base models on `history` and forecast `horizon` steps.

    Returns an array of shape (horizon, 3) whose columns are the ARIMA, ETS
    and Random Forest forecasts, in that order.
    """
    arima_forecast = ARIMA(history, order=(5, 1, 0)).fit().forecast(steps=horizon)
    ets_forecast = (
        ExponentialSmoothing(history, trend='add', seasonal=None)
        .fit()
        .forecast(steps=horizon)
    )
    # The Random Forest's only feature is the integer position in time.
    n_obs = len(history)
    rf_model = RandomForestRegressor(n_estimators=100).fit(
        np.arange(n_obs).reshape(-1, 1), history.values
    )
    rf_forecast = rf_model.predict(np.arange(n_obs, n_obs + horizon).reshape(-1, 1))
    return np.column_stack(
        (np.asarray(arima_forecast), np.asarray(ets_forecast), rf_forecast)
    )


def train_mixture_of_experts(sku_data):
    """Combine ARIMA, ETS and Random Forest forecasts with a linear meta-model.

    The series is split on its index at 2024-01-01 into a training period and
    a test period. The meta-model (a ``LinearRegression`` over the three base
    forecasts) is fit on a validation window held out from the END of the
    training period, never on the test period, so the returned metrics are
    genuinely out-of-sample. Fitting the meta-model on the test actuals and
    then scoring it on those same rows would leak the targets and report
    optimistic, in-sample errors.

    Steps:
      1. Fit the base models on the training period minus the validation
         window and forecast the validation window.
      2. Fit the meta-model on those forecasts against the validation actuals.
      3. Refit the base models on the full training period, forecast the test
         period, and blend the forecasts with the fitted meta-model.

    Assumes `sku_data` is in chronological order, since the validation window
    is taken positionally from the tail of the training period.

    Parameters
    ----------
    sku_data : pandas.Series
        A single series whose index can be compared against the string
        '2024-01-01'.

    Returns
    -------
    tuple
        The value returned by ``calculate_metrics`` for the blended forecast
        on the test period.

    Raises
    ------
    ValueError
        If there are no test observations, or the training period is too
        short to hold out at least one validation observation.
    """
    train = sku_data[sku_data.index < '2024-01-01']
    test = sku_data[sku_data.index >= '2024-01-01']

    horizon = len(test)
    if horizon == 0:
        raise ValueError("sku_data has no observations on or after 2024-01-01")

    # Mirror the test horizon where possible, but never hold out more than
    # half of the training period.
    validation_size = min(horizon, len(train) // 2)
    if validation_size == 0:
        raise ValueError("training period is too short to hold out a validation window")

    base_train = train.iloc[:-validation_size]
    validation = train.iloc[-validation_size:]

    # Steps 1-2: learn the blending weights from out-of-sample base forecasts.
    validation_forecasts = _forecast_with_experts(base_train, validation_size)
    meta_model = LinearRegression()
    meta_model.fit(validation_forecasts, validation.values)

    # Step 3: forecast the untouched test period and blend.
    test_forecasts = _forecast_with_experts(train, horizon)
    meta_forecast = meta_model.predict(test_forecasts)
    return calculate_metrics(test, meta_forecast)

# Train models and evaluate them.
# Rows are collected in plain lists and turned into DataFrames once at the
# end: `DataFrame.append` was removed in pandas 2.0, and appending row by row
# copied the whole frame on every iteration.
result_columns = ['SKU', 'Model', 'MAE', 'MAPE', 'WAPE']
result_rows = []
moe_rows = []

for sku in df.columns:
    sku_data = df[sku]

    # One row per base model for this series.
    metrics = train_models_for_sku(sku_data)
    for model_name, (mae, mape, wape) in metrics.items():
        result_rows.append({
            'SKU': sku,
            'Model': model_name,
            'MAE': mae,
            'MAPE': mape,
            'WAPE': wape,
        })

    # One row for the combined model.
    mae, mape, wape = train_mixture_of_experts(sku_data)
    moe_rows.append({
        'SKU': sku,
        'Model': 'Mixture of Experts',
        'MAE': mae,
        'MAPE': mape,
        'WAPE': wape,
    })

results = pd.DataFrame(result_rows, columns=result_columns)
moe_results = pd.DataFrame(moe_rows, columns=result_columns)

# Combine results; within each SKU the lowest-error model is listed first.
all_results = pd.concat([results, moe_results], ignore_index=True)
all_results = all_results.sort_values(by=['SKU', 'MAE', 'MAPE', 'WAPE'], ascending=True)
all_results
