In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
os.chdir('/content/drive/MyDrive/ml-final/ML-Final')

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from prophet import Prophet
import pmdarima as pm
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
import mlflow
import mlflow.sklearn
import dagshub
import joblib
import pickle
import os
from statsmodels.tsa.stattools import adfuller, kpss

warnings.filterwarnings('ignore')

In [4]:
dagshub.init(repo_owner='egval20', repo_name='ML-Final', mlflow=True)

Output()



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=79f22d82-1d55-493c-a007-7a1952a74d21&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=666d8ce7a8875182d6c7836ea4817bb09a104c2ef48c8f7ccd323509e672ca1c




## Data Prep

In [5]:
import joblib
import sys
sys.path.append('.')
from data_preprocessing_pipeline import *

In [6]:
def get_model_ready_data(pipeline_path='preprocessing_pipeline.pkl'):
    pipeline = joblib.load(pipeline_path)
    def preprocess_for_model(raw_data):
        return pipeline.transform(raw_data)
    return preprocess_for_model, pipeline

preprocess_fn, loaded_pipeline = get_model_ready_data()

train_raw = pd.read_csv('data/train.csv')
test_raw = pd.read_csv('data/test.csv')
stores = pd.read_csv('data/stores.csv')
features = pd.read_csv('data/features.csv')
print(f"Train shape: {train_raw.shape}")
print(f"Test shape: {test_raw.shape}")
print(f"Date range - Train: {train_raw['Date'].min()} to {train_raw['Date'].max()}")
print(f"Date range - Test: {test_raw['Date'].min()} to {test_raw['Date'].max()}")

train_processed = preprocess_fn(train_raw)
test_processed = preprocess_fn(test_raw)
X_train = train_processed[loaded_pipeline.feature_names_]
y_train = train_processed['Weekly_Sales']
X_test = test_processed[loaded_pipeline.feature_names_]

print(f"Preprocessed train shape: {X_train.shape}")
print(f"Preprocessed test shape: {X_test.shape}")

Train shape: (421570, 5)
Test shape: (115064, 4)
Date range - Train: 2010-02-05 to 2012-10-26
Date range - Test: 2012-11-02 to 2013-07-26
Preprocessed train shape: (421570, 62)
Preprocessed test shape: (115064, 62)


In [7]:
def drop_lag_features(data, columns_to_drop):
    existing_cols = [col for col in columns_to_drop if col in data.columns]
    cleaned_data = data.drop(columns=existing_cols)
    print(f"Dropped {len(existing_cols)} lag/MA columns: {existing_cols}")
    return cleaned_data

lag_columns_to_drop = [
    'Sales_Lag_1', 'Sales_Lag_2', 'Sales_Lag_3', 'Sales_Lag_4', 'Sales_Lag_8', 'Sales_Lag_52',
    'Sales_MA_4', 'Sales_MA_8', 'Sales_MA_12',
    'Sales_STD_4', 'Sales_STD_8', 'Sales_STD_12'
]

train_processed_clean = drop_lag_features(train_processed, lag_columns_to_drop)
test_processed_clean = drop_lag_features(test_processed, lag_columns_to_drop)

print(f"Original train shape: {train_processed.shape}")
print(f"Cleaned train shape: {train_processed_clean.shape}")

Dropped 12 lag/MA columns: ['Sales_Lag_1', 'Sales_Lag_2', 'Sales_Lag_3', 'Sales_Lag_4', 'Sales_Lag_8', 'Sales_Lag_52', 'Sales_MA_4', 'Sales_MA_8', 'Sales_MA_12', 'Sales_STD_4', 'Sales_STD_8', 'Sales_STD_12']
Dropped 12 lag/MA columns: ['Sales_Lag_1', 'Sales_Lag_2', 'Sales_Lag_3', 'Sales_Lag_4', 'Sales_Lag_8', 'Sales_Lag_52', 'Sales_MA_4', 'Sales_MA_8', 'Sales_MA_12', 'Sales_STD_4', 'Sales_STD_8', 'Sales_STD_12']
Original train shape: (421570, 64)
Cleaned train shape: (421570, 52)


In [8]:
mlflow.set_experiment("Prophet_Training")
with mlflow.start_run(run_name="SARIMA_Cleaning"):
    mlflow.log_param("columns_to_drop_count", len(lag_columns_to_drop))
    mlflow.log_param("columns_to_drop", lag_columns_to_drop)
    mlflow.log_metric("original_train_rows", train_processed.shape[0])
    mlflow.log_metric("original_train_cols", train_processed.shape[1])
    mlflow.log_metric("final_train_rows", train_processed_clean.shape[0])
    mlflow.log_metric("final_train_cols", train_processed_clean.shape[1])

2025/07/06 14:32:37 INFO mlflow.tracking.fluent: Experiment with name 'Prophet_Training' does not exist. Creating a new experiment.


🏃 View run SARIMA_Cleaning at: https://dagshub.com/egval20/ML-Final.mlflow/#/experiments/6/runs/306643154cf744e7bf4663723b9791e7
🧪 View experiment at: https://dagshub.com/egval20/ML-Final.mlflow/#/experiments/6


In [9]:
class TimeSeriesAnalyzer:
    def __init__(self):
        self.stationarity_results = {}

    def check_stationarity(self, series, name="Series"):
        adf_result = adfuller(series.dropna())
        adf_pvalue = adf_result[1]
        kpss_result = kpss(series.dropna())
        kpss_pvalue = kpss_result[1]

        print(f"\n{name} Stationarity Tests:")
        print(f"ADF Test - p-value: {adf_pvalue:.4f} ({'Stationary' if adf_pvalue < 0.05 else 'Non-stationary'})")
        print(f"KPSS Test - p-value: {kpss_pvalue:.4f} ({'Stationary' if kpss_pvalue > 0.05 else 'Non-stationary'})")

        self.stationarity_results[name] = {
            'adf_pvalue': adf_pvalue,
            'kpss_pvalue': kpss_pvalue,
            'is_stationary': adf_pvalue < 0.05 and kpss_pvalue > 0.05
        }

        return adf_pvalue < 0.05 and kpss_pvalue > 0.05

    def plot_decomposition(self, series, freq=52, title="Time Series Decomposition"):
        decomposition = seasonal_decompose(series, model='additive', period=freq)
        fig, axes = plt.subplots(4, 1, figsize=(15, 12))
        decomposition.observed.plot(ax=axes[0], title=f'{title} - Original')
        decomposition.trend.plot(ax=axes[1], title='Trend')
        decomposition.seasonal.plot(ax=axes[2], title='Seasonal')
        decomposition.resid.plot(ax=axes[3], title='Residual')
        plt.tight_layout()
        plt.show()

        return decomposition

In [10]:
class WalmartTimeSeriesPreprocessor:
    def __init__(self):
        self.store_dept_scalers = {}
        self.is_fitted = False

    def prepare_time_series_data(self, data, target_col='Weekly_Sales'):
        data = data.copy()
        data['Date'] = pd.to_datetime(data['Date'])
        data = data.sort_values(['Store', 'Dept', 'Date'])
        ts_data = []
        store_dept_combinations = data.groupby(['Store', 'Dept']).size().reset_index(name='count')

        for _, row in store_dept_combinations.iterrows():
            store, dept = row['Store'], row['Dept']
            subset = data[(data['Store'] == store) & (data['Dept'] == dept)].copy()
            if len(subset) > 10:
                subset = subset.set_index('Date')
                subset = subset.sort_index()

                ts_data.append({
                    'store': store,
                    'dept': dept,
                    'data': subset,
                    'series': subset[target_col] if target_col in subset.columns else None
                })

        return ts_data

    def create_hierarchical_series(self, data, level='total'):
        data = data.copy()
        data['Date'] = pd.to_datetime(data['Date'])

        if level == 'total':
            series = data.groupby('Date')['Weekly_Sales'].sum().sort_index()
        elif level == 'store':
            series = data.groupby(['Store', 'Date'])['Weekly_Sales'].sum().reset_index()
        elif level == 'dept':
            series = data.groupby(['Dept', 'Date'])['Weekly_Sales'].sum().reset_index()
        else:
            series = data.groupby(['Store', 'Dept', 'Date'])['Weekly_Sales'].sum().reset_index()

        return series

In [11]:
ts_analyzer = TimeSeriesAnalyzer()
ts_preprocessor = WalmartTimeSeriesPreprocessor()

total_sales = ts_preprocessor.create_hierarchical_series(train_processed_clean, level='total')
print(f"Total sales series length: {len(total_sales)}")

store_sales = ts_preprocessor.create_hierarchical_series(train_processed_clean, level='store')
print(f"Store level data shape: {store_sales.shape}")

dept_sales = ts_preprocessor.create_hierarchical_series(train_processed_clean, level='dept')
print(f"Department level data shape: {dept_sales.shape}")

Total sales series length: 143
Store level data shape: (6435, 3)
Department level data shape: (11090, 3)


## Split Data and Evaluation

In [12]:
def create_time_series_splits(data, validation_size=0.2):
    data = data.copy()
    data['Date'] = pd.to_datetime(data['Date'])
    unique_dates = sorted(data['Date'].unique())
    total_periods = len(unique_dates)

    split_point = int(total_periods * (1 - validation_size))
    train_end_date = unique_dates[split_point - 1]
    val_start_date = unique_dates[split_point]

    train_data = data[data['Date'] <= train_end_date]
    val_data = data[data['Date'] >= val_start_date]

    print(f"Training period: {train_data['Date'].min()} to {train_data['Date'].max()}")
    print(f"Validation period: {val_data['Date'].min()} to {val_data['Date'].max()}")
    print(f"Training samples: {len(train_data)}, Validation samples: {len(val_data)}")

    return train_data, val_data

In [13]:
def evaluate_time_series_model(model, train_series, val_series, model_name):
    try:
        fitted_model = model.fit()

        val_steps = len(val_series)
        forecast = fitted_model.forecast(steps=val_steps)

        mae = mean_absolute_error(val_series, forecast)
        mse = mean_squared_error(val_series, forecast)
        rmse = np.sqrt(mse)

        mape = np.mean(np.abs((val_series - forecast) / val_series)) * 100

        metrics = {
            'MAE': mae,
            'MSE': mse,
            'RMSE': rmse,
            'MAPE': mape,
            'AIC': fitted_model.aic,
            'BIC': fitted_model.bic
        }

        print(f"\n{model_name} Validation Results:")
        for metric, value in metrics.items():
            print(f"{metric}: {value:.4f}")

        return fitted_model, forecast, metrics

    except Exception as e:
        print(f"Error evaluating {model_name}: {e}")
        return None, None, None

In [14]:
train_split, val_split = create_time_series_splits(train_processed_clean, validation_size=0.2)

train_total_sales = ts_preprocessor.create_hierarchical_series(train_split, level='total')
val_total_sales = ts_preprocessor.create_hierarchical_series(val_split, level='total')

print(f"Training series length: {len(train_total_sales)}")
print(f"Validation series length: {len(val_total_sales)}")

Training period: 2010-02-05 00:00:00 to 2012-04-06 00:00:00
Validation period: 2012-04-13 00:00:00 to 2012-10-26 00:00:00
Training samples: 335761, Validation samples: 85809
Training series length: 114
Validation series length: 29


# Prophet

In [15]:
class ProphetModeler:
    def __init__(self):
        self.models = {}
        self.predictions = {}

    def prepare_prophet_data(self, series, exog_data=None):
        if isinstance(series, pd.Series):
            df = pd.DataFrame({
                'ds': series.index,
                'y': series.values
            })
        else:
            df = series.rename(columns={'Date': 'ds', 'Weekly_Sales': 'y'})
        if exog_data is not None:
            for col in exog_data.columns:
                if col not in ['Date', 'Weekly_Sales']:
                    df[col] = exog_data[col].values

        return df

    def fit_prophet(self, data, name="Prophet",
                   yearly_seasonality=True,
                   weekly_seasonality=True,
                   holidays=None,
                   regressors=None):
        model = Prophet(
            yearly_seasonality=yearly_seasonality,
            weekly_seasonality=weekly_seasonality,
            seasonality_mode='multiplicative',
            changepoint_prior_scale=0.05,
            interval_width=0.95
        )

        if holidays is not None:
            model.holidays = holidays
        if regressors is not None:
            for regressor in regressors:
                if regressor in data.columns:
                    model.add_regressor(regressor)

        model.fit(data)
        self.models[name] = model
        print(f"{name} Prophet model fitted successfully")
        return model

    def predict(self, model_name, periods, freq='W', include_history=True):
        if model_name not in self.models:
            raise ValueError(f"Model {model_name} not found")

        model = self.models[model_name]
        future = model.make_future_dataframe(periods=periods, freq=freq,
                                           include_history=include_history)
        forecast = model.predict(future)
        self.predictions[model_name] = forecast
        return forecast

    def plot_forecast(self, model_name):
        if model_name not in self.models or model_name not in self.predictions:
            raise ValueError(f"Model or predictions for {model_name} not found")
        model = self.models[model_name]
        forecast = self.predictions[model_name]

        fig1 = model.plot(forecast)
        plt.title(f'{model_name} - Forecast')
        plt.show()

        fig2 = model.plot_components(forecast)
        plt.show()

        return fig1, fig2

In [16]:
def create_holiday_df():
    holidays = []
    super_bowl_dates = ['2010-02-12', '2011-02-11', '2012-02-10', '2013-02-08']
    for date in super_bowl_dates:
        holidays.append({
            'holiday': 'Super Bowl',
            'ds': pd.to_datetime(date),
            'lower_window': -1,
            'upper_window': 1
        })

    labor_day_dates = ['2010-09-10', '2011-09-09', '2012-09-07', '2013-09-06']
    for date in labor_day_dates:
        holidays.append({
            'holiday': 'Labor Day',
            'ds': pd.to_datetime(date),
            'lower_window': -1,
            'upper_window': 1
        })
    thanksgiving_dates = ['2010-11-26', '2011-11-25', '2012-11-23', '2013-11-29']
    for date in thanksgiving_dates:
        holidays.append({
            'holiday': 'Thanksgiving',
            'ds': pd.to_datetime(date),
            'lower_window': -1,
            'upper_window': 2
        })
    christmas_dates = ['2010-12-31', '2011-12-30', '2012-12-28', '2013-12-27']
    for date in christmas_dates:
        holidays.append({
            'holiday': 'Christmas',
            'ds': pd.to_datetime(date),
            'lower_window': -1,
            'upper_window': 1
        })

    return pd.DataFrame(holidays)

In [17]:
def evaluate_prophet_model(model, train_data, val_data, model_name):
    try:

        fitted_model = model.fit(train_data)
        val_periods = len(val_data)
        future = fitted_model.make_future_dataframe(periods=val_periods, freq='W')
        forecast = fitted_model.predict(future)
        val_forecast = forecast.tail(val_periods)['yhat'].values
        val_actual = val_data['y'].values
        mae = mean_absolute_error(val_actual, val_forecast)
        mse = mean_squared_error(val_actual, val_forecast)
        rmse = np.sqrt(mse)
        mape = np.mean(np.abs((val_actual - val_forecast) / val_actual)) * 100

        metrics = {
            'MAE': mae,
            'MSE': mse,
            'RMSE': rmse,
            'MAPE': mape
        }

        print(f"\n{model_name} Validation Results:")
        for metric, value in metrics.items():
            print(f"{metric}: {value:.4f}")

        return fitted_model, val_forecast, metrics

    except Exception as e:
        print(f"Error evaluating {model_name}: {e}")
        return None, None, None


In [18]:
with mlflow.start_run(run_name="Prophet_Training"):
    prophet_modeler = ProphetModeler()
    mlflow.log_param("model_type", "Prophet")
    mlflow.log_param("validation_size", 0.2)
    mlflow.log_param("seasonality_mode", "multiplicative")
    mlflow.log_param("changepoint_prior_scale", 0.05)
    holidays_df = create_holiday_df()

    print("\n1. Training Prophet on Total Sales with Validation")
    train_prophet_data = prophet_modeler.prepare_prophet_data(train_total_sales)
    val_prophet_data = prophet_modeler.prepare_prophet_data(val_total_sales)
    prophet_model = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        seasonality_mode='multiplicative',
        changepoint_prior_scale=0.05,
        interval_width=0.95,
        holidays=holidays_df
    )
    fitted_prophet, prophet_forecast, prophet_metrics = evaluate_prophet_model(
        prophet_model, train_prophet_data, val_prophet_data, "Prophet_Total"
    )

    if fitted_prophet:
        for metric_name, metric_value in prophet_metrics.items():
            mlflow.log_metric(f"total_{metric_name.lower()}", metric_value)
        full_total_sales = pd.concat([train_total_sales, val_total_sales])
        full_prophet_data = prophet_modeler.prepare_prophet_data(full_total_sales)

        final_prophet_model = Prophet(
            yearly_seasonality=True,
            weekly_seasonality=True,
            seasonality_mode='multiplicative',
            changepoint_prior_scale=0.05,
            interval_width=0.95,
            holidays=holidays_df
        )
        final_prophet_model.fit(full_prophet_data)
        prophet_modeler.models["Prophet_Total"] = final_prophet_model
        with open('models/prophet_total_model.pkl', 'wb') as f:
            pickle.dump(final_prophet_model, f)
        mlflow.log_artifact('models/prophet_total_model.pkl')


    print("\n2. Training Prophet for Top 5 Stores with Validation")
    top_stores = train_split.groupby('Store')['Weekly_Sales'].sum().nlargest(5).index
    store_prophet_models = {}
    store_prophet_metrics = {}

    for store in top_stores:
        print(f"\nTraining Prophet for Store {store}")
        store_train_data = train_split[train_split['Store'] == store]
        store_val_data = val_split[val_split['Store'] == store]
        if len(store_train_data) > 20 and len(store_val_data) > 5:
            store_train_series = store_train_data.groupby('Date')['Weekly_Sales'].sum().sort_index()
            store_val_series = store_val_data.groupby('Date')['Weekly_Sales'].sum().sort_index()
            store_train_prophet = prophet_modeler.prepare_prophet_data(store_train_series)
            store_val_prophet = prophet_modeler.prepare_prophet_data(store_val_series)
            store_prophet_model = Prophet(
                yearly_seasonality=True,
                weekly_seasonality=True,
                seasonality_mode='multiplicative',
                changepoint_prior_scale=0.05,
                holidays=holidays_df
            )
            fitted_store_prophet, store_prophet_forecast, store_prophet_metrics_dict = evaluate_prophet_model(
                store_prophet_model, store_train_prophet, store_val_prophet, f"Prophet_Store_{store}"
            )
            if fitted_store_prophet:
                store_prophet_metrics[store] = store_prophet_metrics_dict
                for metric_name, metric_value in store_prophet_metrics_dict.items():
                    mlflow.log_metric(f"store_{store}_{metric_name.lower()}", metric_value)
                combined_store_data = pd.concat([
                    train_split[train_split['Store'] == store],
                    val_split[val_split['Store'] == store]
                ])
                combined_store_series = combined_store_data.groupby('Date')['Weekly_Sales'].sum().sort_index()
                combined_store_prophet = prophet_modeler.prepare_prophet_data(combined_store_series)

                final_store_prophet = Prophet(
                    yearly_seasonality=True,
                    weekly_seasonality=True,
                    seasonality_mode='multiplicative',
                    changepoint_prior_scale=0.05,
                    holidays=holidays_df
                )
                final_store_prophet.fit(combined_store_prophet)
                store_prophet_models[store] = final_store_prophet
        else:
            print(f"Insufficient data for store {store}")

    with open('models/prophet_store_models.pkl', 'wb') as f:
        pickle.dump(store_prophet_models, f)
    mlflow.log_artifact('models/prophet_store_models.pkl')

    print("\n3. Training Prophet for Top 5 Departments with Validation")
    top_depts = train_split.groupby('Dept')['Weekly_Sales'].sum().nlargest(5).index
    dept_prophet_models = {}
    dept_prophet_metrics = {}
    for dept in top_depts:
        print(f"\nTraining Prophet for Department {dept}")
        dept_train_data = train_split[train_split['Dept'] == dept]
        dept_val_data = val_split[val_split['Dept'] == dept]
        if len(dept_train_data) > 30 and len(dept_val_data) > 8:
            dept_train_series = dept_train_data.groupby('Date')['Weekly_Sales'].sum().sort_index()
            dept_val_series = dept_val_data.groupby('Date')['Weekly_Sales'].sum().sort_index()
            dept_train_prophet = prophet_modeler.prepare_prophet_data(dept_train_series)
            dept_val_prophet = prophet_modeler.prepare_prophet_data(dept_val_series)
            dept_prophet_model = Prophet(
                yearly_seasonality=True,
                weekly_seasonality=True,
                seasonality_mode='multiplicative',
                changepoint_prior_scale=0.08,
                holidays=holidays_df
            )

            dept_prophet_model.add_seasonality(
                name='monthly',
                period=30.5,
                fourier_order=5
            )
            fitted_dept_prophet, dept_prophet_forecast, dept_prophet_metrics_dict = evaluate_prophet_model(
                dept_prophet_model, dept_train_prophet, dept_val_prophet, f"Prophet_Dept_{dept}"
            )
            if fitted_dept_prophet:
                dept_prophet_metrics[dept] = dept_prophet_metrics_dict
                for metric_name, metric_value in dept_prophet_metrics_dict.items():
                    mlflow.log_metric(f"dept_{dept}_{metric_name.lower()}", metric_value)
                combined_dept_data = pd.concat([
                    train_split[train_split['Dept'] == dept],
                    val_split[val_split['Dept'] == dept]
                ])
                combined_dept_series = combined_dept_data.groupby('Date')['Weekly_Sales'].sum().sort_index()
                combined_dept_prophet = prophet_modeler.prepare_prophet_data(combined_dept_series)
                final_dept_prophet = Prophet(
                    yearly_seasonality=True,
                    weekly_seasonality=True,
                    seasonality_mode='multiplicative',
                    changepoint_prior_scale=0.08,
                    holidays=holidays_df
                )
                final_dept_prophet.add_seasonality(
                    name='monthly',
                    period=30.5,
                    fourier_order=5
                )
                final_dept_prophet.fit(combined_dept_prophet)

                dept_prophet_models[dept] = final_dept_prophet
        else:
            print(f"Insufficient data for department {dept}")

    with open('models/prophet_dept_models.pkl', 'wb') as f:
        pickle.dump(dept_prophet_models, f)
    mlflow.log_artifact('models/prophet_dept_models.pkl')
    print("\nProphet Training with Validation Complete!")


1. Training Prophet on Total Sales with Validation


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/o_l3pjp7.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/uhtdjmxd.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=85166', 'data', 'file=/tmp/tmpkk52wudm/o_l3pjp7.json', 'init=/tmp/tmpkk52wudm/uhtdjmxd.json', 'output', 'file=/tmp/tmpkk52wudm/prophet_modelsirdsqh3/prophet_model-20250706143252.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:32:52 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:32:52 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing



Prophet_Total Validation Results:
MAE: 8836645.4156
MSE: 80400615228632.0312
RMSE: 8966639.0152
MAPE: 19.0179


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/ctj8kyrt.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/do18qv1i.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=40797', 'data', 'file=/tmp/tmpkk52wudm/ctj8kyrt.json', 'init=/tmp/tmpkk52wudm/do18qv1i.json', 'output', 'file=/tmp/tmpkk52wudm/prophet_modelvd95t5l3/prophet_model-20250706143303.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:33:03 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:33:03 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk


2. Training Prophet for Top 5 Stores with Validation

Training Prophet for Store 20

Prophet_Store_20 Validation Results:
MAE: 523886.2238
MSE: 289011944298.6796
RMSE: 537598.3113
MAPE: 25.5158


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/_7eyohn5.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/37mrst7a.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=72917', 'data', 'file=/tmp/tmpkk52wudm/_7eyohn5.json', 'init=/tmp/tmpkk52wudm/37mrst7a.json', 'output', 'file=/tmp/tmpkk52wudm/prophet_modeljv8e3ruh/prophet_model-20250706143317.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:33:17 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:33:17 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk


Training Prophet for Store 14

Prophet_Store_14 Validation Results:
MAE: 518260.6145
MSE: 292737015930.1584
RMSE: 541051.7683
MAPE: 30.8025


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/3rx2_si0.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/do_g1l10.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=60870', 'data', 'file=/tmp/tmpkk52wudm/3rx2_si0.json', 'init=/tmp/tmpkk52wudm/do_g1l10.json', 'output', 'file=/tmp/tmpkk52wudm/prophet_model7w4tjhwz/prophet_model-20250706143329.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:33:29 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:33:29 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk


Training Prophet for Store 4

Prophet_Store_4 Validation Results:
MAE: 629151.0090
MSE: 400877392888.6628
RMSE: 633148.7921
MAPE: 29.3484


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/twb4052c.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/gkxbtr2f.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=13068', 'data', 'file=/tmp/tmpkk52wudm/twb4052c.json', 'init=/tmp/tmpkk52wudm/gkxbtr2f.json', 'output', 'file=/tmp/tmpkk52wudm/prophet_model7e635_nm/prophet_model-20250706143341.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:33:41 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:33:41 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk


Training Prophet for Store 13

Prophet_Store_13 Validation Results:
MAE: 133691.3664
MSE: 22674445770.2598
RMSE: 150580.3632
MAPE: 6.6194


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/gzfcbwg0.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/bfx6zra3.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=181', 'data', 'file=/tmp/tmpkk52wudm/gzfcbwg0.json', 'init=/tmp/tmpkk52wudm/bfx6zra3.json', 'output', 'file=/tmp/tmpkk52wudm/prophet_model74f55v7_/prophet_model-20250706143353.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:33:53 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:33:53 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52


Training Prophet for Store 2

Prophet_Store_2 Validation Results:
MAE: 163319.0283
MSE: 32038653580.7787
RMSE: 178993.4456
MAPE: 8.7393


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/i2i3h9zb.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/7b_u15ja.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=42536', 'data', 'file=/tmp/tmpkk52wudm/i2i3h9zb.json', 'init=/tmp/tmpkk52wudm/7b_u15ja.json', 'output', 'file=/tmp/tmpkk52wudm/prophet_modelli5bpb6k/prophet_model-20250706143405.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:34:05 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:34:05 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk


3. Training Prophet for Top 5 Departments with Validation

Training Prophet for Department 92

Prophet_Dept_92 Validation Results:
MAE: 543675.4835
MSE: 350852733344.5341
RMSE: 592328.2311
MAPE: 15.8966


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/4g1vflnn.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/ftputn4a.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=22175', 'data', 'file=/tmp/tmpkk52wudm/4g1vflnn.json', 'init=/tmp/tmpkk52wudm/ftputn4a.json', 'output', 'file=/tmp/tmpkk52wudm/prophet_model8jygspgx/prophet_model-20250706143418.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:34:18 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:34:18 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk


Training Prophet for Department 95

Prophet_Dept_95 Validation Results:
MAE: 194943.7587
MSE: 47197827627.4221
RMSE: 217250.6102
MAPE: 5.9838


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/gto615zf.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/z3d6ugw6.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=15555', 'data', 'file=/tmp/tmpkk52wudm/gto615zf.json', 'init=/tmp/tmpkk52wudm/z3d6ugw6.json', 'output', 'file=/tmp/tmpkk52wudm/prophet_model3sqbs0q6/prophet_model-20250706143430.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:34:30 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:34:30 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk


Training Prophet for Department 38

Prophet_Dept_38 Validation Results:
MAE: 385564.8236
MSE: 167186639463.9722
RMSE: 408884.6285
MAPE: 14.2605


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/l3tx2yay.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/rq2t7hi3.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=33024', 'data', 'file=/tmp/tmpkk52wudm/l3tx2yay.json', 'init=/tmp/tmpkk52wudm/rq2t7hi3.json', 'output', 'file=/tmp/tmpkk52wudm/prophet_modelmn4wgivt/prophet_model-20250706143442.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:34:42 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:34:42 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk


Training Prophet for Department 72

Prophet_Dept_72 Validation Results:
MAE: 2542729.6824
MSE: 6512668625658.4238
RMSE: 2551993.0693
MAPE: 147.1561


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/q1ci460c.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/xwo3jn2_.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=17781', 'data', 'file=/tmp/tmpkk52wudm/q1ci460c.json', 'init=/tmp/tmpkk52wudm/xwo3jn2_.json', 'output', 'file=/tmp/tmpkk52wudm/prophet_modeli0zelo7j/prophet_model-20250706143454.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:34:54 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:34:54 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk


Training Prophet for Department 90

Prophet_Dept_90 Validation Results:
MAE: 216675.5123
MSE: 54989844702.9891
RMSE: 234499.1358
MAPE: 10.6895


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/5eb7vzdy.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpkk52wudm/0nu0e091.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=48414', 'data', 'file=/tmp/tmpkk52wudm/5eb7vzdy.json', 'init=/tmp/tmpkk52wudm/0nu0e091.json', 'output', 'file=/tmp/tmpkk52wudm/prophet_model3m1fneut/prophet_model-20250706143506.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:35:06 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:35:06 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing



Prophet Training with Validation Complete!
🏃 View run Prophet_Training at: https://dagshub.com/egval20/ML-Final.mlflow/#/experiments/6/runs/b22108d45a494593ad000803c3dcd0e3
🧪 View experiment at: https://dagshub.com/egval20/ML-Final.mlflow/#/experiments/6
