<a href="https://colab.research.google.com/github/ekvirika/WalmartRecruiting/blob/main/notebooks/model_experiment_sarima.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive; a later cell copies the Kaggle API
# token (kaggle.json) from a folder under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Install required packages
!pip install wandb torch torchvision pandas numpy matplotlib seaborn scikit-learn mlflow

# Set up Kaggle API
!pip install kaggle



In [3]:
# Copy the Kaggle API token from the mounted Google Drive into ~/.kaggle
# and restrict its permissions to the current user (mode 600).
!mkdir -p ~/.kaggle
!cp /content/drive/MyDrive/ColabNotebooks/kaggle_API_credentials/kaggle.json ~/.kaggle/kaggle.json
! chmod 600 ~/.kaggle/kaggle.json

In [4]:
# Download the dataset
!kaggle competitions download -c walmart-recruiting-store-sales-forecasting
!unzip -q walmart-recruiting-store-sales-forecasting.zip

walmart-recruiting-store-sales-forecasting.zip: Skipping, found more recently modified local copy (use --force to force download)
replace features.csv.zip? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace sampleSubmission.csv.zip? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace stores.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace test.csv.zip? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace train.csv.zip? [y]es, [n]o, [A]ll, [N]one, [r]ename: n


In [5]:
!unzip -q train.csv.zip
!unzip -q stores.csv.zip
!unzip -q test.csv.zip
!unzip -q features.csv.zip

replace train.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
unzip:  cannot find or open stores.csv.zip, stores.csv.zip.zip or stores.csv.zip.ZIP.
replace test.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace features.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: n


In [None]:
# SARIMA Pipeline for Walmart Sales Forecasting
# =====================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Time series analysis libraries
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# ML libraries
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit

# MLflow for experiment tracking
import mlflow
import mlflow.sklearn
import mlflow.statsmodels
from mlflow.models.signature import infer_signature

# Other utilities
import joblib
from typing import Dict, List, Tuple, Optional
import itertools
from tqdm import tqdm

class SARIMAWalmartPipeline:
    """
    Complete SARIMA pipeline for Walmart sales forecasting

    This pipeline handles:
    1. Data loading and preprocessing
    2. Time series stationarity analysis
    3. Feature engineering for time series
    4. SARIMA model selection and training
    5. Model evaluation and validation
    6. Forecasting and submission generation
    """

    def __init__(self, experiment_name: str = "SARIMA_Training"):
        """
        Initialize the SARIMA pipeline

        Args:
            experiment_name: Name for MLflow experiment
        """
        self.experiment_name = experiment_name
        self.models = {}
        self.best_model = None
        self.best_params = None
        self.best_score = np.inf
        self.scalers = {}

        # Set up MLflow experiment
        mlflow.set_experiment(experiment_name)

    def load_data(self, train_path: str, test_path: str, features_path: str,
                  stores_path: str) -> None:
        """
        Load and merge all datasets

        Args:
            train_path: Path to training data
            test_path: Path to test data
            features_path: Path to features data
            stores_path: Path to stores data
        """
        print("Loading datasets...")

        # Load main datasets
        self.train_df = pd.read_csv(train_path)
        self.test_df = pd.read_csv(test_path)
        self.features_df = pd.read_csv(features_path)
        self.stores_df = pd.read_csv(stores_path)

        print(f"Train data shape: {self.train_df.shape}")
        print(f"Test data shape: {self.test_df.shape}")
        print(f"Features data shape: {self.features_df.shape}")
        print(f"Stores data shape: {self.stores_df.shape}")

    def preprocess_data(self) -> None:
        """
        Preprocess the data for time series analysis

        This includes:
        - Converting date columns to datetime
        - Merging datasets
        - Handling missing values
        - Creating time-based features
        """
        with mlflow.start_run(run_name="SARIMA_Preprocessing", nested=True):
            print("Preprocessing data...")

            # Convert date columns
            self.train_df['Date'] = pd.to_datetime(self.train_df['Date'])
            self.test_df['Date'] = pd.to_datetime(self.test_df['Date'])
            self.features_df['Date'] = pd.to_datetime(self.features_df['Date'])

            # Merge training data with features and stores
            self.train_merged = self.train_df.merge(self.features_df, on=['Store', 'Date'], how='left')
            self.train_merged = self.train_merged.merge(self.stores_df, on='Store', how='left')

            # 🔧 FIX duplicated IsHoliday in train
            if 'IsHoliday_x' in self.train_merged.columns and 'IsHoliday_y' in self.train_merged.columns:
                self.train_merged['IsHoliday'] = self.train_merged['IsHoliday_x'] | self.train_merged['IsHoliday_y']
                self.train_merged.drop(['IsHoliday_x', 'IsHoliday_y'], axis=1, inplace=True)

            # Merge test data with features and stores
            self.test_merged = self.test_df.merge(self.features_df, on=['Store', 'Date'], how='left')
            self.test_merged = self.test_merged.merge(self.stores_df, on='Store', how='left')

            # 🔧 FIX duplicated IsHoliday in test
            if 'IsHoliday_x' in self.test_merged.columns and 'IsHoliday_y' in self.test_merged.columns:
                self.test_merged['IsHoliday'] = self.test_merged['IsHoliday_x'] | self.test_merged['IsHoliday_y']
                self.test_merged.drop(['IsHoliday_x', 'IsHoliday_y'], axis=1, inplace=True)

            # Handle missing values in train
            train_numeric_cols = self.train_merged.select_dtypes(include=[np.number]).columns
            self.train_merged[train_numeric_cols] = self.train_merged[train_numeric_cols].fillna(method='ffill')

            # Handle missing values in test (exclude 'Weekly_Sales' since it's not in test)
            test_numeric_cols = self.test_merged.select_dtypes(include=[np.number]).columns
            test_numeric_cols = [col for col in test_numeric_cols if col != 'Weekly_Sales']
            self.test_merged[test_numeric_cols] = self.test_merged[test_numeric_cols].fillna(method='ffill')



            # Create time-based features
            self._create_time_features()

            # Log preprocessing metrics
            mlflow.log_metric("train_samples", len(self.train_merged))
            mlflow.log_metric("test_samples", len(self.test_merged))
            mlflow.log_metric("missing_values_train", self.train_merged.isnull().sum().sum())

            print("Data preprocessing completed!")

    def _create_time_features(self) -> None:
        """
        Create time-based features for both train and test datasets
        """
        for df in [self.train_merged, self.test_merged]:
            df['Year'] = df['Date'].dt.year
            df['Month'] = df['Date'].dt.month
            df['Week'] = df['Date'].dt.isocalendar().week
            df['DayOfYear'] = df['Date'].dt.dayofyear
            df['Quarter'] = df['Date'].dt.quarter
            df['IsHoliday'] = df['IsHoliday'].astype(int)

            # Cyclical encoding for seasonal features
            df['Month_sin'] = np.sin(2 * np.pi * df['Month'] / 12)
            df['Month_cos'] = np.cos(2 * np.pi * df['Month'] / 12)
            df['Week_sin'] = np.sin(2 * np.pi * df['Week'] / 52)
            df['Week_cos'] = np.cos(2 * np.pi * df['Week'] / 52)

    def analyze_stationarity(self, store_id: int, dept_id: int) -> Dict:
        """
        Analyze stationarity of time series using ADF and KPSS tests

        Args:
            store_id: Store identifier
            dept_id: Department identifier

        Returns:
            Dictionary with stationarity test results
        """
        # Get time series for specific store-department combination
        ts_data = self.train_merged[
            (self.train_merged['Store'] == store_id) &
            (self.train_merged['Dept'] == dept_id)
        ].set_index('Date')['Weekly_Sales'].sort_index()

        # ADF Test (Null hypothesis: series has unit root - non-stationary)
        adf_result = adfuller(ts_data.dropna())

        # KPSS Test (Null hypothesis: series is stationary)
        kpss_result = kpss(ts_data.dropna())

        results = {
            'adf_statistic': adf_result[0],
            'adf_pvalue': adf_result[1],
            'adf_is_stationary': adf_result[1] < 0.05,
            'kpss_statistic': kpss_result[0],
            'kpss_pvalue': kpss_result[1],
            'kpss_is_stationary': kpss_result[1] > 0.05,
            'series_length': len(ts_data)
        }

        return results

    def seasonal_decomposition(self, store_id: int, dept_id: int,
                             period: int = 52) -> None:
        """
        Perform seasonal decomposition of time series

        Args:
            store_id: Store identifier
            dept_id: Department identifier
            period: Seasonal period (52 for weekly data)
        """
        ts_data = self.train_merged[
            (self.train_merged['Store'] == store_id) &
            (self.train_merged['Dept'] == dept_id)
        ].set_index('Date')['Weekly_Sales'].sort_index()

        if len(ts_data) < 2 * period:
            print(f"Not enough data for decomposition. Need at least {2*period} points, got {len(ts_data)}")
            return

        decomposition = seasonal_decompose(ts_data, model='additive', period=period)

        fig, axes = plt.subplots(4, 1, figsize=(15, 12))
        decomposition.observed.plot(ax=axes[0], title='Original')
        decomposition.trend.plot(ax=axes[1], title='Trend')
        decomposition.seasonal.plot(ax=axes[2], title='Seasonal')
        decomposition.resid.plot(ax=axes[3], title='Residual')
        plt.tight_layout()
        plt.show()

        return decomposition

    def find_optimal_sarima_params(self, store_id: int, dept_id: int,
                                  max_p: int = 3, max_d: int = 2, max_q: int = 3,
                                  max_P: int = 2, max_D: int = 1, max_Q: int = 2,
                                  seasonal_period: int = 52) -> Dict:
        """
        Find optimal SARIMA parameters using grid search with AIC criterion

        Args:
            store_id: Store identifier
            dept_id: Department identifier
            max_p, max_d, max_q: Maximum values for non-seasonal parameters
            max_P, max_D, max_Q: Maximum values for seasonal parameters
            seasonal_period: Seasonal period

        Returns:
            Dictionary with best parameters and model performance
        """
        with mlflow.start_run(run_name=f"SARIMA_GridSearch_Store{store_id}_Dept{dept_id}", nested=True):

            # Get time series data
            ts_data = self.train_merged[
                (self.train_merged['Store'] == store_id) &
                (self.train_merged['Dept'] == dept_id)
            ].set_index('Date')['Weekly_Sales'].sort_index()

            if len(ts_data) < 100:  # Need sufficient data for SARIMA
                print(f"Insufficient data for Store {store_id}, Dept {dept_id}")
                return None

            # Generate parameter combinations
            p_values = range(0, max_p + 1)
            d_values = range(0, max_d + 1)
            q_values = range(0, max_q + 1)
            P_values = range(0, max_P + 1)
            D_values = range(0, max_D + 1)
            Q_values = range(0, max_Q + 1)

            param_combinations = list(itertools.product(
                p_values, d_values, q_values, P_values, D_values, Q_values
            ))

            best_aic = np.inf
            best_params = None
            results = []

            print(f"Testing {len(param_combinations)} parameter combinations...")

            for params in tqdm(param_combinations):
                try:
                    p, d, q, P, D, Q = params

                    # Fit SARIMA model
                    model = SARIMAX(ts_data,
                                   order=(p, d, q),
                                   seasonal_order=(P, D, Q, seasonal_period),
                                   enforce_stationarity=False,
                                   enforce_invertibility=False)

                    fitted_model = model.fit(disp=False)

                    aic = fitted_model.aic
                    bic = fitted_model.bic

                    results.append({
                        'params': params,
                        'aic': aic,
                        'bic': bic,
                        'converged': fitted_model.mle_retvals['converged']
                    })

                    if aic < best_aic and fitted_model.mle_retvals['converged']:
                        best_aic = aic
                        best_params = params

                except Exception as e:
                    continue

            # Log best parameters
            if best_params:
                p, d, q, P, D, Q = best_params
                mlflow.log_params({
                    'best_p': p, 'best_d': d, 'best_q': q,
                    'best_P': P, 'best_D': D, 'best_Q': Q,
                    'seasonal_period': seasonal_period
                })
                mlflow.log_metric('best_aic', best_aic)
                mlflow.log_metric('store_id', store_id)
                mlflow.log_metric('dept_id', dept_id)

            return {
                'best_params': best_params,
                'best_aic': best_aic,
                'all_results': results,
                'store_id': store_id,
                'dept_id': dept_id
            }

    def train_sarima_model(self, store_id: int, dept_id: int,
                          params: Optional[Tuple] = None) -> None:
        """
        Train SARIMA model for specific store-department combination

        Args:
            store_id: Store identifier
            dept_id: Department identifier
            params: SARIMA parameters (p,d,q,P,D,Q). If None, will use grid search
        """
        with mlflow.start_run(run_name=f"SARIMA_Training_Store{store_id}_Dept{dept_id}"):

            # Get time series data
            ts_data = self.train_merged[
                (self.train_merged['Store'] == store_id) &
                (self.train_merged['Dept'] == dept_id)
            ].set_index('Date')['Weekly_Sales'].sort_index()

            if len(ts_data) < 100:
                print(f"Insufficient data for Store {store_id}, Dept {dept_id}")
                return

            # Find optimal parameters if not provided
            if params is None:
                param_search = self.find_optimal_sarima_params(store_id, dept_id)
                if param_search and param_search['best_params']:
                    params = param_search['best_params']
                else:
                    print(f"Could not find optimal parameters for Store {store_id}, Dept {dept_id}")
                    return

            p, d, q, P, D, Q = params

            # Split data for validation
            train_size = int(len(ts_data) * 0.8)
            train_data = ts_data[:train_size]
            val_data = ts_data[train_size:]

            # Fit SARIMA model
            model = SARIMAX(train_data,
                           order=(p, d, q),
                           seasonal_order=(P, D, Q, 52),
                           enforce_stationarity=False,
                           enforce_invertibility=False)

            fitted_model = model.fit(disp=False)

            # Make predictions on validation set
            val_predictions = fitted_model.forecast(steps=len(val_data))

            # Calculate metrics
            mae = mean_absolute_error(val_data, val_predictions)
            mse = mean_squared_error(val_data, val_predictions)
            rmse = np.sqrt(mse)

            # Log metrics
            mlflow.log_params({
                'p': p, 'd': d, 'q': q, 'P': P, 'D': D, 'Q': Q,
                'seasonal_period': 52, 'store_id': store_id, 'dept_id': dept_id
            })
            mlflow.log_metrics({
                'mae': mae, 'mse': mse, 'rmse': rmse,
                'aic': fitted_model.aic, 'bic': fitted_model.bic
            })

            # Store model
            model_key = f"store_{store_id}_dept_{dept_id}"
            self.models[model_key] = {
                'model': fitted_model,
                'params': params,
                'metrics': {'mae': mae, 'mse': mse, 'rmse': rmse},
                'store_id': store_id,
                'dept_id': dept_id
            }

            # Save model artifact
            model_path = f"sarima_model_store_{store_id}_dept_{dept_id}"
            mlflow.statsmodels.log_model(fitted_model, model_path)

            print(f"Model trained for Store {store_id}, Dept {dept_id}")
            print(f"Parameters: {params}")
            print(f"Validation RMSE: {rmse:.2f}")

    def train_all_models(self, sample_stores: Optional[List[int]] = None) -> None:
        """
        Train SARIMA models for all store-department combinations

        Args:
            sample_stores: List of store IDs to train on. If None, trains on all stores
        """
        print("Training SARIMA models for all store-department combinations...")

        # Get unique store-department combinations
        combinations = self.train_merged[['Store', 'Dept']].drop_duplicates()

        if sample_stores:
            combinations = combinations[combinations['Store'].isin(sample_stores)]

        print(f"Training models for {len(combinations)} combinations...")

        for _, row in tqdm(combinations.iterrows(), total=len(combinations)):
            store_id, dept_id = row['Store'], row['Dept']

            # Check if we have enough data
            store_dept_data = self.train_merged[
                (self.train_merged['Store'] == store_id) &
                (self.train_merged['Dept'] == dept_id)
            ]

            if len(store_dept_data) >= 100:  # Minimum data requirement
                self.train_sarima_model(store_id, dept_id)
            else:
                print(f"Skipping Store {store_id}, Dept {dept_id} - insufficient data")

    def generate_forecasts(self, forecast_horizon: int = 39) -> pd.DataFrame:
        """
        Generate forecasts for test period using trained models

        Args:
            forecast_horizon: Number of periods to forecast

        Returns:
            DataFrame with forecasts
        """
        print("Generating forecasts...")

        forecasts = []

        for model_key, model_info in self.models.items():
            store_id = model_info['store_id']
            dept_id = model_info['dept_id']
            fitted_model = model_info['model']

            # Get test data for this store-department combination
            test_data = self.test_merged[
                (self.test_merged['Store'] == store_id) &
                (self.test_merged['Dept'] == dept_id)
            ].copy()

            if len(test_data) > 0:
                # Generate forecasts
                forecast = fitted_model.forecast(steps=len(test_data))

                # Create forecast dataframe
                forecast_df = test_data[['Store', 'Dept', 'Date']].copy()
                forecast_df['Weekly_Sales'] = forecast.values if hasattr(forecast, 'values') else forecast

                forecasts.append(forecast_df)

        # Combine all forecasts
        if forecasts:
            final_forecasts = pd.concat(forecasts, ignore_index=True)
            return final_forecasts
        else:
            return pd.DataFrame()


    def create_submission(self, forecasts_df: pd.DataFrame) -> pd.DataFrame:
        """
        Create submission file for Kaggle

        Args:
            forecasts_df: DataFrame with forecasts

        Returns:
            Submission DataFrame
        """
        # Merge with test data to get the Id column
        submission = self.test_merged.merge(
            forecasts_df,
            on=['Store', 'Dept', 'Date'],
            how='left'
        )[['Id', 'Weekly_Sales']]

        # Handle missing predictions with median
        submission['Weekly_Sales'] = submission['Weekly_Sales'].fillna(
            submission['Weekly_Sales'].median()
        )

        return submission

    def evaluate_model_performance(self) -> Dict:
        """
        Evaluate overall model performance using cross-validation

        Returns:
            Dictionary with performance metrics
        """
        print("Evaluating model performance...")

        all_metrics = []

        for model_key, model_info in self.models.items():
            store_id = model_info['store_id']
            dept_id = model_info['dept_id']

            # Get time series data
            ts_data = self.train_merged[
                (self.train_merged['Store'] == store_id) &
                (self.train_merged['Dept'] == dept_id)
            ].set_index('Date')['Weekly_Sales'].sort_index()

            if len(ts_data) >= 120:  # Need enough data for cross-validation
                # Time series cross-validation
                tscv = TimeSeriesSplit(n_splits=3)
                fold_metrics = []

                for train_idx, val_idx in tscv.split(ts_data):
                    train_fold = ts_data.iloc[train_idx]
                    val_fold = ts_data.iloc[val_idx]

                    try:
                        # Fit model on fold
                        params = model_info['params']
                        p, d, q, P, D, Q = params

                        model = SARIMAX(train_fold,
                                       order=(p, d, q),
                                       seasonal_order=(P, D, Q, 52),
                                       enforce_stationarity=False,
                                       enforce_invertibility=False)

                        fitted_model = model.fit(disp=False)

                        # Forecast
                        forecast = fitted_model.forecast(steps=len(val_fold))

                        # Calculate metrics
                        mae = mean_absolute_error(val_fold, forecast)
                        mse = mean_squared_error(val_fold, forecast)
                        rmse = np.sqrt(mse)

                        fold_metrics.append({'mae': mae, 'mse': mse, 'rmse': rmse})

                    except Exception as e:
                        continue

                if fold_metrics:
                    # Average metrics across folds
                    avg_metrics = {
                        'mae': np.mean([m['mae'] for m in fold_metrics]),
                        'mse': np.mean([m['mse'] for m in fold_metrics]),
                        'rmse': np.mean([m['rmse'] for m in fold_metrics]),
                        'store_id': store_id,
                        'dept_id': dept_id
                    }
                    all_metrics.append(avg_metrics)

        if all_metrics:
            # Calculate overall performance
            overall_performance = {
                'mean_mae': np.mean([m['mae'] for m in all_metrics]),
                'mean_mse': np.mean([m['mse'] for m in all_metrics]),
                'mean_rmse': np.mean([m['rmse'] for m in all_metrics]),
                'median_mae': np.median([m['mae'] for m in all_metrics]),
                'median_mse': np.median([m['mse'] for m in all_metrics]),
                'median_rmse': np.median([m['rmse'] for m in all_metrics]),
                'num_models': len(all_metrics)
            }

            return overall_performance
        else:
            return {}

    def save_pipeline(self, filepath: str) -> None:
        """
        Save the complete pipeline

        Args:
            filepath: Path to save the pipeline
        """
        pipeline_data = {
            'models': self.models,
            'best_model': self.best_model,
            'best_params': self.best_params,
            'best_score': self.best_score,
            'experiment_name': self.experiment_name
        }

        joblib.dump(pipeline_data, filepath)
        print(f"Pipeline saved to {filepath}")

    def load_pipeline(self, filepath: str) -> None:
        """
        Load a saved pipeline

        Args:
            filepath: Path to load the pipeline from
        """
        pipeline_data = joblib.load(filepath)

        self.models = pipeline_data['models']
        self.best_model = pipeline_data['best_model']
        self.best_params = pipeline_data['best_params']
        self.best_score = pipeline_data['best_score']
        self.experiment_name = pipeline_data['experiment_name']

        print(f"Pipeline loaded from {filepath}")

# End-to-end run: load -> preprocess -> train -> forecast -> submit -> evaluate -> save.
# In a notebook __name__ is "__main__", so this block runs when the cell is executed
# and leaves pipeline, forecasts, submission and performance as notebook globals.
if __name__ == "__main__":
    # Initialize pipeline (also selects the MLflow experiment with this name)
    pipeline = SARIMAWalmartPipeline("SARIMA_Walmart_Experiment")

    # Load the four CSV files extracted into the working directory by the cells above
    pipeline.load_data('train.csv', 'test.csv', 'features.csv', 'stores.csv')

    # Preprocess data (merge features/stores, fill gaps, add calendar features)
    pipeline.preprocess_data()

    # Train models on a sample of stores. No SARIMA orders are passed, so every
    # store-department series runs the default grid search (864 combinations
    # per series), which makes this by far the slowest step.
    pipeline.train_all_models(sample_stores=[1, 2, 3, 4, 5])

    # Generate forecasts for the test rows of every trained store-department pair
    forecasts = pipeline.generate_forecasts()

    # Create submission and write it to disk
    submission = pipeline.create_submission(forecasts)
    submission.to_csv('sarima_submission.csv', index=False)

    # Evaluate performance (time-series cross-validation over the trained pairs)
    performance = pipeline.evaluate_model_performance()
    print("Overall Performance:", performance)

    # Save pipeline state (models and bookkeeping) with joblib
    pipeline.save_pipeline('sarima_pipeline.pkl')

    print("SARIMA pipeline ready for use!")

Loading datasets...
Train data shape: (421570, 5)
Test data shape: (115064, 4)
Features data shape: (8190, 12)
Stores data shape: (45, 3)
Preprocessing data...
Data preprocessing completed!
Training SARIMA models for all store-department combinations...
Training models for 377 combinations...


  0%|          | 0/377 [00:00<?, ?it/s]

Testing 864 parameter combinations...



  0%|          | 0/864 [00:00<?, ?it/s][A
  0%|          | 1/864 [00:00<01:52,  7.68it/s][A
  0%|          | 2/864 [00:00<03:43,  3.86it/s][A
  0%|          | 3/864 [00:03<21:36,  1.51s/it][A
  0%|          | 4/864 [00:03<14:20,  1.00s/it][A
  1%|          | 5/864 [00:05<16:43,  1.17s/it][A
  1%|          | 6/864 [00:08<26:02,  1.82s/it][A
  1%|          | 7/864 [00:08<19:18,  1.35s/it][A
  1%|          | 8/864 [00:09<18:36,  1.30s/it][A
  1%|          | 9/864 [00:18<50:22,  3.54s/it][A
  1%|          | 10/864 [00:18<37:24,  2.63s/it][A
  1%|▏         | 11/864 [00:21<38:48,  2.73s/it][A
  1%|▏         | 12/864 [00:28<56:16,  3.96s/it][A
  2%|▏         | 13/864 [00:31<51:15,  3.61s/it][A
  2%|▏         | 14/864 [00:37<1:00:06,  4.24s/it][A
  2%|▏         | 15/864 [00:50<1:38:57,  6.99s/it][A
  2%|▏         | 16/864 [00:53<1:21:49,  5.79s/it][A
  2%|▏         | 17/864 [00:59<1:22:39,  5.86s/it][A
  2%|▏         | 18/864 [01:02<1:12:28,  5.14s/it][A
  2%|▏         | 20

Model trained for Store 1, Dept 1
Parameters: (0, 0, 0, 0, 1, 2)
Validation RMSE: 8030.70
Testing 864 parameter combinations...



  0%|          | 0/864 [00:00<?, ?it/s][A
  0%|          | 1/864 [00:00<02:57,  4.87it/s][A
  0%|          | 2/864 [00:03<24:51,  1.73s/it][A
  0%|          | 3/864 [00:06<38:38,  2.69s/it][A
  0%|          | 4/864 [00:07<24:45,  1.73s/it][A
  1%|          | 5/864 [00:11<39:56,  2.79s/it][A
  1%|          | 6/864 [00:15<43:21,  3.03s/it][A
  1%|          | 7/864 [00:15<30:47,  2.16s/it][A
  1%|          | 8/864 [00:16<23:57,  1.68s/it][A
  1%|          | 9/864 [00:37<1:49:55,  7.71s/it][A
  1%|          | 10/864 [00:39<1:25:44,  6.02s/it][A
  1%|▏         | 11/864 [00:51<1:52:44,  7.93s/it][A
  1%|▏         | 12/864 [00:58<1:48:14,  7.62s/it][A
  2%|▏         | 13/864 [01:01<1:27:51,  6.19s/it][A
  2%|▏         | 14/864 [01:09<1:34:54,  6.70s/it][A
  2%|▏         | 15/864 [01:28<2:28:59, 10.53s/it][A
  2%|▏         | 16/864 [01:31<1:54:12,  8.08s/it][A
  2%|▏         | 17/864 [01:34<1:32:12,  6.53s/it][A
  2%|▏         | 18/864 [01:44<1:48:35,  7.70s/it][A
  2%|▏   

Model trained for Store 1, Dept 2
Parameters: (0, 0, 0, 0, 1, 2)
Validation RMSE: 2034.70
Testing 864 parameter combinations...



  0%|          | 0/864 [00:00<?, ?it/s][A
  0%|          | 2/864 [00:00<02:23,  6.02it/s][A
  0%|          | 3/864 [00:12<1:14:43,  5.21s/it][A
  0%|          | 4/864 [00:12<48:00,  3.35s/it]  [A
  1%|          | 5/864 [00:16<51:20,  3.59s/it][A
  1%|          | 6/864 [00:19<48:58,  3.42s/it][A
  1%|          | 7/864 [00:20<34:45,  2.43s/it][A
  1%|          | 8/864 [00:20<26:45,  1.88s/it][A
  1%|          | 9/864 [00:32<1:08:43,  4.82s/it][A
  1%|          | 10/864 [00:33<50:31,  3.55s/it] [A
  1%|▏         | 11/864 [00:44<1:25:58,  6.05s/it][A
  1%|▏         | 12/864 [00:47<1:12:57,  5.14s/it][A
  2%|▏         | 13/864 [00:50<1:00:32,  4.27s/it][A
  2%|▏         | 14/864 [00:59<1:22:55,  5.85s/it][A
  2%|▏         | 15/864 [01:15<2:05:54,  8.90s/it][A
  2%|▏         | 16/864 [01:17<1:38:04,  6.94s/it][A
  2%|▏         | 17/864 [01:20<1:21:01,  5.74s/it][A
  2%|▏         | 18/864 [01:28<1:28:11,  6.25s/it][A
  2%|▏         | 19/864 [01:28<1:02:09,  4.41s/it][A
  2