<a href="https://colab.research.google.com/github/adadoun/inventoryPlanningRecommendation/blob/main/ArimaSalesPrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sales Data Analysis

This notebook uses a statistical model, ARIMA, to predict sales.

In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
import itertools
warnings.filterwarnings('ignore')

In [None]:
# Colab-only setup: mount Google Drive at /content/drive so the CSV files
# read from 'drive/MyDrive/...' in the loading cell are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load train/test datasets
# Both paths are relative, so they resolve against the current working
# directory; Drive is mounted at /content/drive, so this expects the notebook
# to be running from /content.
# Later cells read at least the SKU, DATE and QUANTITY_SOLD columns of
# train_data and the QUANTITY_SOLD column of test_data.
train_data = pd.read_csv('drive/MyDrive/Collab_DATA/PolarData/train_data.csv')
test_data = pd.read_csv('drive/MyDrive/Collab_DATA/PolarData/test_data.csv')

## Utility functions for the ARIMA algorithm

In [None]:
def optimize_arima_params(train_data: pd.Series) -> tuple:
    """
    Find the optimal ARIMA parameters using AIC criterion.

    Every (p, d, q) combination with p in 0..2, d in 0..1 and q in 0..2 is
    fitted on ``train_data``; the order whose fit reports the lowest AIC wins.
    Candidates that raise while fitting are skipped.

    Args:
        train_data (pd.Series): Time series data for training.

    Returns:
        tuple: Best order (p, d, q) for ARIMA model, or ``None`` when no
        candidate could be fitted.
    """
    p = range(0, 3)
    d = range(0, 2)
    q = range(0, 3)

    best_aic = float('inf')
    best_order = None

    for order in itertools.product(p, d, q):
        try:
            aic = ARIMA(train_data, order=order).fit().aic
        except Exception:
            # A candidate that cannot be fitted is simply not eligible.
            # Only Exception is caught (not a bare except) so that
            # KeyboardInterrupt / SystemExit can still stop a long search.
            continue

        # A NaN AIC never compares as smaller, so such fits are ignored.
        if aic < best_aic:
            best_aic = aic
            best_order = order

    return best_order

def fit_arima(train_data: pd.Series) -> tuple:
    """
    Fit an ARIMA model with optimized parameters.

    The order is chosen by ``optimize_arima_params`` and a model with that
    order is then fitted on the same series.

    Args:
        train_data (pd.Series): Time series data for training.

    Returns:
        tuple: (fitted_model, best_order) - The fitted ARIMA model and its optimal parameters.

    Raises:
        ValueError: If the order search returns ``None``, i.e. no candidate
            order could be fitted on ``train_data``.
    """
    best_order = optimize_arima_params(train_data)
    if best_order is None:
        # Fail with an explicit message instead of handing order=None to ARIMA.
        raise ValueError(
            "No ARIMA order could be fitted for this series "
            f"({len(train_data)} observations)."
        )

    model_fit = ARIMA(train_data, order=best_order).fit()
    return model_fit, best_order

def make_predictions(model: ARIMA, steps: int) -> np.ndarray:
    """
    Forecast future values from an already-fitted ARIMA model.

    Args:
        model (ARIMA): Fitted model object exposing ``forecast`` (in this
            notebook, the first element returned by ``fit_arima``).
        steps (int): Number of steps to forecast.

    Returns:
        np.ndarray: The predicted values, exactly as ``model.forecast``
        returns them.
    """
    forecast = model.forecast(steps=steps)
    return forecast

def evaluate_model(actual: np.ndarray, predicted: np.ndarray) -> tuple:
    """
    Evaluate the model performance using various metrics.

    Both inputs are converted to float arrays first, so values are compared
    position by position even when pandas objects with different indexes are
    passed in.

    MAPE is undefined where the actual value is zero, so it is computed only
    over the positions whose actual value is non-zero; MAE and RMSE always
    use every position.

    Args:
        actual (np.ndarray): Array of actual values.
        predicted (np.ndarray): Array of predicted values.

    Returns:
        tuple: (MAE, RMSE, MAPE) - Mean Absolute Error, Root Mean Square Error, and Mean Absolute Percentage Error.
        MAPE is NaN when every actual value is zero.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    mae = mean_absolute_error(actual, predicted)
    rmse = np.sqrt(mean_squared_error(actual, predicted))

    # Dividing by a zero actual would turn the whole mean into inf/NaN.
    nonzero = actual != 0
    if nonzero.any():
        pct_error = np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])
        mape = np.mean(pct_error) * 100
    else:
        mape = float('nan')

    return mae, rmse, mape

## ARIMA Algorithm

In [None]:
# Fitting a grid of ARIMA models per SKU is slow, so only the first MAX_SKUS
# distinct SKUs found in the training data are modelled.
MAX_SKUS = 500

# Dictionary to store results for each SKU
results = {}

for sku in train_data['SKU'].unique()[:MAX_SKUS]:

    # Training series for this SKU, indexed by date.
    # NOTE(review): rows are used in file order; assumes the CSV is already
    # chronological within each SKU - confirm.
    sku_data = train_data[train_data['SKU'] == sku].set_index('DATE')['QUANTITY_SOLD']

    # Held-out observations for THIS SKU only. Scoring against the whole test
    # set (all SKUs mixed together) compares each forecast with unrelated
    # series and makes every SKU forecast len(test_data) steps.
    # Assumes test_data has the same SKU column as train_data.
    sku_actual = test_data.loc[test_data['SKU'] == sku, 'QUANTITY_SOLD'].values
    if len(sku_actual) == 0:
        # Nothing to evaluate against for this SKU.
        continue

    # Fit ARIMA model with optimized parameters
    model, best_order = fit_arima(sku_data)

    # Forecast exactly as many steps as this SKU has test observations;
    # np.asarray drops any forecast index so values are compared by position.
    predictions = np.asarray(make_predictions(model, steps=len(sku_actual)))

    # Evaluate model
    mae, rmse, mape = evaluate_model(sku_actual, predictions)

    # Store results
    results[sku] = {
        'MAE': mae,
        'RMSE': rmse,
        'MAPE': mape,
        'Best ARIMA Order': best_order
    }

assert results, "No SKU from the training data has rows in the test data"

# Convert results to DataFrame (one row per SKU)
results_df = pd.DataFrame.from_dict(results, orient='index')

# Calculate overall average metrics
average_metrics = results_df[['MAE', 'RMSE', 'MAPE']].mean()

print("\nAverage Metrics Across Top SKUs:")
print(f"MAE: {average_metrics['MAE']:.2f}")
print(f"RMSE: {average_metrics['RMSE']:.2f}")
print(f"MAPE: {average_metrics['MAPE']:.2f}%")

print("\nTop 5 SKUs by lowest MAPE:")
print(results_df.sort_values('MAPE').head())

print("\nBottom 5 SKUs by highest MAPE:")
print(results_df.sort_values('MAPE', ascending=False).head())


Average Metrics Across Top SKUs:
MAE: 37081.16
RMSE: 42904.74
MAPE: 329089.48%

Top 5 SKUs by lowest MAPE:
                MAE        RMSE       MAPE Best ARIMA Order
71fa7a84  82.751365  276.346263  83.964496        (2, 1, 1)
546915c8  82.973207  276.424461  83.986085        (0, 1, 2)
5d8c9957  82.690663  276.323707  83.994170        (2, 1, 1)
6b438d83  82.658719  276.312157  84.010403        (0, 1, 1)
19f54204  82.612795  276.295519  84.033671        (0, 1, 1)

Bottom 5 SKUs by highest MAPE:
                   MAE          RMSE          MAPE Best ARIMA Order
55448f55  1.847226e+07  2.129370e+07  1.641439e+08        (2, 1, 2)
79c5cb71  4.911478e+03  4.918467e+03  4.380066e+04        (0, 1, 0)
0436983b  4.329653e+03  4.337350e+03  3.868800e+04        (1, 1, 0)
0c574743  4.165820e+03  4.173732e+03  3.724807e+04        (0, 1, 0)
57652c2a  3.204700e+03  3.213233e+03  2.878946e+04        (0, 1, 0)
