In [1]:
unique_products = ['Round steak, 1 kilogram',
 'Sirloin steak, 1 kilogram',
 'Prime rib roast, 1 kilogram',
 'Blade roast, 1 kilogram',
 'Stewing beef, 1 kilogram',
 'Ground beef, 1 kilogram',
 'Pork chops, 1 kilogram',
 'Butt roast, 1 kilogram',
 'Chicken, 1 kilogram',
 'Bacon, 500 grams',
 'Wieners, 450 grams',
 'Canned salmon, 213 grams',
 'Homogenized milk, 1 litre',
 'Partly skimmed milk, 1 litre',
 'Butter, 454 grams',
 'Processed cheese slices, 250 grams',
 'Evaporated milk, 385 millilitres',
 'Eggs, 1 dozen',
 'Bread, 675 grams',
 'Soda crackers, 450 grams',
 'Macaroni, 500 grams',
 'Flour, 2.5 kilograms',
 'Corn flakes, 675 grams',
 'Apples, 1 kilogram',
 'Bananas, 1 kilogram',
 'Grapefruits, 1 kilogram',
 'Oranges, 1 kilogram',
 'Apple juice, 1.36 litres',
 'Orange juice, 1 litre',
 'Cabbage, 1 kilogram',
 'Carrots, 1 kilogram',
 'Celery, 1 kilogram',
 'Mushrooms, 1 kilogram',
 'Onions, 1 kilogram',
 'Potatoes, 4.54 kilograms',
 'French fried potatoes, frozen, 1 kilogram',
 'Baked beans, canned, 398 millilitres',
 'Tomatoes, canned, 796 millilitres',
 'Tomato juice, 1.36 litres',
 'Ketchup, 1 litre',
 'Sugar, white, 2 kilograms',
 'Coffee, roasted, 300 grams',
 'Coffee, instant, 200 grams',
 'Tea (72 bags)',
 'Cooking or salad oil, 1 litre',
 'Soup, canned, 284 millilitres',
 'Baby food, 128 millilitres',
 'Peanut butter, 500 grams',
 'Fruit flavoured crystals, 2.25 litres',
 'Soft drinks, cola type, 2 litres',
 'Soft drinks, lemon-lime type, 2 litres',
 'Laundry detergent, 4 litres',
 'Paper towels (2 rolls)',
 'Facial tissue (200 tissues)',
 'Bathroom tissue (4 rolls)',
 'Shampoo, 300 millilitres',
 'Deodorant, 60 grams',
 'Toothpaste, 100 millilitres',
 'Cigarettes (200)',
 'Regular, unleaded gasoline at self-service stations, cents per litre',
 'Homogenized milk, 4 litres',
 'Partly skimmed milk, 4 litres']

In [2]:
def load_weights(unique_products, data_path):
    """
    Load pre-trained SARIMA model weights for each unique product from the specified directory.

    Parameters:
    - unique_products (list): List of unique product names.
    - data_path (str): Path to the directory containing the saved model weights.

    Returns:
    - dict: A dictionary containing product names and their corresponding SARIMA model weights.
    """
    # Initialize an empty dictionary to store model weights
    weights = {}

    # Iterate through unique product names
    for item in tqdm(unique_products, desc='Loading Weights'):
        # Load the SARIMA model weight for the current product
        weight = SARIMAXResults.load(f'{data_path}/{item}_model.pkl')

        # Store the model weight in the dictionary
        weights[item] = weight

    return weights

In [3]:
def save_forecast_results(model_results, steps=12, last_date='2022-02-28'):
    """
    Generate and save forecast results based on a time series model.

    Parameters:
    - model_results (object): The result object from a time series forecasting model.
    - steps (int, optional): Number of steps (future time points) to forecast. Default is 12.
    - last_date (str, optional): The last date in the historical data. Default is '2022-02-28'.

    Returns:
    - dict: A dictionary containing forecasted values for each future date.
    """
    # Generate forecast based on the provided number of steps
    forecast = model_results.get_forecast(steps=steps)

    # Convert forecasted values back to the original scale
    forecasted_values = np.exp(forecast.predicted_mean)

    # Generate future dates for the forecast
    future_dates = pd.date_range(start=last_date, periods=steps + 1, freq='M')[1:]

    # Create a dictionary mapping each future date to its corresponding forecasted value
    forecast_dict = {date.strftime("%b-%Y"): value for date, value in zip(future_dates, forecasted_values)}

    return forecast_dict

In [4]:
import pandas as pd

def get_last_date_data(data):
    """
    Extract and return the data for the last date in the 'REF_DATE' column.

    Parameters:
    - data (DataFrame): The input time series data.

    Returns:
    - DataFrame: The data for the last date in the 'REF_DATE' column.
    """
    # Filter data for the specific product and create a copy to avoid SettingWithCopyWarning
    product_data = data.copy()

    # Convert REF_DATE to datetime and coerce out-of-bounds dates to NaT
    product_data['REF_DATE'] = pd.to_datetime(product_data['REF_DATE'], errors='coerce')


    # Set REF_DATE as the index and specify the frequency
    product_data.set_index('REF_DATE', inplace=True)
    # product_data.index.freq = 'MS'  # Monthly Start frequency


    # Get the data for the last date
    last_date_data = product_data.last('1D')

    return last_date_data

In [5]:
def sales_predictions(predictions, current_value):
    """
    Determine the month with the predicted sales value closest to the current sales value.

    Parameters:
    - predictions (dict): A dictionary mapping months to their corresponding predicted sales values.
    - current_value (float): The current sales value for comparison.

    Returns:
    - str: The month with the predicted sales value closest to the current sales value.
    """
    # Extract predicted sales values from the dictionary
    prediction = list(predictions.values())

    # Convert the list to a NumPy array for numerical operations
    pred = np.array(prediction)

    # Calculate the absolute differences between predicted values and the current value
    diff = np.abs(pred - current_value)

    # Find the index of the minimum difference
    min_diff_index = np.argmin(diff)

    # Retrieve the original value associated with the minimum difference
    original_value = prediction[min_diff_index]

    # Find the month corresponding to the original value
    month = [k for k, v in predictions.items() if v == original_value][0]

    return month


In [12]:
import time
from datetime import date

def pipeline(data, product_name, path_to_models):
    """
    Execute a pipeline to predict sales and determine the best month to buy a specific product.

    Parameters:
    - data (DataFrame): The input time series data.
    - product_name (str): The name of the product for analysis.
    - path_to_models (str): Path to the directory containing saved SARIMA model weights.

    Returns:
    - None: The function prints the result.
    """
    # Step 1: Load SARIMA model weights for all unique products
    weights = load_weights(unique_products, path_to_models)

    # Create a progress bar with a total count of 4 steps
    with tqdm(total=4, desc="Predicting Sales") as pbar:
        # Step 2: Get the data for the last date
        last_date_data = get_last_date_data(data)
        pbar.update(1)  # Update progress bar
        time.sleep(1)
        # Step 3: Extract the last value of the specified product
        last_value_of_product = float(data.loc[data['Products'] == product_name, 'VALUE'][0])
        pbar.update(1)  # Update progress bar
        time.sleep(1)

        # Step 4: Generate sales forecast for the specified product
        sales_forecast = save_forecast_results(weights[product_name], steps=12, last_date=str(date.today()))
        pbar.update(1)  # Update progress bar
        time.sleep(1)

        # Step 5: Determine the best month to buy based on the sales forecast
        best_month_to_buy = sales_predictions(sales_forecast, last_value_of_product)
        pbar.update(1)  # Update progress bar
        time.sleep(1)

    # Print the result
    print(f'\nThe best month to buy {product_name} is {best_month_to_buy}')



In [14]:
import numpy as np
import warnings
from tqdm import tqdm
from joblib import Parallel, delayed

# Load your dataset
data = pd.read_csv('/content/drive/MyDrive/Intro to ML PR/Project/18100002-trimmed.csv')
path_to_models = '/content/drive/MyDrive/Intro to ML PR/Project/Weights'

pipeline(data, 'Round steak, 1 kilogram', path_to_models)

Loading Weights: 100%|██████████| 62/62 [00:03<00:00, 17.52it/s]
Predicting Sales: 100%|██████████| 4/4 [00:04<00:00,  1.01s/it]


The best month to buy Round steak, 1 kilogram is Jan-2024



