In [34]:
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_absolute_error
from matplotlib import pyplot as plt

In [35]:
df = pd.read_csv("/home/zeal/Documents/6th sem/inv. mangmnt sy/chuimui/updated_data.csv")
df.head()

Unnamed: 0,Order_ID,Material_ID,Item_ID,Warehouse_x,Destination,Available_Time,Deadline,Danger_Type,Area,Weight,Warehouse_ID_x,warehouse_id,Warehouse_y,Distance(M),Warehouse_ID_y,Destination_id,Category,Quantity
0,A140109,B-6128,P01-79c46a02-e12f-41c4-9ec9-25e48597ebfe,Ecom Express - Rohtak,Blue Dart Hub - Siliguri,2022-04-05 23:59:59,2022-04-11 23:59:59,type_1,38880,30920000,WH_001,WH_043,Ecom Express - Rohtak,2444326,WH_001,0,AUTOMOTIVE,197
1,A140109,B-6128,P01-43f08b0f-87f8-4a3f-91b8-40ed1947bdaa,Ecom Express - Rohtak,Blue Dart Hub - Siliguri,2022-04-05 23:59:59,2022-04-11 23:59:59,type_1,38880,30920000,WH_001,WH_043,Ecom Express - Rohtak,2444326,WH_001,0,AUTOMOTIVE,49
2,A140109,B-6128,P01-899d7387-aab0-4443-b6ba-7520fb4ee981,Ecom Express - Rohtak,Blue Dart Hub - Siliguri,2022-04-05 23:59:59,2022-04-11 23:59:59,type_1,38880,30920000,WH_001,WH_043,Ecom Express - Rohtak,2444326,WH_001,0,AUTOMOTIVE,38
3,A140109,B-6128,P01-acc23cdf-7fe7-4388-b8ff-5704eed86ef5,Ecom Express - Rohtak,Blue Dart Hub - Siliguri,2022-04-05 23:59:59,2022-04-11 23:59:59,type_1,38880,30920000,WH_001,WH_043,Ecom Express - Rohtak,2444326,WH_001,0,AUTOMOTIVE,47
4,A140109,B-6128,P01-cd0377d4-770c-45c3-9bd8-a5b098246e7e,Ecom Express - Rohtak,Blue Dart Hub - Siliguri,2022-04-05 23:59:59,2022-04-11 23:59:59,type_1,38880,30920000,WH_001,WH_043,Ecom Express - Rohtak,2444326,WH_001,0,AUTOMOTIVE,191


In [36]:
df.columns

Index(['Order_ID', 'Material_ID', 'Item_ID', 'Warehouse_x', 'Destination',
       'Available_Time', 'Deadline', 'Danger_Type', 'Area', 'Weight',
       'Warehouse_ID_x', 'warehouse_id', 'Warehouse_y', 'Distance(M)',
       'Warehouse_ID_y', 'Destination_id', 'Category', 'Quantity'],
      dtype='object')

In [37]:
#  Preprocessing
# Rename 'Destination' to 'Warehouse'
df = df.rename(columns={'Destination': 'Warehouse'})

# Drop 'Warehouse_x' column
df = df.drop('Warehouse_x', axis=1)

# Combine Category and Material_ID for more granularity.  Remove either if needed
df['Product'] = df['Category'] + '_' + df['Material_ID']

# Aggregate data to daily level
df['Available_Time'] = pd.to_datetime(df['Available_Time'])
daily_df = df.groupby(['Available_Time', 'Warehouse', 'Product'])['Quantity'].sum().reset_index()
daily_df = daily_df.rename(columns={'Available_Time': 'ds', 'Quantity': 'y'}) # Prophet Requirements
print(daily_df.head())


                   ds                      Warehouse  \
0 2022-04-05 12:00:00            Amazon FC - Udaipur   
1 2022-04-05 16:00:00            Amazon FC - Udaipur   
2 2022-04-05 16:00:00  Delhivery Warehouse - Jodhpur   
3 2022-04-05 19:00:00           Amazon FC - Amritsar   
4 2022-04-05 19:00:00            Amazon FC - Dhanbad   

                     Product    y  
0  HOME AND KITCHEN I_C-0121  655  
1  HOME AND KITCHEN I_C-0121  730  
2              BEAUTY_E-0838  400  
3        PET SUPPLIES_E-1251  543  
4        BREAD/BAKERY_X-3286  146  


In [51]:
from joblib import Parallel, delayed
import os

# Function to train Prophet model and forecast (to be parallelized)
def forecast_product_warehouse(df, product, warehouse, periods=7):
    """
    Trains a Prophet model for a specific product and warehouse,
    and saves the forecast to a CSV file.
    """
    product_warehouse_df = df[(df['Product'] == product) & (df['Warehouse'] == warehouse)].copy()

    if product_warehouse_df.empty:
        print(f"No data for Product: {product}, Warehouse: {warehouse} - Empty DataFrame")
        return None

    # Remove NaN values:
    product_warehouse_df = product_warehouse_df.dropna()  # Drop rows with ANY NaN values
    #Alternative is to fillna, if it makes sense for your data
    #product_warehouse_df = product_warehouse_df.fillna(0)

    print(f"Forecasting Product: {product}, Warehouse: {warehouse}")  # Debugging
    print(f"DataFrame shape: {product_warehouse_df.shape}")  # Debugging

    if len(product_warehouse_df) < 2:
        print(f"Not enough data (less than 2 rows) for Product: {product}, Warehouse: {warehouse}")
        return None

    # Prophet model
    try:  # Added a try-except block
        model = Prophet()
        model.fit(product_warehouse_df)
    except Exception as e:
        print(f"Error fitting model for Product: {product}, Warehouse: {warehouse}: {e}")
        return None

    # Make future dataframe
    future = model.make_future_dataframe(periods=periods)
    forecast = model.predict(future)

    # Select only the forecast period
    forecast_7_days = forecast.tail(periods)[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

    # Add product and warehouse info
    forecast_7_days['Product'] = product
    forecast_7_days['Warehouse'] = warehouse

    # Save the forecast to a CSV file
    filename = f"forecasts/forecast_{product.replace('_', '-')}_{warehouse.replace(' ', '-')}.csv" # Create more readable filenames
    forecast_7_days.to_csv(filename, index=False)
    print(f"Forecast saved to {filename}")

    return None # No need to return the forecast

# Create 'forecasts' directory if it doesn't exist
if not os.path.exists('forecasts'):
    os.makedirs('forecasts')

# Get unique combinations of Product and Warehouse
product_warehouse_combos = daily_df[['Product', 'Warehouse']].drop_duplicates()

# Prepare arguments for parallel processing
tasks = [
    (daily_df, row['Product'], row['Warehouse'])
    for index, row in product_warehouse_combos.iterrows()
]

# Run the forecasting in parallel
# Adjust n_jobs based on your CPU cores. -1 means use all available cores
num_cores = os.cpu_count()
print(f"Number of available cores: {num_cores}")
Parallel(n_jobs=num_cores)(delayed(forecast_product_warehouse)(*task) for task in tasks)

print("All forecasts complete.")


Number of available cores: 20
Forecasting Product: BREAD/BAKERY_X-3286, Warehouse: Amazon FC - Dhanbad
DataFrame shape: (1, 4)
Not enough data (less than 2 rows) for Product: BREAD/BAKERY_X-3286, Warehouse: Amazon FC - Dhanbad
Forecasting Product: AUTOMOTIVE_M-1129, Warehouse: Flipkart Hub - Bengaluru
DataFrame shape: (1, 4)
Not enough data (less than 2 rows) for Product: AUTOMOTIVE_M-1129, Warehouse: Flipkart Hub - Bengaluru
Forecasting Product: PET SUPPLIES_E-1251, Warehouse: Amazon FC - Amritsar
DataFrame shape: (2, 4)
Forecasting Product: PLAYERS AND ELECTRONICS_X-2117, Warehouse: Amazon FC - Raipur
DataFrame shape: (1, 4)
Not enough data (less than 2 rows) for Product: PLAYERS AND ELECTRONICS_X-2117, Warehouse: Amazon FC - Raipur
Forecasting Product: DELI_X-6142, Warehouse: Amazon FC - Vijayawada
DataFrame shape: (1, 4)
Not enough data (less than 2 rows) for Product: DELI_X-6142, Warehouse: Amazon FC - Vijayawada
Forecasting Product: AUTOMOTIVE_B-6128, Warehouse: Blue Dart Hub - S

04:41:11 - cmdstanpy - INFO - Chain [1] start processing
04:41:11 - cmdstanpy - INFO - Chain [1] start processing
04:41:11 - cmdstanpy - INFO - Chain [1] start processing
04:41:11 - cmdstanpy - INFO - Chain [1] start processing
04:41:11 - cmdstanpy - INFO - Chain [1] start processing
04:41:11 - cmdstanpy - INFO - Chain [1] start processing
04:41:11 - cmdstanpy - INFO - Chain [1] done processing
04:41:11 - cmdstanpy - INFO - Chain [1] start processing
04:41:11 - cmdstanpy - INFO - Chain [1] done processing
04:41:11 - cmdstanpy - INFO - Chain [1] start processing
04:41:11 - cmdstanpy - INFO - Chain [1] done processing
04:41:11 - cmdstanpy - INFO - Chain [1] start processing
04:41:11 - cmdstanpy - INFO - Chain [1] start processing
04:41:12 - cmdstanpy - INFO - Chain [1] done processing
04:41:12 - cmdstanpy - INFO - Chain [1] done processing
04:41:12 - cmdstanpy - INFO - Chain [1] done processing
04:41:12 - cmdstanpy - INFO - Chain [1] done processing
04:41:12 - cmdstanpy - INFO - Chain [1

Forecasting Product: CLEANING_M-0938, Warehouse: Delhivery Warehouse - Aurangabad
DataFrame shape: (1, 4)
Not enough data (less than 2 rows) for Product: CLEANING_M-0938, Warehouse: Delhivery Warehouse - Aurangabad
Forecasting Product: BEVERAGES_C-0327, Warehouse: Delhivery Warehouse - Belgaum
DataFrame shape: (3, 4)
Forecasting Product: BOOKS_B-6298, Warehouse: Delhivery Warehouse - Nagpur
DataFrame shape: (2, 4)
Forecasting Product: BEAUTY_E-0838, Warehouse: Delhivery Warehouse - Jodhpur
DataFrame shape: (3, 4)
Forecast saved to forecasts/forecast_PLAYERS AND ELECTRONICS-C-0335_DTDC-Hub---Mangalore.csv
Forecasting Product: BREAD/BAKERY_B-6216, Warehouse: Delhivery Warehouse - Nagpur
DataFrame shape: (2, 4)
Forecasting Product: POULTRY_F-001L, Warehouse: Ecom Express - Chennai
DataFrame shape: (2, 4)
Forecasting Product: BEAUTY_E-0930, Warehouse: Ecom Express - Gorakhpur
DataFrame shape: (1, 4)
Not enough data (less than 2 rows) for Product: BEAUTY_E-0930, Warehouse: Ecom Express - Go

04:41:12 - cmdstanpy - INFO - Chain [1] done processing
04:41:12 - cmdstanpy - INFO - Chain [1] start processing
04:41:12 - cmdstanpy - INFO - Chain [1] done processing
04:41:12 - cmdstanpy - INFO - Chain [1] done processing
04:41:12 - cmdstanpy - INFO - Chain [1] done processing
04:41:12 - cmdstanpy - INFO - Chain [1] done processing
04:41:12 - cmdstanpy - INFO - Chain [1] start processing
04:41:12 - cmdstanpy - INFO - Chain [1] done processing
04:41:12 - cmdstanpy - INFO - Chain [1] start processing
04:41:12 - cmdstanpy - INFO - Chain [1] done processing
04:41:12 - cmdstanpy - INFO - Chain [1] start processing
04:41:12 - cmdstanpy - INFO - Chain [1] start processing
04:41:12 - cmdstanpy - INFO - Chain [1] start processing
04:41:12 - cmdstanpy - INFO - Chain [1] done processing
04:41:12 - cmdstanpy - INFO - Chain [1] done processing
04:41:12 - cmdstanpy - INFO - Chain [1] start processing
04:41:12 - cmdstanpy - INFO - Chain [1] start processing
04:41:12 - cmdstanpy - INFO - Chain [1] 

Forecasting Product: BEVERAGES_C-0040, Warehouse: Amazon FC - Hubli
DataFrame shape: (1, 4)
Not enough data (less than 2 rows) for Product: BEVERAGES_C-0040, Warehouse: Amazon FC - Hubli
Forecast saved to forecasts/forecast_BOOKS-B-6298_Delhivery-Warehouse---Nagpur.csv
Forecasting Product: HOME AND KITCHEN II_B-6257, Warehouse: Blue Dart Hub - Tiruchirappalli
DataFrame shape: (1, 4)
Not enough data (less than 2 rows) for Product: HOME AND KITCHEN II_B-6257, Warehouse: Blue Dart Hub - Tiruchirappalli
Forecast saved to forecasts/forecast_GROCERY I-M-0361_Flipkart-FC---Salem.csv
Forecast saved to forecasts/forecast_HOME AND KITCHEN I-C-0121_Amazon-FC---Udaipur.csv
Forecast saved to forecasts/forecast_GROCERY II-E-0088_DTDC-Hub---Mangalore.csv
Forecast saved to forecasts/forecast_LINGERIE-E-1515_Amazon-FC---Vijayawada.csv
Forecast saved to forecasts/forecast_BOOKS-X-7078_Amazon-FC---Coimbatore.csv
Forecast saved to forecasts/forecast_PET SUPPLIES-E-1251_Amazon-FC---Amritsar.csv


OSError: Cannot save file into a non-existent directory: 'forecasts/forecast_BREAD'