<a href="https://colab.research.google.com/github/akash1629/Demand-Forecasting-Inventory-Optimization-via-Gen-AI/blob/main/Deman_forecasting_and_inventory_optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ===========================
#  Generative AI–Driven Demand Forecasting & Inventory Optimization
# ===========================

# ==========================================
#  1. SETUP: Install Dependencies in Colab
# ==========================================
# NOTE: If you're running locally, install these packages with "pip install" in your own environment instead.
# In Colab, the following line installs the required libraries:

!pip install statsmodels pmdarima prophet pulp plotly==5.15.0 python-dotenv

# ==========================================
#  2. IMPORTS & GLOBAL SETTINGS
# ==========================================
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import warnings
warnings.filterwarnings("ignore")

from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet

import pulp  # For linear programming

import datetime
import random

# ==========================================
#  3. GENERATE SYNTHETIC DATA
# ==========================================
# We'll create three datasets:
#   1. sales_data - daily product sales for ~6 months
#   2. inventory_data - hypothetical daily inventory levels for each product (random walk from a starting level)
#   3. macroeconomic_data - a mock macroeconomic indicator that might influence sales

# Seed NumPy's global RNG once. Every random draw below uses that global
# generator, so the generated values depend on the exact order of the calls:
# reordering or inserting draws changes all data produced after that point.
np.random.seed(42)

# Parameters
num_days = 180
start_date = datetime.date(2023, 1, 1)
# One datetime.date per day, starting at start_date (consecutive, no gaps).
dates = [start_date + datetime.timedelta(days=i) for i in range(num_days)]
product_ids = ["P1", "P2", "P3"]  # Three products for example

# Generate SALES DATA
# One row per (date, product): baseline + sinusoidal seasonality + Gaussian noise.
sales_rows = []
for product in product_ids:
    # Per-product parameters, drawn once before the daily loop.
    # np.random.randint excludes the upper bound: base in 20..49, amplitude in 5..14.
    base = np.random.randint(20, 50)  # average daily sales baseline
    seasonal_amplitude = np.random.randint(5, 15)

    for i, d in enumerate(dates):
        # Sine wave over the day offset i with a 30-day period
        season_factor = np.sin(2 * np.pi * (i / 30))  # monthly-like season
        noise = np.random.normal(loc=0, scale=3)
        sales_val = base + seasonal_amplitude * season_factor + noise
        if sales_val < 0:
            # Not a clamp: a negative value is replaced by a random integer in 5..9.
            # This consumes an extra RNG draw only on the days it triggers.
            sales_val = np.random.randint(5, 10)  # replace negative sales with a small positive value
        sales_rows.append((d, product, round(sales_val, 2)))

sales_df = pd.DataFrame(sales_rows, columns=["date", "product_id", "sales"])

# Generate INVENTORY DATA
# Each product gets a starting level followed by a daily random walk.
# The walk is generated independently of sales_df (sales never reduce inventory here).
inventory_rows = []
for product in product_ids:
    # base inventory (integer in 500..699)
    inv_level = np.random.randint(500, 700)
    for d in dates:
        # random fluctuation: integer step in -5..5
        change = np.random.randint(-5, 6)
        inv_level += change
        if inv_level < 0:
            # Negative stock is replaced by a random level in 100..199.
            inv_level = np.random.randint(100, 200)
        inventory_rows.append((d, product, inv_level))

inventory_df = pd.DataFrame(inventory_rows, columns=["date", "product_id", "inventory_level"])

# Generate MACROECONOMIC DATA
# We'll simulate a single macro indicator that changes slowly over time.
# It has one value per date (no product dimension), starting from 100.0.
macro_vals = []
macro_indicator = 100.0
for d in dates:
    # random walk: Gaussian step with mean 0 and standard deviation 0.2
    step = np.random.normal(0, 0.2)
    macro_indicator += step
    # Only the stored value is rounded; the running level keeps full precision.
    macro_vals.append((d, round(macro_indicator, 2)))
macro_df = pd.DataFrame(macro_vals, columns=["date", "macro_indicator"])

# Show first few rows (optional)
print("Sales Data (head):")
print(sales_df.head())
print("\nInventory Data (head):")
print(inventory_df.head())
print("\nMacroeconomic Data (head):")
print(macro_df.head())

# ==========================================
#  4. DATA PREPROCESSING & MERGING
# ==========================================
def preprocess_and_merge_data(sales_df, inv_df, macro_df):
    """Combine sales, inventory and macro data into one sorted frame.

    Args:
        sales_df: Frame with at least ``date`` and ``product_id`` columns;
            its rows drive the result (left side of both joins).
        inv_df: Frame joined on ``date`` and ``product_id``.
        macro_df: Frame joined on ``date`` only, so its values repeat for
            every product on the same date.

    Returns:
        A new DataFrame with exact duplicate rows removed, ordered by
        ``product_id`` then ``date``, with a fresh 0..n-1 index. Rows of
        ``sales_df`` without a match keep NaN in the joined columns.
    """
    # Left joins: every sales row survives even when a lookup has no match.
    with_inventory = sales_df.merge(inv_df, how="left", on=["date", "product_id"])
    with_macro = with_inventory.merge(macro_df, how="left", on="date")

    # Basic cleaning, expressed as a non-mutating chain.
    return (
        with_macro.drop_duplicates()
        .sort_values(["product_id", "date"])
        .reset_index(drop=True)
    )

# Build the modeling table: sales rows enriched with inventory_level (matched on
# date + product_id) and macro_indicator (matched on date).
combined_df = preprocess_and_merge_data(sales_df, inventory_df, macro_df)
print("\nMerged Dataset (head):")
print(combined_df.head())

# ==========================================
#  5. MOCK GPT-4 FEATURE GENERATION
# ==========================================
# For demonstration, we'll create a simple function that simulates GPT-based feature creation.
# In a real project, you'd call OpenAI API and parse the output.

def mock_gpt_feature(row):
    """Bucket a row's ``macro_indicator`` into a mock sentiment label.

    Stand-in for a real LLM call: the label is derived purely from fixed
    thresholds on ``row["macro_indicator"]``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing a
            ``"macro_indicator"`` key.

    Returns:
        ``"Positive"`` when the indicator is at least 100.5, ``"Neutral"``
        when it is at least 99.5, otherwise ``"Negative"`` (which is also
        the result for NaN, since every comparison with NaN is false).
    """
    indicator = row["macro_indicator"]
    # Thresholds are checked from highest to lowest; the first one met wins.
    for lower_bound, label in ((100.5, "Positive"), (99.5, "Neutral")):
        if indicator >= lower_bound:
            return label
    return "Negative"

# Row-wise apply (axis=1): each row is passed to mock_gpt_feature and the
# returned label is stored in a new "gpt_factor" column.
# NOTE(review): in the code visible here, neither forecasting model reads this
# column; both are fit on date + sales only.
combined_df["gpt_factor"] = combined_df.apply(mock_gpt_feature, axis=1)

# ==========================================
#  6. TIME-SERIES FORECASTING (ARIMA & PROPHET)
# ==========================================
# We'll focus on one product at a time (e.g., "P1") for a demonstration.
product_of_interest = "P1"
df_p1 = combined_df[combined_df["product_id"] == product_of_interest].copy()

# Prepare data for ARIMA (univariate): sales on a sorted DatetimeIndex.
df_p1_arima = df_p1[["date", "sales"]].copy()
df_p1_arima["date"] = pd.to_datetime(df_p1_arima["date"])
df_p1_arima.set_index("date", inplace=True)
df_p1_arima.sort_index(inplace=True)
# Declare the daily frequency explicitly. Without it statsmodels has to infer
# the frequency of the index itself and emits a warning every time it builds
# the date index (model construction, fitting, forecasting).
df_p1_arima = df_p1_arima.asfreq("D")

# ARIMA model: order=(p, d, q) = (2, 1, 2), i.e. fit on first differences.
arima_model = ARIMA(df_p1_arima["sales"], order=(2, 1, 2))
arima_fitted = arima_model.fit()

# Prepare data for Prophet, which requires the columns to be named ds and y.
df_p1_prophet = df_p1[["date", "sales"]].copy()
df_p1_prophet = df_p1_prophet.rename(columns={"date": "ds", "sales": "y"})
# Use real datetimes here too, so both models are fit on identically typed dates.
df_p1_prophet["ds"] = pd.to_datetime(df_p1_prophet["ds"])
df_p1_prophet.sort_values("ds", inplace=True)

prophet_model = Prophet()
prophet_model.fit(df_p1_prophet)

# Forecast horizon (e.g., next 30 days), starting the day after the last observation.
forecast_horizon = 30
last_date = df_p1_arima.index.max()
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=forecast_horizon)

# ARIMA Forecast: out-of-sample predictions for the next forecast_horizon steps.
arima_forecast = arima_fitted.forecast(steps=forecast_horizon)

# Prophet Forecast: predict on exactly the same future dates; yhat is the point forecast.
future_df = pd.DataFrame({"ds": future_dates})
prophet_forecast_df = prophet_model.predict(future_df)

# Combine results into one DataFrame. Both forecasts are aligned by position
# (.values), which is valid because they cover the same consecutive dates.
df_forecast = pd.DataFrame({
    "date": future_dates,
    "arima_forecast": arima_forecast.values,
    "prophet_forecast": prophet_forecast_df["yhat"].values,
})

# ==========================================
#  7. HYBRID FORECAST (Simple Average)
# ==========================================
# Equal-weight (50/50) average of the ARIMA and Prophet point forecasts.
df_forecast["hybrid_forecast"] = 0.5 * df_forecast["arima_forecast"] + 0.5 * df_forecast["prophet_forecast"]

print("\nSample Combined Forecasts (head):")
print(df_forecast.head())

# ==========================================
#  8. INVENTORY OPTIMIZATION
# ==========================================
# We'll pick the final "hybrid_forecast" as demand for the next 30 days
# (.values gives a NumPy array with one demand value per forecast day, in date order)
demand_forecast = df_forecast["hybrid_forecast"].values

# We'll assume a simple scenario:
#   - holding_cost per unit per day
#   - stockout_cost penalty if demand is unmet
#   - we can produce/stock 'x' units each day within a limit
# We'll do a minimal LP approach with pulp

import pulp

def optimize_inventory_plan(demand_forecast, holding_cost=2.0, stockout_cost=10.0, max_inv=1000):
    """Pick a per-period inventory level minimizing holding plus stockout cost.

    Solves a linear program with two continuous variables per period ``i``:
    ``inv_i`` in ``[0, max_inv]`` and ``shortfall_i >= 0`` constrained by
    ``shortfall_i >= demand_i - inv_i``. Periods are independent of each
    other: there is no constraint carrying inventory from one period to the
    next.

    Args:
        demand_forecast: Sequence (list, NumPy array, ...) of per-period
            demand values, indexable by position.
        holding_cost: Cost charged per unit of inventory in a period.
        stockout_cost: Cost charged per unit of unmet demand in a period.
        max_inv: Upper bound on the inventory level of any single period.

    Returns:
        A tuple ``(inventory_levels, shortfalls, total_cost)``. The first two
        are lists with one solved value per period; ``total_cost`` is the
        value of the objective. An empty forecast yields ``([], [], 0.0)``.
    """
    n_periods = len(demand_forecast)
    if n_periods == 0:
        # Nothing to plan: avoid building and solving an empty model.
        return [], [], 0.0

    periods = range(n_periods)
    problem = pulp.LpProblem("Inventory_Optimization", pulp.LpMinimize)

    # Decision variables.
    # inv[i] = inventory level at day i
    inv = pulp.LpVariable.dicts("inv", periods, lowBound=0, upBound=max_inv, cat=pulp.LpContinuous)
    # shortfall[i] = unmet demand at day i. References are kept in a list so
    # the solved values can be read back directly after solving, instead of
    # searching all problem variables by name for every period.
    shortfall = [
        pulp.LpVariable(f"shortfall_{i}", lowBound=0, cat=pulp.LpContinuous)
        for i in periods
    ]

    # Objective: minimize holding + stockout costs. lpSum builds the
    # expression in one pass rather than by repeated "+=" on a growing sum.
    problem += pulp.lpSum(
        holding_cost * inv[i] + stockout_cost * shortfall[i] for i in periods
    )

    # shortfall[i] must cover any demand exceeding inventory; with its lower
    # bound of 0 and a positive stockout cost it settles at
    # max(0, demand - inv). float() keeps NumPy scalars out of PuLP arithmetic.
    for i in periods:
        problem += shortfall[i] >= float(demand_forecast[i]) - inv[i]

    # Solve quietly with the CBC solver bundled with PuLP.
    problem.solve(pulp.PULP_CBC_CMD(msg=0))

    # Extract results
    final_inv = [pulp.value(inv[i]) for i in periods]
    shortfalls = [pulp.value(var) for var in shortfall]

    return final_inv, shortfalls, pulp.value(problem.objective)

# Solve the plan for the forecast demand using the function's default costs
# (holding_cost=2.0, stockout_cost=10.0) and default max_inv=1000.
inv_plan, shortfall_plan, total_opt_cost = optimize_inventory_plan(demand_forecast)

# Both result lists have one entry per forecast day, so they line up with
# df_forecast's rows by position.
df_forecast["optimized_inventory"] = inv_plan
df_forecast["shortfall"] = shortfall_plan

print("\n=== Inventory Optimization Results ===")
print("Total Cost:", total_opt_cost)
print(df_forecast.head())

# ==========================================
#  9. VISUALIZATION: PLOTLY
# ==========================================
import plotly.express as px
import plotly.graph_objects as go

# Forecast comparison: one line per model, drawn in this order.
# Each entry is (df_forecast column, plotly trace mode, legend label).
forecast_traces = (
    ("arima_forecast", "lines", "ARIMA Forecast"),
    ("prophet_forecast", "lines", "Prophet Forecast"),
    ("hybrid_forecast", "lines+markers", "Hybrid Forecast"),
)

fig = go.Figure()
for column, trace_mode, legend_label in forecast_traces:
    fig.add_trace(
        go.Scatter(
            x=df_forecast["date"],
            y=df_forecast[column],
            mode=trace_mode,
            name=legend_label,
        )
    )

fig.update_layout(
    title="Forecast Comparison (ARIMA vs. Prophet vs. Hybrid)",
    xaxis_title="Date",
    yaxis_title="Forecasted Sales",
)
fig.show()

# Inventory plan: optimized level as the base line, shortfall overlaid on top.
fig2 = px.line(
    df_forecast,
    x="date",
    y="optimized_inventory",
    title="Optimized Inventory Over Forecast Horizon",
)
shortfall_trace = go.Scatter(
    x=df_forecast["date"],
    y=df_forecast["shortfall"],
    mode="lines+markers",
    name="Shortfall",
)
fig2.add_trace(shortfall_trace)
fig2.update_layout(yaxis_title="Units")
fig2.show()

# Closing summary of what the notebook has done.
print("\nDone! You've generated synthetic data, ran ARIMA & Prophet forecasts, integrated a mock GPT factor, ")
print("combined forecasts into a hybrid approach, performed a simple inventory optimization, and visualized results.")


Collecting pmdarima
  Downloading pmdarima-2.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (7.8 kB)
Collecting pulp
  Downloading PuLP-2.9.0-py3-none-any.whl.metadata (5.4 kB)
Collecting plotly==5.15.0
  Downloading plotly-5.15.0-py2.py3-none-any.whl.metadata (7.0 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading plotly-5.15.0-py2.py3-none-any.whl (15.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.5/15.5 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pmdarima-2.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (2.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m36.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PuLP-2.9.0-py3-none-any.whl (17.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.7/17.7 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpytdvjdds/oj7ds9l1.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpytdvjdds/0vlad061.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=55019', 'data', 'file=/tmp/tmpytdvjdds/oj7ds9l1.json', 'init=/tmp/tmpytdvjdds/0vlad061.json', 'output', 'file=/tmp/tmpytdvjdds/prophet_modeloot4hgin/prophet_model-20250126170803.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:08:03 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
17:08:03 - cmdstanpy - INFO - Chain [1] d


Sample Combined Forecasts (head):
        date  arima_forecast  prophet_forecast  hybrid_forecast
0 2023-06-30       21.279648         25.375977        23.327812
1 2023-07-01       22.847732         25.856275        24.352003
2 2023-07-02       21.817584         24.694954        23.256269
3 2023-07-03       22.581302         24.478603        23.529952
4 2023-07-04       21.984004         25.681117        23.832561

=== Inventory Optimization Results ===
Total Cost: 1427.481602
        date  arima_forecast  prophet_forecast  hybrid_forecast  \
0 2023-06-30       21.279648         25.375977        23.327812   
1 2023-07-01       22.847732         25.856275        24.352003   
2 2023-07-02       21.817584         24.694954        23.256269   
3 2023-07-03       22.581302         24.478603        23.529952   
4 2023-07-04       21.984004         25.681117        23.832561   

   optimized_inventory  shortfall  
0            23.327812        0.0  
1            24.352003        0.0  
2     


Done! You've generated synthetic data, ran ARIMA & Prophet forecasts, integrated a mock GPT factor, 
combined forecasts into a hybrid approach, performed a simple inventory optimization, and visualized results.
