<a href="https://colab.research.google.com/github/alaaguedda/python-Colab-Trainer/blob/main/restaurant_inventory_forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1) Install libs (run this cell)
!pip install --quiet datasets xgboost scikit-learn pandas pyarrow

# 2) Imports

import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error

from datasets import load_dataset
import pandas as pd



In [None]:
# Download the first 80k rows only when no local parquet copy exists yet.
# Every later run skips the download and reads the cached file directly.
import os

RAW_PARQUET_PATH = "/content/fresh_retail_80k.parquet"  # Colab session storage

if not os.path.exists(RAW_PARQUET_PATH):
    ds = load_dataset("Dingdong-Inc/FreshRetailNet-50K", split="train[:80000]")
    pd.DataFrame(ds).to_parquet(RAW_PARQUET_PATH)

# Always read back from parquet so `df` is built the same way on first and
# subsequent runs.
df = pd.read_parquet(RAW_PARQUET_PATH)
print(df.shape)


In [None]:
from google.colab import drive

# Attach Google Drive under /content/drive.
drive.mount('/content/drive')

# Single location on Drive used for both the write and the read below.
drive_parquet_path = "/content/drive/MyDrive/fresh_retail_80k.parquet"

# Persist the current frame to Drive ...
df.to_parquet(drive_parquet_path)

# ... and read it back, which is all that is needed in a later session.
df = pd.read_parquet(drive_parquet_path)

Mounted at /content/drive


In [None]:
# Reload the dataset from the parquet file stored on Google Drive into `df`.
df = pd.read_parquet("/content/drive/MyDrive/fresh_retail_80k.parquet")

In [None]:
# Inspect which product ids are present (unique() keeps order of first appearance).
prod_ids = df["product_id"].unique()
print("num unique products:", len(prod_ids))
print("first 20 product_ids:", prod_ids[:20])

# Keep only the rows belonging to the first three product ids.
chosen = prod_ids[:3]
# .copy() turns the boolean-mask subset into an independent DataFrame, so the
# column assignments made in later cells do not raise SettingWithCopyWarning.
df = df[df['product_id'].isin(chosen)].copy()
print("final dataset shape:", df.shape)
print("remaining product_ids:", df["product_id"].unique())

num unique products: 257
first 20 product_ids: [ 38 834 411 686 580 596 740 379   4 600 699 548  72 644 638 496 296 631
 310 633]
final dataset shape: (2070, 19)
remaining product_ids: [ 38 834 411]


In [None]:
# Row count for each product id present in `df`.
df["product_id"].value_counts()

In [None]:
# Preview the first rows of the product-filtered frame.
df.head()

In [None]:
# Distinct product ids in `df`; these are the keys used by product_map below.
df["product_id"].unique()

array([ 38, 834, 411])

In [None]:
# Labels assigned in this notebook to the three product ids kept above.
product_map = dict([
    (38, "bread"),
    (411, "chicken"),
    (834, "meat"),
])

# Second name for the same DataFrame object as `df` (no copy is made).
df_filtred = df

In [None]:
# Translate each product id to its label; ids missing from product_map map to NaN.
product_names = df_filtred['product_id'].map(product_map)
df_filtred["product_name"] = product_names

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtred["product_name"] = df_filtred['product_id'].map(product_map)


In [None]:
# Row count per product label, to check the id-to-name mapping.
df_filtred["product_name"].value_counts()

In [None]:
# City, management-group and category id columns are not part of the column
# selection used for modelling, so remove them here.
unused_id_columns = [
    'city_id',
    'management_group_id',
    'first_category_id',
    'second_category_id',
    'third_category_id',
]
df_filtred = df_filtred.drop(columns=unused_id_columns)

In [None]:
# Columns kept for the rest of the notebook, assembled group by group.
key_columns = ['store_id', 'product_name', 'dt']
sales_columns = ['sale_amount', 'discount', 'holiday_flag', 'activity_flag']
weather_columns = ['precpt', 'avg_temperature', 'avg_humidity', 'avg_wind_level']

cols = key_columns + sales_columns + weather_columns

In [None]:
# Keep only the columns listed in `cols`, in that order.
df_filtred = df_filtred[cols]

In [None]:
# Preview the reduced frame.
df_filtred.head()

In [None]:
# From here on the reduced frame is referred to as `df` (same object, not a copy).
df = df_filtred

In [None]:
# Preview `df` after rebinding it to the reduced frame.
df.head()

In [None]:
# Factor applied to the raw `sale_amount` values.
# NOTE(review): the notebook does not state why the factor is 10 — confirm.
SALE_AMOUNT_SCALE = 10

# Build `df` from `df_filtred` with assign(), which returns a new frame and
# leaves `df_filtred` unscaled. Re-running this cell therefore gives the same
# result instead of multiplying an already-scaled column again.
df = df_filtred.assign(sale_amount=df_filtred['sale_amount'] * SALE_AMOUNT_SCALE)

In [None]:
df.to_parquet("/content/drive/MyDrive/fresh_retail_80k.parquet")

In [None]:
from google.colab import drive
drive.mount('/content/drive')
df = pd.read_parquet("/content/drive/MyDrive/fresh_retail_80k.parquet")

Mounted at /content/drive


In [None]:
# Preview the frame that was just loaded from Drive.
df.head()

Unnamed: 0,store_id,product_name,dt,sale_amount,discount,holiday_flag,activity_flag,precpt,avg_temperature,avg_humidity,avg_wind_level
0,0,bread,2024-03-28,1.0,1.0,0,0,1.6999,15.48,73.54,1.97
1,0,bread,2024-03-29,1.0,1.0,0,0,3.019,15.08,76.56,1.71
2,0,bread,2024-03-30,0.0,1.0,1,0,2.0942,15.91,76.47,1.73
3,0,bread,2024-03-31,1.0,1.0,1,0,1.5618,16.13,77.4,1.76
4,0,bread,2024-04-01,2.0,1.0,0,0,3.5386,15.37,78.26,1.25


In [None]:
# Parse the date column once and derive the two calendar features from it.
parsed_dates = pd.to_datetime(df['dt'])
df["month"] = parsed_dates.dt.month
df["day"] = parsed_dates.dt.day

In [None]:
# Preview `df` with the new month and day columns.
df.head()

In [None]:
from sklearn.multioutput import MultiOutputRegressor
from lightgbm import LGBMRegressor

In [None]:
# Column groups used by both modelling stages.
calendar_cols = ['month', 'day']
weather_cols = ['avg_temperature', 'avg_humidity', 'precpt', 'avg_wind_level']
promo_cols = ['discount', 'holiday_flag', 'activity_flag']

# Stage 1: one multi-output model predicting the four weather columns from
# the calendar features alone.
X_weather = df[calendar_cols]
y_weather = df[weather_cols]

Xw_train, Xw_test, yw_train, yw_test = train_test_split(
    X_weather, y_weather, test_size=0.2, random_state=42
)
model_weather = MultiOutputRegressor(LGBMRegressor())
model_weather.fit(Xw_train, yw_train)

# Stage 2: one sales model per product, trained on the weather *predicted*
# by stage 1 followed by the discount/holiday/activity columns.
products = df['product_name'].unique()
models_sales = {}

for product in products:
    rows = df.loc[df['product_name'] == product]

    # Predicted weather for this product's rows (positional 0..n-1 index).
    weather_hat = pd.DataFrame(
        model_weather.predict(rows[calendar_cols]),
        columns=weather_cols,
    )
    # Reset the index so both pieces line up row by row in the concat.
    promo = rows[promo_cols].reset_index(drop=True)

    features = pd.concat([weather_hat, promo], axis=1)
    target = rows['sale_amount'].reset_index(drop=True)

    # Only the 80% training side is used for fitting.
    feat_train, _, target_train, _ = train_test_split(
        features, target, test_size=0.2, random_state=42
    )
    models_sales[product] = LGBMRegressor().fit(feat_train, target_train)

def predict_sales(month, day, discount=1.0, holiday_flag=0, activity_flag=0):
    """Forecast sales for every product on a given calendar day.

    The weather model is queried with ``[month, day]``; its first output row
    is then extended with ``discount``, ``holiday_flag`` and ``activity_flag``
    (the same feature order the sales models were trained with) and passed to
    each product's sales model.

    Returns:
        dict mapping each entry of ``products`` to the first element of that
        product model's prediction.
    """
    # Weather vector in training order: [temp, humidity, precpt, wind].
    weather_vector = model_weather.predict([[month, day]])[0]

    # The feature row is identical for every product; only the model differs.
    feature_row = [*weather_vector, discount, holiday_flag, activity_flag]

    return {
        name: models_sales[name].predict([feature_row])[0]
        for name in products
    }




In [None]:
# Example forecast for 31 January with a 0.8 discount value on a holiday.
raw_predictions = predict_sales(1, 31, discount=0.8, holiday_flag=1, activity_flag=0)

# Convert each prediction to a plain Python float rounded to two decimals.
clean_predictions = {}
for name, value in raw_predictions.items():
    clean_predictions[name] = round(float(value), 2)
print(clean_predictions)


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Score the weather model on the held-out calendar rows.
weather_preds = model_weather.predict(Xw_test)

# One MAE/RMSE line per weather target; column i of the prediction matrix is
# compared with column i of yw_test.
# NOTE(review): the split is random over rows, so the same (month, day) pairs
# may occur in both train and test — confirm these scores mean what is intended.
weather_features = ['avg_temperature', 'avg_humidity', 'precpt', 'avg_wind_level']
for i, feature in enumerate(weather_features):
    actual = yw_test.iloc[:, i]
    predicted = weather_preds[:, i]
    mae = mean_absolute_error(actual, predicted)
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    print(f"{feature}: MAE = {mae:.2f}, RMSE = {rmse:.2f}")


avg_temperature: MAE = 0.30, RMSE = 0.34
avg_humidity: MAE = 0.24, RMSE = 0.30
precpt: MAE = 0.13, RMSE = 0.20
avg_wind_level: MAE = 0.08, RMSE = 0.11


In [None]:
# Evaluate each product's sales model on held-out rows only.
# The training cell fitted every model on the 80% side of
# train_test_split(..., test_size=0.2, random_state=42). Repeating that call on
# identically built features reproduces the same split, so the 20% side holds
# rows the model was not fitted on. Scoring on all rows would mix training
# data into the metric and understate the error.
weather_cols = ['avg_temperature', 'avg_humidity', 'precpt', 'avg_wind_level']

for product in products:
    product_df = df[df['product_name'] == product]

    # Predict weather for this product's dates
    pred_weather = model_weather.predict(product_df[['month', 'day']])
    pred_weather_df = pd.DataFrame(pred_weather, columns=weather_cols)

    # Build the sales feature set exactly as in the training cell
    X_sales = pd.concat([
        pred_weather_df,
        product_df[['discount', 'holiday_flag', 'activity_flag']].reset_index(drop=True)
    ], axis=1)
    y_sales = product_df['sale_amount'].reset_index(drop=True)

    # Recover the held-out 20% (same arguments as the training split)
    _, Xs_test, _, ys_test = train_test_split(
        X_sales, y_sales, test_size=0.2, random_state=42
    )

    # Predict sales for the unseen rows
    preds_sales = models_sales[product].predict(Xs_test)

    # Calculate metrics
    mae = mean_absolute_error(ys_test, preds_sales)
    rmse = np.sqrt(mean_squared_error(ys_test, preds_sales))
    print(f"{product} Sales: MAE = {mae:.2f}, RMSE = {rmse:.2f}")


bread Sales: MAE = 1.89, RMSE = 3.14
meat Sales: MAE = 3.87, RMSE = 5.26
chicken Sales: MAE = 2.32, RMSE = 3.28
