In [1]:
import os
import sys
import pandas as pd
import yaml
from datetime import datetime

# === 1. Define Paths ===
PROJECT_ROOT = r"D:\demand_forecasting_system"
SRC_DIR = os.path.join(PROJECT_ROOT, "src")
PIPELINE_DIR = os.path.join(PROJECT_ROOT, "data_pipeline")
FEATURE_DIR = os.path.join(PIPELINE_DIR, "feature_engineering")

# === 2. Add necessary directories to Python path ===
for path in [PROJECT_ROOT, SRC_DIR, PIPELINE_DIR, FEATURE_DIR]:
    if path not in sys.path:
        sys.path.append(path)

# === 3. Import your project modules ===
from utils.logger import logger   
from feature_engineering.orders_features import generate_orders_features

In [2]:
config_path = os.path.join(PROJECT_ROOT, "data_pipeline", "feature_engineering", "config.yaml")

with open(config_path, "r") as file:
    config = yaml.safe_load(file)

base_path = config["data_paths"]["base_path"]

print("Base Path:", base_path)
print("Files:")
for key, val in config["data_paths"].items():
    print(f"  {key}: {val}")



Base Path: D:/DEMAND_FORECASTING_SYSTEM/data/processed
Files:
  base_path: D:/DEMAND_FORECASTING_SYSTEM/data/processed
  orders: blinkit_orders_clean.csv
  order_items: blinkit_order_items_clean.csv
  customers: blinkit_customers_clean.csv
  products: blinkit_products_clean.csv
  inventory: blinkit_inventory_clean.csv
  marketing: blinkit_marketing_clean.csv
  weather: blinkit_weather_clean.csv


In [3]:
def load_data(file_path: str) -> pd.DataFrame:
    if not os.path.exists(file_path):
        logger.error(f"File not found: {file_path}")
        raise FileNotFoundError(f"Missing file: {file_path}")
    logger.info(f"Loading data from {file_path}")
    return pd.read_csv(file_path)

orders_df = load_data(os.path.join(base_path, config["data_paths"]["orders"]))
order_items_df = load_data(os.path.join(base_path, config["data_paths"]["order_items"]))

orders_df.head(2), order_items_df.head(2)


2025-11-08 22:13:51,545 | INFO | data_pipeline | Loading data from D:/DEMAND_FORECASTING_SYSTEM/data/processed\blinkit_orders_clean.csv
2025-11-08 22:13:51,610 | INFO | data_pipeline | Loading data from D:/DEMAND_FORECASTING_SYSTEM/data/processed\blinkit_order_items_clean.csv


(   order_id  customer_id           order_date  order_total payment_method  \
 0     60465     15808945  2024-10-23 05:23:29   589.469971            Upi   
 1   2237858     48281892  2023-04-02 03:45:11  3835.260010         Wallet   
 
    store_id promised_delivery_time actual_delivery_time delivery_status  \
 0      3943    2024-10-23 05:34:29  2024-10-23 05:39:29         On Time   
 1      1987    2023-04-02 04:00:11  2023-04-02 04:03:11         On Time   
 
    delivery_delay_hrs  
 0            0.083333  
 1            0.050000  ,
    order_id  product_id  quantity  unit_price  total_price
 0     60465      361361         2  826.210022  1652.420044
 1   2237858      585299         1  799.460022   799.460022)

In [4]:
orders_features = generate_orders_features(orders_df, order_items_df)
display(orders_features.head(5))


2025-11-08 22:14:20,833 | INFO | data_pipeline | Starting feature engineering for Orders...
--- Logging error ---
Traceback (most recent call last):
  File "C:\Program Files\Python313\Lib\logging\__init__.py", line 1153, in emit
    stream.write(msg + self.terminator)
    ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Program Files\Python313\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 49: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "d:\demand_forecasting_system\.venv\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "d:\demand_forecasting_system\.venv\Lib\site-packages\traitlets\config\application.py", line 1075, i

Unnamed: 0,order_id,customer_id,order_date,order_total,payment_method,store_id,promised_delivery_time,actual_delivery_time,delivery_status,delivery_delay_hrs,total_quantity,avg_unit_price,total_sales_value,order_dayofweek,order_week,order_month,is_cod,is_prepaid
0,60465,15808945,2024-10-23 05:23:29,589.469971,Upi,3943,2024-10-23 05:34:29,2024-10-23 05:39:29,On Time,0.083333,2,826.210022,1652.420044,2,43,10,0,1
1,2237858,48281892,2023-04-02 03:45:11,3835.26001,Wallet,1987,2023-04-02 04:00:11,2023-04-02 04:03:11,On Time,0.05,1,799.460022,799.460022,6,13,4,0,1
2,3101265,89617089,2024-05-23 03:21:47,3567.72998,Upi,974,2024-05-23 03:37:47,2024-05-23 03:40:47,On Time,0.05,1,44.349998,44.349998,3,21,5,0,1
3,5120698,44174426,2023-06-09 12:10:20,3064.360107,Wallet,3184,2023-06-09 12:25:20,2023-06-09 12:40:20,Slightly Delayed,0.25,1,53.490002,53.490002,4,23,6,0,1
4,5512907,51476157,2023-04-30 20:52:21,861.530029,Wallet,419,2023-04-30 21:11:21,2023-04-30 21:26:21,Slightly Delayed,0.25,3,319.339996,958.019989,6,17,4,0,1


In [5]:
# Save processed features
output_path = os.path.join(config["output"]["processed_dir"], config["output"]["orders_features"])
os.makedirs(config["output"]["processed_dir"], exist_ok=True)

orders_features.to_csv(output_path, index=False)
logger.info(f"✅ Orders features saved successfully at {output_path}")

print("Saved to:", output_path)


--- Logging error ---
Traceback (most recent call last):
  File "C:\Program Files\Python313\Lib\logging\__init__.py", line 1153, in emit
    stream.write(msg + self.terminator)
    ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Program Files\Python313\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 49: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "d:\demand_forecasting_system\.venv\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "d:\demand_forecasting_system\.venv\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "d:\demand_forecasting_system\.venv\Lib\site-packag

Saved to: D:/DEMAND_FORECASTING_SYSTEM/data/processed\orders_features.csv
