## This notebook trains the model used to forecast product demand

### First, we import the modules and load the dataset

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the sales records from the CSV that sits next to this notebook.
SALES_CSV_PATH = './sales_data_2023.csv'
df = pd.read_csv(SALES_CSV_PATH)

# Last expression of the cell: show the dtype pandas inferred for each column.
df.dtypes

product_id      int64
date           object
units_sold      int64
price         float64
dtype: object

### Feature Engineering

In [15]:
# Feature engineering: derive calendar features from `date`, drop the raw
# date, and one-hot encode `product_id`. The steps are chained so that each
# one returns a new frame instead of mutating `df` in place.

# Dates flagged as holidays (ISO strings, all in 2023).
holidays = [
    "2023-01-01", "2023-01-02", "2023-02-21", "2023-04-07", "2023-04-08",
    "2023-04-09", "2023-04-10", "2023-04-18", "2023-05-01", "2023-05-25",
    "2023-08-14", "2023-08-15", "2023-08-23", "2023-12-22", "2023-12-25",
    "2023-12-26"
]
holiday_dates = pd.to_datetime(holidays)

# Parse the date strings once; every calendar feature below reads from this.
sale_dates = pd.to_datetime(df['date'])

df = (
    df
    .assign(
        # Day of the week as an integer (pandas convention: Monday=0 ... Sunday=6).
        day_of_week=sale_dates.dt.dayofweek,
        # 1 if the calendar day is in `holidays`, else 0. normalize() resets any
        # time-of-day to midnight so a timestamped row still matches its holiday.
        is_holiday=sale_dates.dt.normalize().isin(holiday_dates).astype(int),
        # Season indicators (1: in season, 0: other): May-Jul is flagged as
        # winter and Dec-Feb as summer.
        # NOTE(review): these months look like Southern Hemisphere seasons --
        # confirm against the market this data covers.
        is_winter=sale_dates.dt.month.isin([5, 6, 7]).astype(int),
        is_summer=sale_dates.dt.month.isin([12, 1, 2]).astype(int),
    )
    # The raw date is no longer needed once the features above are derived.
    .drop(columns=['date'])
    # Replace product_id with one indicator column per product.
    .pipe(pd.get_dummies, columns=['product_id'])
)

# Last expression of the cell: display the engineered frame.
df
# Next step (left disabled, as in the original cell): define features and target.
# X = df.drop(columns=['units_sold', 'price'])
# y = df['units_sold']

Unnamed: 0,units_sold,price,day_of_week,is_holiday,is_winter,is_summer,product_id_1,product_id_2,product_id_3
0,75,1.00,6,1,0,1,True,False,False
1,105,1.51,6,1,0,1,False,True,False
2,45,2.01,6,1,0,1,False,False,True
3,75,0.96,0,1,0,1,True,False,False
4,105,1.55,0,1,0,1,False,True,False
...,...,...,...,...,...,...,...,...,...
1090,84,1.52,5,0,0,1,False,True,False
1091,36,2.04,5,0,0,1,False,False,True
1092,60,0.96,6,0,0,1,True,False,False
1093,84,1.44,6,0,0,1,False,True,False
