In [11]:
!pip install statsmodels

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [12]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, r2_score



In [13]:
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()
 
table_name = '"FACT_CUSTOMER_ORDERS"'
 
sf_df = my_session.sql("select * from {}".format(table_name))
df = sf_df.to_pandas()

In [14]:
data = df[['PRODUCT_ID','ORDER_DATE','ORDER_QTY']]

In [15]:
data['ORDER_DATE']=pd.to_datetime(data['ORDER_DATE'])

In [16]:
data_grouped = data.groupby(['PRODUCT_ID',data['ORDER_DATE'].dt.to_period('M')]).agg(Total_Order_Qty=('ORDER_QTY','sum')).reset_index()

In [17]:
data_grouped['ORDER_MONTH']=data_grouped['ORDER_DATE'].dt.to_timestamp()

In [18]:
data_grouped = data_grouped[['PRODUCT_ID','ORDER_MONTH','Total_Order_Qty']]

In [19]:
data_grouped

Unnamed: 0,PRODUCT_ID,ORDER_MONTH,Total_Order_Qty
0,Product_001,2019-01-01,148400
1,Product_001,2019-02-01,146650
2,Product_001,2019-03-01,133150
3,Product_001,2019-04-01,153600
4,Product_001,2019-05-01,154650
...,...,...,...
2987,Product_100,2024-04-01,30200
2988,Product_100,2024-05-01,27850
2989,Product_100,2024-06-01,33200
2990,Product_100,2024-07-01,33300


In [20]:
data_grouped.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2992 entries, 0 to 2991
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   PRODUCT_ID       2992 non-null   object        
 1   ORDER_MONTH      2992 non-null   datetime64[ns]
 2   Total_Order_Qty  2992 non-null   int64         
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 70.2+ KB


In [21]:
data_grouped = data_grouped.set_index('ORDER_MONTH')

In [22]:
data_grouped.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2992 entries, 2019-01-01 to 2024-08-01
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   PRODUCT_ID       2992 non-null   object
 1   Total_Order_Qty  2992 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 70.1+ KB


In [47]:
ORD_grouped_data = data_grouped.groupby('ORDER_MONTH')

In [48]:
ORD_grouped_data

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f0a31654d60>

In [23]:
# Group data by 'product_id'
PID_grouped_data = data_grouped.groupby('PRODUCT_ID')

In [42]:
# Initialize an empty DataFrame for forecast results
test_results = pd.DataFrame(columns=['PRODUCT_ID','ORDER_MONTH', 'test_forecast_orders','rse', 'rmse','r2_score'])
test_results.set_index('ORDER_MONTH')

Unnamed: 0_level_0,PRODUCT_ID,test_forecast_orders,rse,rmse,r2_score
ORDER_MONTH,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


In [43]:
# Initialize an empty DataFrame for forecast results
forecast_results = pd.DataFrame(columns=['PRODUCT_ID','ORDER_MONTH', 'forecast_order_qty'])
forecast_results.set_index('ORDER_MONTH')

Unnamed: 0_level_0,PRODUCT_ID,forecast_order_qty
ORDER_MONTH,Unnamed: 1_level_1,Unnamed: 2_level_1


In [31]:
print(PID_grouped_data)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f0a2bc38b80>


In [46]:
for product_id, group in PID_grouped_data:
    # Sort data by order date
    group.sort_index(inplace=True)
    
    # Split data into train and test sets (80% train, 20% test)
    train_size = int(0.8 * len(group))
    train_data, test_data = group.iloc[:train_size], group.iloc[train_size:]

    
    
    # Create SARIMAX model
    model = SARIMAX(train_data['Total_Order_Qty'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
    model_fit = model.fit(disp=False)
    
    # Forecast the next period
    test_model = model_fit.get_forecast(steps=len(test_data))
    test_df = test_model.predicted_mean

    #print(f'test_df:{test_df.index}')
    
    # Calculate RMSE for order quantity
    #rmse_order_qty = np.sqrt(mean_squared_error(test_data['ORDER_QTY'], forecast_order_qty))
    
    # Evaluate the model
    rse = np.sqrt(((test_df - test_data) ** 2).sum().sum() / (test_data.shape[0] * test_data.shape[1] - len(model_fit.params)))
    rmse = np.sqrt(mean_squared_error(test_data['Total_Order_Qty'], test_df))
    r2 = r2_score(test_data['Total_Order_Qty'], test_df)

    
    date_list = test_df.index.to_list()
    

    
    #print(f"Order_date: {date_list}")    
    new_row = {
        'PRODUCT_ID': product_id,
        'ORDER_MONTH': date_list,
        'test_forecast_orders': test_df,
        'rse': rse,
        'rmse': rmse,
        'r2_score': r2
    }
    
    df_new = pd.DataFrame(new_row)

    df_new.set_index('ORDER_MONTH')
    
    test_results=pd.concat([test_results,df_new],ignore_index=True)
    print(f'{test_results}')
    # Forecast future values
    forecast = model_fit.get_forecast(steps=25)#, steps=n_forecast)
    forecast_df = forecast.predicted_mean
    
    date_list2= forecast_df.index.to_list()
    
    new_row2 = {
        'product_id': product_id,
        'order_date': date_list2,
        'forecast_order_qty': forecast_df,
    }
    
    df_new2 = pd.DataFrame(new_row2)
    
    forecast_results=pd.concat([forecast_results,df_new2],ignore_index=True)

      PRODUCT_ID ORDER_MONTH  test_forecast_orders  rse          rmse  \
0            NaN         NaT         118823.028009  0.0  15697.565379   
1            NaN         NaT         101123.262760  0.0  15697.565379   
2            NaN         NaT         105282.149996  0.0  15697.565379   
3            NaN         NaT         117745.845182  0.0  15697.565379   
4            NaN         NaT         110611.593083  0.0  15697.565379   
..           ...         ...                   ...  ...           ...   
625  Product_001  2024-04-01         122617.484315  0.0  15697.565379   
626  Product_001  2024-05-01         113678.212178  0.0  15697.565379   
627  Product_001  2024-06-01         122553.505870  0.0  15697.565379   
628  Product_001  2024-07-01         123281.759280  0.0  15697.565379   
629  Product_001  2024-08-01         106051.738107  0.0  15697.565379   

     r2_score   product_id order_month  
0    -0.05509  Product_001  2023-07-01  
1    -0.05509  Product_001  2023-08-01  
