In [1]:
!pip install statsmodels

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, r2_score



In [3]:
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()
 
table_name = '"FACT_CUSTOMER_ORDERS"'
 
sf_df = my_session.sql("select * from {}".format(table_name))
df = sf_df.to_pandas()

In [4]:
data = df[['PRODUCT_ID','ORDER_DATE','ORDER_QTY']]

In [5]:
data['ORDER_DATE']=pd.to_datetime(data['ORDER_DATE'])

In [6]:
data_grouped = data.groupby(['PRODUCT_ID',data['ORDER_DATE'].dt.to_period('M')]).agg(Total_Order_Qty=('ORDER_QTY','sum')).reset_index()

In [7]:
data_grouped['ORDER_MONTH']=data_grouped['ORDER_DATE'].dt.to_timestamp()

In [9]:
data_grouped = data_grouped[['PRODUCT_ID','ORDER_MONTH','Total_Order_Qty']]

In [10]:
data_grouped

Unnamed: 0,PRODUCT_ID,ORDER_MONTH,Total_Order_Qty
0,Product_001,2019-01-01,148400
1,Product_001,2019-02-01,146650
2,Product_001,2019-03-01,133150
3,Product_001,2019-04-01,153600
4,Product_001,2019-05-01,154650
...,...,...,...
2987,Product_100,2024-04-01,30200
2988,Product_100,2024-05-01,27850
2989,Product_100,2024-06-01,33200
2990,Product_100,2024-07-01,33300


In [11]:
data_grouped = data_grouped.set_index('ORDER_MONTH')

In [12]:
data_grouped

Unnamed: 0_level_0,PRODUCT_ID,Total_Order_Qty
ORDER_MONTH,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-01,Product_001,148400
2019-02-01,Product_001,146650
2019-03-01,Product_001,133150
2019-04-01,Product_001,153600
2019-05-01,Product_001,154650
...,...,...
2024-04-01,Product_100,30200
2024-05-01,Product_100,27850
2024-06-01,Product_100,33200
2024-07-01,Product_100,33300


In [17]:
# Group data by 'product_id'
PID_grouped_data = data_grouped.groupby('PRODUCT_ID')

In [47]:
# Initialize an empty DataFrame for forecast results
test_results = pd.DataFrame(columns=['PRODUCT_ID','ORDER_MONTH', 'test_forecast_orders','rse', 'rmse','r2_score'])

In [48]:
forecast_results = pd.DataFrame(columns=['PRODUCT_ID','ORDER_MONTH', 'forecast_order_qty'])

In [49]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

In [None]:
import itertools
import numpy as np
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, r2_score

# Function to perform a grid search to find optimal SARIMAX parameters
def sarimax_grid_search(y, p_values, d_values, q_values, P_values, D_values, Q_values, s):
    best_aic = np.inf
    best_order = None
    best_seasonal_order = None
    
    for p, d, q in itertools.product(p_values, d_values, q_values):
        for P, D, Q in itertools.product(P_values, D_values, Q_values):
            try:
                model = SARIMAX(y, order=(p, d, q), seasonal_order=(P, D, Q, s),
                                enforce_stationarity=False, enforce_invertibility=False)
                result = model.fit(disp=False)
                if result.aic < best_aic:
                    best_aic = result.aic
                    best_order = (p, d, q)
                    best_seasonal_order = (P, D, Q, s)
            except:
                continue
                
    return best_order, best_seasonal_order

# Example ranges for grid search
p_values = range(0, 3)
d_values = range(0, 2)
q_values = range(0, 3)
P_values = range(0, 3)
D_values = range(0, 2)
Q_values = range(0, 3)
s = 12  # Seasonality (monthly data with yearly seasonality)

# Loop over product groups
test_results = pd.DataFrame()
forecast_results = pd.DataFrame()

for product_id, group in PID_grouped_data:
    # Sort data by order date
    group.sort_index(inplace=True)
    
    # Split data into train and test sets (80% train, 20% test)
    train_size = int(0.8 * len(group))
    train_data, test_data = group.iloc[:train_size], group.iloc[train_size:]

    # Grid search to find the optimal order and seasonal_order
    optimal_order, optimal_seasonal_order = sarimax_grid_search(train_data['Total_Order_Qty'], 
                                                                p_values, d_values, q_values, 
                                                                P_values, D_values, Q_values, s)
    
    # Create SARIMAX model with the optimal parameters
    model = SARIMAX(train_data['Total_Order_Qty'], order=optimal_order, seasonal_order=optimal_seasonal_order)
    model_fit = model.fit(disp=False)
    
    # Forecast the next period
    test_model = model_fit.get_forecast(steps=len(test_data))
    test_df = test_model.predicted_mean

    # Calculate evaluation metrics
    rse = np.sqrt(((test_df - test_data['Total_Order_Qty']) ** 2).sum() / (test_data.shape[0] - len(model_fit.params)))
    rmse = np.sqrt(mean_squared_error(test_data['Total_Order_Qty'], test_df))
    r2 = r2_score(test_data['Total_Order_Qty'], test_df)

    print(f'Product ID: {product_id}, R2: {r2}')
    
    # Store test forecast results
    date_list = test_df.index.to_list()
    new_row = {
        'PRODUCT_ID': product_id,
        'ORDER_MONTH': date_list,
        'test_forecast_orders': test_df,
        'rse': rse,
        'rmse': rmse,
        'r2_score': r2
    }
    
    df_new = pd.DataFrame(new_row)
    df_new.set_index('ORDER_MONTH', inplace=True)
    
    test_results = pd.concat([test_results, df_new], ignore_index=True)

    # Forecast future values (e.g., next 25 periods)
    forecast = model_fit.get_forecast(steps=25)
    forecast_df = forecast.predicted_mean
    date_list2 = forecast_df.index.to_list()
    
    # Store forecast results
    new_row2 = {
        'PRODUCT_ID': product_id,
        'ORDER_MONTH': date_list2,
        'forecast_order_qty': forecast_df
    }
    
    df_new2 = pd.DataFrame(new_row2)
    forecast_results = pd.concat([forecast_results, df_new2], ignore_index=True)

Product ID: Product_001, R2: 0.1716301694156649
Product ID: Product_002, R2: -0.7315736111672213
Product ID: Product_003, R2: -0.8482879099385898


In [54]:
for product_id, group in PID_grouped_data:
    # Sort data by order date
    group.sort_index(inplace=True)

    #print(f'{group}')
    
    # Split data into train and test sets (80% train, 20% test)
    train_size = int(0.8 * len(group))
    train_data, test_data = group.iloc[:train_size], group.iloc[train_size:]

    #print(f'{train_data.index}')
    
    # Create SARIMAX model
    model = SARIMAX(train_data['Total_Order_Qty'], order=(1, 0, 1), seasonal_order=(1, 1, 1, 12))
    model_fit = model.fit(disp=False)
    
    # Forecast the next period
    test_model = model_fit.get_forecast(steps=len(test_data))
    test_df = test_model.predicted_mean

    #print(f'test_df:{test_df.index}')
    
    # Calculate RMSE for order quantity
    #rmse_order_qty = np.sqrt(mean_squared_error(test_data['ORDER_QTY'], forecast_order_qty))
    
    # Evaluate the model
    rse = np.sqrt(((test_df - test_data) ** 2).sum().sum() / (test_data.shape[0] * test_data.shape[1] - len(model_fit.params)))
    rmse = np.sqrt(mean_squared_error(test_data['Total_Order_Qty'], test_df))
    r2 = r2_score(test_data['Total_Order_Qty'], test_df)

    print(f'{product_id:},{r2}')
    date_list = test_df.index.to_list()
    

    
    #print(f"Order_date: {date_list}")    
    new_row = {
        'PRODUCT_ID': product_id,
        'ORDER_MONTH': date_list,
        'test_forecast_orders': test_df,
        'rse': rse,
        'rmse': rmse,
        'r2_score': r2
    }
    
    df_new = pd.DataFrame(new_row)

    df_new.set_index('ORDER_MONTH')
    
    test_results=pd.concat([test_results,df_new],ignore_index=True)
    # Forecast future values
    forecast = model_fit.get_forecast(steps=25)#, steps=n_forecast)
    forecast_df = forecast.predicted_mean
    #print(f'{forecast_df.index}')
    date_list2= forecast_df.index.to_list()
    
    new_row2 = {
        'PRODUCT_ID': product_id,
        'ORDER_MONTH': date_list2,
        'forecast_order_qty': forecast_df,
    }
    
    df_new2 = pd.DataFrame(new_row2)
    
    forecast_results=pd.concat([forecast_results,df_new2],ignore_index=True)

Product_001,-0.28906328106307533
Product_002,-0.7183426501535135
Product_003,-0.7657530975914217
Product_004,-0.31802143727160037
Product_007,-0.050741196669161104
Product_010,0.1389804070098586
Product_015,0.10875625299168268
Product_016,-0.36075808932955344
Product_019,-0.26103506174280455
Product_023,-1.0224324052521938
Product_047,-1.2547198683775806
Product_057,-1.1571779731610903
Product_059,-1.148909562909325
Product_060,-1.670628667825059
Product_061,-1.5422815968866956
Product_063,-0.6389099215529042
Product_065,-0.6507640375658055
Product_066,-0.6516076513473115
Product_067,-0.21990079893419234
Product_069,-0.7993062052001618
Product_070,-0.7537980958415675
Product_071,-1.1354808867503898
Product_072,-0.40976389892031206
Product_074,-0.8611845083164098
Product_075,-0.779220848194504
Product_076,-0.3794084133645339
Product_077,-2.313618565115233
Product_078,-0.76501309380358
Product_079,-0.04977086528429475
Product_082,-0.02754418877481446
Product_083,-0.6013158638112692
Produ

In [33]:
test_results

Unnamed: 0,PRODUCT_ID,ORDER_MONTH,test_forecast_orders,rse,rmse,r2_score
0,Product_001,2023-07-01,118823.028009,0.0,15697.565379,-0.055090
1,Product_001,2023-08-01,101123.262760,0.0,15697.565379,-0.055090
2,Product_001,2023-09-01,105282.149996,0.0,15697.565379,-0.055090
3,Product_001,2023-10-01,117745.845182,0.0,15697.565379,-0.055090
4,Product_001,2023-11-01,110611.593083,0.0,15697.565379,-0.055090
...,...,...,...,...,...,...
611,Product_100,2024-04-01,18561.642690,0.0,8128.208481,-2.288506
612,Product_100,2024-05-01,21368.799836,0.0,8128.208481,-2.288506
613,Product_100,2024-06-01,24816.242093,0.0,8128.208481,-2.288506
614,Product_100,2024-07-01,21318.819139,0.0,8128.208481,-2.288506


In [34]:
forecast_results

Unnamed: 0,PRODUCT_ID,ORDER_MONTH,forecast_order_qty
0,Product_001,2023-07-01,118823.028009
1,Product_001,2023-08-01,101123.262760
2,Product_001,2023-09-01,105282.149996
3,Product_001,2023-10-01,117745.845182
4,Product_001,2023-11-01,110611.593083
...,...,...,...
1095,Product_100,2025-03-01,26101.562368
1096,Product_100,2025-04-01,17079.529558
1097,Product_100,2025-05-01,20108.877208
1098,Product_100,2025-06-01,22627.141399


In [42]:
sf_df = my_session.createDataFrame(test_results)
sf_df.write.mode("overwrite").save_as_table("ORDER_FORECAST_TEST")
my_session.table("ORDER_FORECAST_TEST").show()

-------------------------------------------------------------------------------------------------------------------
|"PRODUCT_ID"  |"ORDER_MONTH"        |"test_forecast_orders"  |"rse"  |"rmse"              |"r2_score"            |
-------------------------------------------------------------------------------------------------------------------
|Product_001   |2023-07-01 00:00:00  |118823.0280087485       |0.0    |15697.565379119542  |-0.05508991816590081  |
|Product_001   |2023-08-01 00:00:00  |101123.26275967901      |0.0    |15697.565379119542  |-0.05508991816590081  |
|Product_001   |2023-09-01 00:00:00  |105282.14999635714      |0.0    |15697.565379119542  |-0.05508991816590081  |
|Product_001   |2023-10-01 00:00:00  |117745.8451819192       |0.0    |15697.565379119542  |-0.05508991816590081  |
|Product_001   |2023-11-01 00:00:00  |110611.59308316885      |0.0    |15697.565379119542  |-0.05508991816590081  |
|Product_001   |2023-12-01 00:00:00  |109442.18813553792      |0.0    |1

In [43]:
sf_df = my_session.createDataFrame(forecast_results)
sf_df.write.mode("overwrite").save_as_table("ORDER_FORECAST")
my_session.table("ORDER_FORECAST").show()

-------------------------------------------------------------
|"PRODUCT_ID"  |"ORDER_MONTH"        |"forecast_order_qty"  |
-------------------------------------------------------------
|Product_001   |2023-07-01 00:00:00  |118823.0280087485     |
|Product_001   |2023-08-01 00:00:00  |101123.26275967901    |
|Product_001   |2023-09-01 00:00:00  |105282.14999635714    |
|Product_001   |2023-10-01 00:00:00  |117745.8451819192     |
|Product_001   |2023-11-01 00:00:00  |110611.59308316885    |
|Product_001   |2023-12-01 00:00:00  |109442.18813553792    |
|Product_001   |2024-01-01 00:00:00  |123789.85901559627    |
|Product_001   |2024-02-01 00:00:00  |121702.67368633783    |
|Product_001   |2024-03-01 00:00:00  |126125.91939069977    |
|Product_001   |2024-04-01 00:00:00  |122617.48431518582    |
-------------------------------------------------------------

