In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gymnasium as gym
from stable_baselines3 import PPO   # Imports the PPO algorithm
from stable_baselines3.common.vec_env import DummyVecEnv
import joblib   

# Held-out test split: read it from disk and report its (rows, columns) shape.
df = pd.read_csv('/data/x_test.csv')
print(df.shape)

# Restore the trained XGBoost model from its serialized artefact.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only point this at a model file you trust.
path = '/models/xgboost_model.pkl'
xgb_model = joblib.load(path)

(1157, 42)


In [29]:
# Separate the model inputs from the target column of the test frame.
x = df.drop(columns=['Next_Purchase_Volume'])
y = df['Next_Purchase_Volume']

print(x.shape, y.shape)

# Score every row with the loaded XGBoost model.
y_pred = xgb_model.predict(x)

(1157, 41) (1157,)


In [30]:
## Evaluate the XGBoost baseline
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, root_mean_squared_error

# y and y_pred are derived from the frame read from x_test.csv, so these are
# test-set scores. The `train_` prefix is a misnomer; the names are kept as-is
# so that anything referring to them keeps working.
train_rmse, train_mae, train_r2 = (
    score(y, y_pred)
    for score in (root_mean_squared_error, mean_absolute_error, r2_score)
)

# One metric per output line, four decimals each.
print(
    f"RMSE: {train_rmse:.4f}",
    f"MAE: {train_mae:.4f}",
    f"R² Score: {train_r2:.4f}",
    sep="\n",
)

RMSE: 21.2712
MAE: 13.2370
R² Score: 0.4243


In [31]:
# Build one evaluation frame: features, true target and XGBoost prediction
# side by side, one row per test sample.

# Give the target a fresh 0..n-1 index so it lines up positionally with the
# prediction Series created below.
y = y.reset_index(drop=True)
y_pred_series = pd.Series(y_pred, name='Predicted_Next_Purchase_Volume')

# Column-wise concat aligns on the index, which is why the features get the
# same index reset as the target.
merged_df = pd.concat([x.reset_index(drop=True), y, y_pred_series], axis=1)

# A length mismatch between the three inputs would be padded with NaN rows by
# concat instead of raising, so fail loudly here.
assert len(merged_df) == len(x) == len(y_pred_series), "features/target/prediction lengths differ"

# DATE holds epoch seconds in the feature frame; convert it to timestamps.
merged_df["DATE"] = pd.to_datetime(merged_df["DATE"], unit='s')

# Per-row absolute error of the XGBoost prediction. The previous
# sqrt(error ** 2) form is mathematically the same quantity; .abs() states it
# directly and cannot overflow/underflow in the squaring step. The column keeps
# its historical name 'Model_RMSE' so existing references stay valid, even
# though a single row has no "root mean" to take.
merged_df['Model_RMSE'] = (
    merged_df['Next_Purchase_Volume'] - merged_df['Predicted_Next_Purchase_Volume']
).abs()
print(merged_df.shape)
merged_df.head()

(1157, 44)


Unnamed: 0,DATE,PRODUCT_ID,PRODUCT_BRAND,PACK_SIZE,UNIT_PRICE,SALES_QTY,SALES_VALUE,Year,Month,Day,...,Product_Sale_Count,Days_Since_Last_Sale,Avg_Days_Between_Sales,Month_sin,Month_cos,Dayofweek_sin,Dayofweek_cos,Next_Purchase_Volume,Predicted_Next_Purchase_Volume,Model_RMSE
0,2024-03-24,25,6,11,3.5,4,16,2024,3,24,...,71,1.0,1.0,1.0,6.123234000000001e-17,-0.781831,0.62349,4.0,10.465887,6.465887
1,2024-11-21,17,4,5,4.5,12,55,2024,11,21,...,64,1.0,1.4,-0.5,0.8660254,0.433884,-0.900969,9.0,15.824268,6.824268
2,2024-09-20,18,4,6,2.0,48,101,2024,9,20,...,162,1.0,1.0,-1.0,-1.83697e-16,-0.433884,-0.900969,4.0,27.180092,23.180092
3,2024-12-30,23,6,6,2.0,2,5,2024,12,30,...,306,1.0,1.0,-2.449294e-16,1.0,0.0,1.0,2.0,9.247106,7.247106
4,2024-03-28,1,0,4,5.5,25,130,2024,3,28,...,62,1.0,1.2,1.0,6.123234000000001e-17,0.433884,-0.900969,37.0,45.32552,8.32552


In [32]:
# Load the trained PPO correction policy.
import os

from stable_baselines3 import PPO

# The default is the checkpoint location this notebook was written against,
# which is an absolute path on one specific machine. Set the PPO_MODEL_PATH
# environment variable to load the model from elsewhere without editing the
# notebook.
path = os.environ.get(
    'PPO_MODEL_PATH',
    '/Users/cypacyt/Documents/IAIAProject/models/ppo_model',
)
ppo_model = PPO.load(path)


In [33]:
# Narrow the evaluation frame to the columns the RL environment reads and give
# them the short names it looks up (original column -> environment column).
df_rl = (
    merged_df[[
        'Predicted_Next_Purchase_Volume',
        'lag_qty_1', 'lag_qty_2', 'lag_qty_3',
        'Next_Purchase_Volume',
    ]]
    .rename(columns={
        'Predicted_Next_Purchase_Volume': 'xgb_pred',
        'lag_qty_1': 'lag_1',
        'lag_qty_2': 'lag_2',
        'lag_qty_3': 'lag_3',
        'Next_Purchase_Volume': 'y_true',
    })
    .copy()  # independent frame: later column assignments must not touch merged_df
)

In [34]:

class ForecastCorrectionEnv(gym.Env):
    """Replay a table of forecasts so an agent can propose additive corrections.

    Each row of ``data`` is one time step. The observation is the row's
    ``xgb_pred``, ``lag_1``, ``lag_2`` and ``lag_3`` values as a float32 vector;
    the action is a single number that is added to ``xgb_pred``. The reward is
    the reduction in absolute error relative to the uncorrected prediction,
    clipped to ``[-REWARD_CLIP, REWARD_CLIP]``. One episode is a single pass
    over the rows, in order.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``xgb_pred``, ``lag_1``, ``lag_2``, ``lag_3``
        and ``y_true``, and at least one row. The environment works on its own
        index-reset copy.

    Raises
    ------
    KeyError
        If a required column is missing.
    ValueError
        If ``data`` has no rows.
    """

    # Columns that make up the observation vector, in order.
    FEATURE_COLUMNS = ('xgb_pred', 'lag_1', 'lag_2', 'lag_3')
    # Column holding the value the corrected forecast is scored against.
    TARGET_COLUMN = 'y_true'
    # Symmetric bound of the additive correction advertised by action_space.
    MAX_CORRECTION = 10
    # Symmetric bound applied to the reward in step().
    REWARD_CLIP = 10

    def __init__(self, data):
        super().__init__()

        # Fail at construction time rather than with an opaque pandas error on
        # the first reset()/step().
        required = (*self.FEATURE_COLUMNS, self.TARGET_COLUMN)
        missing = [name for name in required if name not in data.columns]
        if missing:
            raise KeyError(f"data is missing required columns: {missing}")
        if len(data) == 0:
            raise ValueError("data must contain at least one row")

        self.data = data.reset_index(drop=True)
        self.current_step = 0
        self.total_steps = len(data)

        # NOTE(review): _get_obs returns raw (unscaled) float32 values, which
        # does not match this declaration (float64, bounds [-1, 1]). It is left
        # unchanged on purpose: a policy saved against this environment may
        # carry this exact space, and altering it would make the two compare
        # unequal. Confirm against the training code before widening it to
        # Box(-inf, inf, dtype=np.float32).
        self.observation_space = gym.spaces.Box(low=-1, high=1, shape=(4,), dtype=float)
        self.action_space = gym.spaces.Box(
            low=-self.MAX_CORRECTION,
            high=self.MAX_CORRECTION,
            shape=(1,),
            dtype=np.float32,
        )

    def _get_obs(self):
        """Return the feature values of the current row as a float32 vector."""
        row = self.data.iloc[self.current_step]
        return np.array([row[name] for name in self.FEATURE_COLUMNS], dtype=np.float32)

    def reset(self, *, seed=None, options=None, **kwargs):
        """Rewind to the first row.

        ``seed`` is forwarded to ``gym.Env.reset`` so the environment's RNG is
        seeded as the Gymnasium API prescribes; ``options`` and any other
        keyword arguments are accepted and ignored.

        Returns
        -------
        tuple
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.current_step = 0
        return self._get_obs(), {}

    def step(self, action):
        """Apply one correction and advance to the next row.

        Parameters
        ----------
        action : array-like or scalar
            The first element is the value added to the current ``xgb_pred``.

        Returns
        -------
        tuple
            ``(observation, reward, terminated, truncated, info)``.
            ``reward`` is a Python float, ``terminated`` becomes True once the
            last row has been consumed, ``truncated`` is always False and
            ``info`` is always empty. The observation returned together with
            ``terminated=True`` is a zero vector.

        Raises
        ------
        IndexError
            If called after the episode has terminated without a reset.
        """
        if self.current_step >= self.total_steps:
            raise IndexError("step() called after the episode ended; call reset() first")

        # Accept a bare scalar as well as the (1,)-shaped array the action space describes.
        correction = np.asarray(action).reshape(-1)[0]

        row = self.data.iloc[self.current_step]
        xgb_pred = row['xgb_pred']
        y_true = row[self.TARGET_COLUMN]

        base_error = abs(xgb_pred - y_true)
        corrected_error = abs(xgb_pred + correction - y_true)

        # Positive when the correction moved the forecast closer to the truth.
        # Clipped to keep the reward scale bounded, then converted to a plain
        # float as the Gymnasium step contract expects.
        reward = float(np.clip(base_error - corrected_error, -self.REWARD_CLIP, self.REWARD_CLIP))

        self.current_step += 1
        terminated = self.current_step >= self.total_steps
        if terminated:
            # No next row exists; return a placeholder of the right shape.
            obs = np.zeros(len(self.FEATURE_COLUMNS), dtype=np.float32)
        else:
            obs = self._get_obs()

        return obs, reward, terminated, False, {}
    
# One plain (un-vectorised) environment over the RL frame; no DummyVecEnv
# wrapper is created here.
env = ForecastCorrectionEnv(data=df_rl)

In [35]:
# 4. Roll the PPO policy over the environment, one row per iteration
observations, corrections, corrected_preds = [], [], []

obs, _ = env.reset()
for _ in range(len(df_rl)):
    # Deterministic prediction: the policy's action is the additive correction.
    action, _ = ppo_model.predict(obs, deterministic=True)
    correction = action[0]
    # The first slot of the observation vector is the row's xgb_pred (as float32).
    xgb_pred = obs[0]

    observations.append(obs)
    corrections.append(correction)
    corrected_preds.append(xgb_pred + correction)

    obs, _, done, _, _ = env.step(action)
    if done:
        break

# 5. Store the per-row correction and the corrected forecast next to the inputs
df_rl['correction'] = corrections
df_rl['corrected_pred'] = corrected_preds

In [36]:
from sklearn.metrics import mean_squared_error, r2_score, root_mean_squared_error, mean_absolute_error

# Score the PPO-corrected forecast against the true target stored in df_rl.
# NOTE: this rebinds y_pred, which earlier in the notebook held the raw XGBoost
# predictions. Re-run the XGBoost prediction cell before re-running any cell
# that expects the uncorrected values.
y_true, y_pred = (df_rl[column].to_numpy() for column in ("y_true", "corrected_pred"))

rmse = root_mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

# Same three-line report format as the baseline evaluation.
print("\n".join([
    f"RMSE: {rmse:.4f}",
    f"MAE: {mae:.4f}",
    f"R² Score: {r2:.4f}",
]))

RMSE: 21.2096
MAE: 12.9014
R² Score: 0.4276
