In [2]:
import gym
import numpy as np
import pandas as pd
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from sklearn.metrics import mean_absolute_error, mean_squared_error


In [3]:
data = pd.read_csv('woolballhistory.csv')

In [4]:
data['Report Date'] = pd.to_datetime(data['Report Date'])
data = data.sort_values('Report Date')

data['Product Price'] = data['Product Price'].fillna(data['Product Price'].mean())
data['Total Sales'] = data['Total Sales'].fillna(data['Total Sales'].median())
data['Predicted Sales'] = data['Predicted Sales'].fillna(data['Predicted Sales'].median())
data['Organic Conversion Percentage'] = data['Organic Conversion Percentage'].fillna(data['Organic Conversion Percentage'].mean())
data['Ad Conversion Percentage'] = data['Ad Conversion Percentage'].fillna(data['Ad Conversion Percentage'].mean())
data['Total Profit'] = data['Total Profit'].fillna(data['Total Profit'].median())

data

Unnamed: 0,Report Date,Product Price,Organic Conversion Percentage,Ad Conversion Percentage,Total Profit,Total Sales,Predicted Sales
111,2024-08-27,25.566,17.670659,0.0,0.0,26.5,121.147960
110,2024-08-28,25.566,17.670659,0.0,0.0,26.5,121.147960
109,2024-08-29,25.566,17.670659,0.0,0.0,26.5,121.147960
108,2024-08-30,25.566,17.670659,0.0,0.0,26.5,121.147960
107,2024-08-31,25.566,17.670659,0.0,0.0,26.5,121.147960
...,...,...,...,...,...,...,...
4,2025-01-18,25.566,0.000000,0.0,0.0,0.0,195.190547
3,2025-01-19,25.566,0.000000,0.0,0.0,0.0,339.065959
2,2025-01-20,25.566,0.000000,0.0,0.0,0.0,398.965492
1,2025-01-21,25.566,0.000000,0.0,0.0,0.0,213.332405


In [5]:
class PricingEnv(gym.Env):
    def __init__(self, data):
        super(PricingEnv, self).__init__()
        self.data = data
        self.index = 0
        self.median_price = self.data['Product Price'].median()
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)  # Adjust price
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(5,), dtype=np.float32)  # State features

    def reset(self):
        self.index = 0
        self.state = self._get_state(self.index)
        return self.state

    def step(self, action):
        price_change = action[0] * 5  # Scale price change
        new_price = self.data.loc[self.index, 'Product Price'] + price_change

        # Clip new price to avoid extreme values
        new_price = max(0, new_price)
        self.data.loc[self.index, 'Product Price'] = new_price

        # Calculate rewards based on historical data
        sales = self.data.loc[self.index, 'Total Sales']
        predicted_sales = self.data.loc[self.index, 'Predicted Sales']
        organic_conversion = self.data.loc[self.index, 'Organic Conversion Percentage']
        ad_conversion = self.data.loc[self.index, 'Ad Conversion Percentage']

        # Reward calculation
        reward = 0
        if new_price > self.median_price:
            reward += 0.1 * (new_price - self.median_price)  # Reward for pushing price above median
        reward += (sales / max(predicted_sales, 1)) + organic_conversion + ad_conversion
        
        if sales < predicted_sales:
            reward -= 1  # Penalize if sales drop below predicted sales

        self.index += 1
        done = self.index >= len(self.data) - 1

        self.state = self._get_state(self.index) if not done else np.zeros(self.observation_space.shape)
        return self.state, reward, done, {}

    def _get_state(self, index):
        # Normalize features for state representation
        price = self.data.loc[index, 'Product Price'] / self.data['Product Price'].max()
        organic_conversion = self.data.loc[index, 'Organic Conversion Percentage'] / 100
        ad_conversion = self.data.loc[index, 'Ad Conversion Percentage'] / 100
        total_sales = self.data.loc[index, 'Total Sales'] / self.data['Total Sales'].max()
        total_profit = self.data.loc[index, 'Total Profit'] / self.data['Total Profit'].max()
        return np.array([price, organic_conversion, ad_conversion, total_sales, total_profit])

    def render(self, mode='human'):
        pass

In [6]:
env = DummyVecEnv([lambda: PricingEnv(data)])



In [7]:
model = PPO('MlpPolicy', env, verbose=1)
model.learn(total_timesteps=10000)

Using cpu device
-----------------------------
| time/              |      |
|    fps             | 537  |
|    iterations      | 1    |
|    time_elapsed    | 3    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 380          |
|    iterations           | 2            |
|    time_elapsed         | 10           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 3.551159e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | 0.000261     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.15e+05     |
|    n_updates            | 10           |
|    policy_gradient_loss | 1.03e-05     |
|    std                  | 1            |
|    value_loss           | 2.07e+05     |

<stable_baselines3.ppo.ppo.PPO at 0x2a2d7e9ffd0>

In [8]:
state = env.reset()
done = False
predicted_prices = []
true_prices = []

In [9]:
while not done:
    action, _ = model.predict(state)
    state, _, done, _ = env.step(action)

    # Get the predicted price
    predicted_price = state[0][0] * data['Product Price'].max()  # Rescale price to original
    predicted_prices.append(predicted_price)
    
    
    # Append the true price at the same index
    if env.envs[0].index < len(data):
        true_price = data.loc[env.envs[0].index, 'Product Price']
        true_prices.append(true_price)
    else:
        true_prices.append(0)  # Handle edge case
print(f"State[0][0]: {state[0][0]}, Predicted Price: {predicted_price}")
# Print the lists for debugging
print("Predicted Prices:", predicted_prices)
print("True Prices:", true_prices)

State[0][0]: 0.32094916701316833, Predicted Price: 29.671523594716067
Predicted Prices: [9.947988552897169, 43.70303006306306, 13.67107463044813, 49.728445882264474, 35.39377828581306, 10.22093346708804, 0.0, 34.629966544397334, 88.26204946719483, 34.21145171387658, 34.6867518931111, 26.36541148137291, 5.867553298374049, 49.94995736014935, 10.4091717349652, 20.082442354791304, 14.535607158056031, 63.348259162949255, 21.773357664624214, 4.321385711158883, 41.74826993191239, 4.748803895205467, 37.03481908776092, 35.80959931728768, 27.173267494667993, 33.96589970023146, 65.45141630640113, 29.335077413121578, 56.730050633965696, 48.42763571600272, 36.484194396323666, 11.560826190757258, 8.216847247442766, 17.228603806295375, 56.761972423291596, 7.088503880813416, 8.96835452477669, 6.360028728274063, 34.643814799648744, 27.495857757177383, 3.155319473948178, 28.771745722515405, 25.21109687647036, 45.489018174027365, 35.89249788428558, 48.19206580513058, 25.287176314469335, 47.12799514011933

In [10]:
while not done:
    action, _ = model.predict(state)
    state, _, done, _ = env.step(action)
    predicted_price = state[0][0] * data['Product Price'].max()  # Rescale price to original
    predicted_prices.append(predicted_price)
    true_prices.append(data.loc[env.envs[0].index, 'Product Price'])
    print(f"State[0][0]: {state[0][0]}, Predicted Price: {predicted_price}")

ideal_price = predicted_prices[-1]
print(f"Ideal price for tomorrow: ${ideal_price:.2f}")

Ideal price for tomorrow: $29.67


In [11]:
mae = mean_absolute_error(true_prices, predicted_prices)
mse = mean_squared_error(true_prices, predicted_prices)
rmse = np.sqrt(mse)

print("Performance Metrics:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")

Performance Metrics:
Mean Absolute Error (MAE): 0.00
Mean Squared Error (MSE): 0.00
Root Mean Squared Error (RMSE): 0.00
