# Reinforcement Learning for Dynamic Pricing
## Incorporating Price Elasticity of Demand (PED)

# In [2]:
# Import required libraries
import numpy as np
import pandas as pd
import random
from sklearn.preprocessing import StandardScaler, LabelEncoder
import gym
from gym import spaces
import matplotlib.pyplot as plt


## Load and Preprocess Data

# In [None]:
# Load dataset
df = pd.read_csv("Grocery_Sales_Final.csv")

# Encode categorical variables as integer codes. The fitted encoders are kept
# in `label_encoders` so the codes can be mapped back to labels later.
label_encoders = {}
for col in ['Outlet_Type', 'Item_Fat_Content', 'Item_Type']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Compute Price Elasticity of Demand (PED) = %change in sales / %change in price.
# This must use the RAW price: after standardization Item_MRP is a z-score
# (centred on 0, often negative), so its pct_change is not a price change.
sales_pct_change = df['Item_Outlet_Sales'].pct_change()
price_pct_change = df['Item_MRP'].pct_change()
ped = sales_pct_change / price_pct_change

# A zero price change yields +/-inf (or NaN for 0/0); treat those as missing.
ped = ped.replace([np.inf, -np.inf], np.nan)

# Fill missing PED values with the median of the valid ones; fall back to 0
# when no valid value exists at all (e.g. a single-row dataset).
ped_median = ped.median()
df['PED'] = ped.fillna(0.0 if pd.isna(ped_median) else ped_median)

# Standardize numerical variables (done last so PED above sees raw prices)
scaler = StandardScaler()
df[['Item_MRP', 'Item_Visibility']] = scaler.fit_transform(df[['Item_MRP', 'Item_Visibility']])

## Define Custom Gym Environment for RL

# In [None]:
class PricingEnv(gym.Env):
    """Gym environment that walks through product rows and prices each one.

    Observation: ``[Item_MRP, Item_Visibility, Outlet_Type, Item_Fat_Content,
    PED]`` of the current row, as a float32 vector.

    Actions: ``0`` = decrease price, ``1`` = keep price, ``2`` = increase
    price. The size of the change depends on the row's PED (see ``step``).

    Reward: revenue after the price change minus revenue before it, where
    revenue is ``Item_MRP * Item_Outlet_Sales`` of the current row.

    Args:
        df: DataFrame providing the columns ``Item_MRP``, ``Item_Visibility``,
            ``Outlet_Type``, ``Item_Fat_Content``, ``PED`` and
            ``Item_Outlet_Sales``. A copy is stored.
        max_steps: Number of steps after which ``step`` reports ``done``.
            Defaults to ``len(df)``, i.e. one pass over the data.

    Raises:
        ValueError: If ``df`` has no rows.
    """

    # Relative price change per action, chosen by demand elasticity.
    _ELASTIC_CHANGES = (-0.05, 0, 0.02)    # PED < -1: more decrease, less increase
    _INELASTIC_CHANGES = (-0.02, 0, 0.05)  # otherwise: less decrease, more increase

    def __init__(self, df, max_steps=None):
        super().__init__()
        if len(df) == 0:
            # Fail here rather than later in reset()/step() with an obscure
            # randint or modulo-by-zero error.
            raise ValueError("PricingEnv requires a non-empty DataFrame")

        self.df = df.copy()
        self.current_index = 0
        self.max_steps = len(self.df) if max_steps is None else max_steps
        self.steps_taken = 0

        # Define action and observation space
        self.action_space = spaces.Discrete(3)  # [Decrease Price, Keep Price, Increase Price]
        # Bounds are built as float32 so they match the declared dtype.
        # NOTE(review): these bounds are fixed constants; confirm that the
        # encoded categorical codes and PED values really fall inside them.
        self.observation_space = spaces.Box(
            low=np.array([-3, -1, 0, 0, -3], dtype=np.float32),
            high=np.array([3, 1, 1, 1, 3], dtype=np.float32),
            dtype=np.float32,
        )

    def reset(self):
        """Start a new episode at a random row and return its observation."""
        self.current_index = random.randint(0, len(self.df) - 1)
        self.steps_taken = 0
        return self._get_state()

    def _get_state(self):
        """Return the observation vector for the row at ``current_index``."""
        row = self.df.iloc[self.current_index]
        return np.array([
            row['Item_MRP'],
            row['Item_Visibility'],
            row['Outlet_Type'],
            row['Item_Fat_Content'],
            row['PED']
        ], dtype=np.float32)

    def step(self, action):
        """Apply ``action`` to the current row and advance to the next row.

        Returns:
            ``(observation, reward, done, info)`` where ``observation``
            describes the next row, ``reward`` is the revenue change for the
            row that was just priced, and ``done`` becomes ``True`` once
            ``max_steps`` steps have been taken since the last ``reset``.
        """
        row = self.df.iloc[self.current_index]

        # Pick the price-change table based on PED
        ped_value = row['PED']
        if ped_value < -1:
            price_change = self._ELASTIC_CHANGES[action]
        else:
            price_change = self._INELASTIC_CHANGES[action]

        old_revenue = row['Item_MRP'] * row['Item_Outlet_Sales']
        new_price = row['Item_MRP'] * (1 + price_change)
        # Linear demand response: sales move by PED * (relative price change).
        new_sales = row['Item_Outlet_Sales'] * (1 + ped_value * price_change)
        new_revenue = new_price * new_sales

        # Reward = Revenue Change
        reward = float(new_revenue - old_revenue)

        # Move to next product, wrapping around at the end of the data
        self.current_index = (self.current_index + 1) % len(self.df)

        # End the episode after a bounded number of steps; without this a
        # `while not done` loop over the environment never terminates.
        self.steps_taken += 1
        done = self.steps_taken >= self.max_steps

        return self._get_state(), reward, done, {}


## Train the RL Model Using Q-Learning

# In [None]:
env = PricingEnv(df)
# Tabular Q-values: one row per DataFrame row (the state is identified by
# env.current_index), one column per action.
q_table = np.zeros((len(df), 3))

learning_rate = 0.1
discount_factor = 0.9
epsilon = 0.1  # Exploration factor
# Hard cap on episode length so training terminates even if the environment
# never reports done. One pass over the data per episode; lower it to train
# faster.
max_steps_per_episode = len(df)

# Training loop
for episode in range(1000):
    env.reset()
    done = False
    steps = 0
    while not done and steps < max_steps_per_episode:
        # Remember which state we act in: env.step() advances current_index,
        # so reading it afterwards would refer to the NEXT state.
        state_index = env.current_index

        # Epsilon-greedy action selection
        if np.random.rand() < epsilon:
            action = env.action_space.sample()
        else:
            action = np.argmax(q_table[state_index])

        _, reward, done, _ = env.step(action)
        next_index = env.current_index

        # Q-learning update: move Q(s, a) towards r + gamma * max_a' Q(s', a').
        # We always bootstrap from the next state because the episode end here
        # is a step limit, not a true terminal state.
        td_target = reward + discount_factor * np.max(q_table[next_index])
        q_table[state_index, action] += learning_rate * (
            td_target - q_table[state_index, action]
        )
        steps += 1

print("RL Training Completed!")