In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from collections import deque
import random
import numpy as np
import pandas as pd

# Hyperparameters for Deep Q-Learning
discount_factor = 0.99  # weight on the best next-state Q-value when DQNAgent.replay builds targets
epsilon = 1.0  # initial probability that DQNAgent.act picks a random action
epsilon_min = 0.01  # replay stops decaying epsilon once it is no longer above this value
epsilon_decay = 0.995  # factor epsilon is multiplied by on each replay call
learning_rate = 0.001  # Adam learning rate used by build_model
batch_size = 64  # number of stored transitions sampled per replay call
memory_size = 2000  # maxlen of the replay deque; older transitions are dropped beyond this
num_episodes = 10  # number of passes the training loop makes over the dataset

state_size = 5  # length of the state vector (run_episode builds 5-element states)
action_size_bau = 3  # number of actions (Q-network outputs) when Event_Type == 'BAU'
action_size_event = 5  # number of actions (Q-network outputs) for every other Event_Type

def build_model(state_size, action_size):
    """Create and compile a small fully connected Q-network.

    The network takes a vector of length ``state_size``, passes it through
    two 24-unit ReLU layers, and emits ``action_size`` linear outputs (one
    value per action). It is compiled with Adam, using the module-level
    ``learning_rate``, and mean-squared-error loss.

    Args:
        state_size: Number of input features.
        action_size: Number of output units.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    network = tf.keras.Sequential([
        layers.Input(shape=(state_size,)),
        layers.Dense(24, activation='relu'),
        layers.Dense(24, activation='relu'),
        layers.Dense(action_size, activation='linear'),
    ])
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    network.compile(optimizer=optimizer, loss='mse')
    return network

class DQNAgent:
    """Epsilon-greedy Deep Q-Learning agent with one Q-network per event type.

    Transitions whose ``event_type`` is ``'BAU'`` are handled by
    ``model_bau`` (``action_size_bau`` outputs); every other event type is
    handled by ``model_event`` (``action_size_event`` outputs). All
    transitions share a single bounded replay memory.
    """

    def __init__(self, state_size, action_size_bau, action_size_event):
        """Set up the replay memory, exploration schedule and both networks.

        Args:
            state_size: Length of the state vector fed to both networks.
            action_size_bau: Number of actions available for 'BAU' rows.
            action_size_event: Number of actions available for other rows.
        """
        self.state_size = state_size
        self.action_size_bau = action_size_bau
        self.action_size_event = action_size_event
        # Bounded buffer: once full, the oldest transitions are discarded.
        self.memory = deque(maxlen=memory_size)
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay

        self.model_bau = build_model(state_size, action_size_bau)
        self.model_event = build_model(state_size, action_size_event)

    def _model_for(self, event_type):
        """Return the Q-network responsible for ``event_type``."""
        return self.model_bau if event_type == 'BAU' else self.model_event

    def _action_count(self, event_type):
        """Return how many actions are available for ``event_type``."""
        if event_type == 'BAU':
            return self.action_size_bau
        return self.action_size_event

    def remember(self, state, action, reward, next_state, done, event_type):
        """Append one transition tuple to the replay memory."""
        self.memory.append((state, action, reward, next_state, done, event_type))

    def act(self, state, event_type):
        """Choose an action index for ``state`` using an epsilon-greedy policy.

        Args:
            state: Model input for a single state (the callers in this file
                pass an array reshaped to ``(1, state_size)``).
            event_type: ``'BAU'`` selects the BAU network/action space; any
                other value selects the event network/action space.

        Returns:
            An ``int`` action index valid for the selected action space.
        """
        # Explore: the networks are not consulted at all on this path.
        if np.random.rand() <= self.epsilon:
            return random.randrange(self._action_count(event_type))
        # Exploit: verbose=0 keeps Keras from printing a progress bar per call.
        q_values = self._model_for(event_type).predict(state, verbose=0)
        return int(np.argmax(q_values[0]))

    def replay(self, batch_size):
        """Train on a random minibatch of stored transitions, then decay epsilon.

        Does nothing when fewer than ``batch_size`` transitions are stored.
        Each sampled transition is fitted individually on the network that
        matches its event type.

        Args:
            batch_size: Number of transitions to sample from memory.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done, event_type in minibatch:
            model = self._model_for(event_type)
            # Terminal transitions have no future value, so the target is the
            # bare reward; otherwise bootstrap from the best next-state Q-value.
            target = reward
            if not done:
                next_q = model.predict(next_state, verbose=0)[0]
                target = reward + discount_factor * np.amax(next_q)
            # Only the taken action's Q-value is moved toward the target; the
            # other outputs keep their current predictions (zero error).
            # The fit runs for terminal transitions too, so they are learned.
            target_f = model.predict(state, verbose=0)
            target_f[0][action] = target
            model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            # Clamp so the decay step can never undershoot the floor.
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)


# Single module-level agent; run_episode and the training loop use it directly.
agent = DQNAgent(state_size, action_size_bau, action_size_event)


# Sales dataset that run_episode walks row by row. The code in this file reads
# the columns Sales, Price, Discount_Rate, Event_Type and Revenue.
# NOTE(review): the absolute /content/ path looks Colab-specific — confirm
# before running in another environment.
df = pd.read_csv(r"/content/synthetic_sales_data.csv")


def _row_state(df, row):
    """Build the ``(1, 5)`` state array for the row at integer position ``row``."""
    return np.array([
        df['Sales'].iloc[row],
        df['Price'].iloc[row],
        df['Discount_Rate'].iloc[row],
        # Two constant features kept exactly as before.
        # NOTE(review): their intended meaning is not visible here — confirm.
        100,
        0,
    ]).reshape(1, -1)


def run_episode(df):
    """Walk ``df`` once, storing one transition per row in the global agent.

    For each row the agent picks an action, the row's ``Revenue`` is taken
    as the reward, and the transition is appended to the agent's replay
    memory. The next state is built from the following row; the final row
    is marked ``done`` and reuses its own state as a placeholder next state.

    Rows are addressed by position (``.iloc``), so the frame does not need
    a default 0..n-1 index.

    NOTE(review): the reward comes straight from the data and does not
    depend on the chosen action — confirm that this is intended.

    Args:
        df: DataFrame with columns ``Sales``, ``Price``, ``Discount_Rate``,
            ``Event_Type`` and ``Revenue``.

    Returns:
        Sum of ``Revenue`` over all rows (``0`` for an empty frame).
    """
    total_reward = 0
    n_rows = len(df)
    if n_rows == 0:
        return total_reward

    state = _row_state(df, 0)
    for i in range(n_rows):
        event_type = df['Event_Type'].iloc[i]

        action = agent.act(state, event_type)

        done = i == n_rows - 1
        # Look ahead one row so the stored transition really moves forward.
        next_state = state if done else _row_state(df, i + 1)
        reward = df['Revenue'].iloc[i]

        agent.remember(state, action, reward, next_state, done, event_type)
        total_reward += reward

        if done:
            print(f"Episode finished with total reward: {total_reward}")
        state = next_state
    return total_reward


# Training driver: each episode makes one full pass over df to fill the replay
# memory, then trains once on a single sampled minibatch.
# NOTE(review): replay runs only once per episode, so the networks see at most
# num_episodes minibatches and epsilon decays at most num_episodes times in
# total — confirm this is intended rather than replaying inside the episode.
for e in range(num_episodes):
    total_reward = run_episode(df)
    print(f"Episode {e+1}/{num_episodes}, Total Reward: {total_reward}")
    agent.replay(batch_size)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step