In [1]:
import numpy as np

In [21]:
class QLearning:

    def __init__(self, num_servers, action_space, learning_rate=0.1, discount_factor=0.9, exploration_proba=0.1):
        self.num_servers = num_servers
        self.action_space = action_space
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_proba = exploration_proba

        # Initialize Q-table with zeros
        self.q_table = np.zeros((num_servers + 1, len(action_space)))


    def choose_action(self, state):
        if np.random.uniform(0, 1) < self.exploration_proba:
            # Explore: randomly choose an action
            return np.random.choice(self.action_space)
        else:
            # Exploit: choose the action with the highest Q-value
            return np.argmax(self.q_table[state, :])
        
    def update_q_table(self, state, action, reward, next_state):
        best_next_action = np.max(self.q_table[next_state, :])
        self.q_table[state, action] = (1 - self.learning_rate) * self.q_table[state, action] + \
                                      self.learning_rate * (reward + self.discount_factor * best_next_action)


In [24]:
def simulate_q_learning(num_episodes):
    num_servers = 15
    action_space = list(range(5, 16))
    ql_agent = QLearning(num_servers, action_space)

    rewards_per_episode = []

    for episode in range(num_episodes):
        # Reset the environment at the beginning of each episode
        queue_length = np.random.poisson(10)
        total_reward = 0

        # Ensure initial queue length is within the desired range [0, 10]
        queue_length = min(max(queue_length, 0), 10)

        print(f"Initial queue length: {queue_length}")

        for _ in range(queue_length):
            # Choose action based on Q-learning policy
            action = ql_agent.choose_action(queue_length)

            # Simulate server processing time
            server_rate = action
            processing_time = np.random.exponential(1 / server_rate)

            # Update queue length based on server processing
            queue_length = min(max(queue_length - 1, 0), 10)

            # Calculate reward
            reward = 100 if queue_length == 5 else 0
            total_reward += reward

            # Update Q-table
            ql_agent.update_q_table(queue_length, action - 5, reward, queue_length)

            print(f"Queue length after processing: {queue_length}")

        rewards_per_episode.append(total_reward)

    return rewards_per_episode


In [25]:
# Example usage
num_episodes = 1000
rewards_history = simulate_q_learning(num_episodes)
average_reward = sum(rewards_history) / num_episodes
print(f"Average Reward over {num_episodes} episodes: {average_reward}")

Initial queue length: 9
Queue length after processing: 8
Queue length after processing: 7
Queue length after processing: 6
Queue length after processing: 5
Queue length after processing: 4
Queue length after processing: 3
Queue length after processing: 2
Queue length after processing: 1
Queue length after processing: 0
Initial queue length: 7
Queue length after processing: 6
Queue length after processing: 5
Queue length after processing: 4
Queue length after processing: 3
Queue length after processing: 2
Queue length after processing: 1
Queue length after processing: 0
Initial queue length: 10
Queue length after processing: 9
Queue length after processing: 8
Queue length after processing: 7
Queue length after processing: 6
Queue length after processing: 5
Queue length after processing: 4
Queue length after processing: 3
Queue length after processing: 2
Queue length after processing: 1
Queue length after processing: 0
Initial queue length: 10
Queue length after processing: 9
Queue lengt

  processing_time = np.random.exponential(1 / server_rate)



Queue length after processing: 3
Queue length after processing: 2
Queue length after processing: 1
Queue length after processing: 0
Initial queue length: 9
Queue length after processing: 8
Queue length after processing: 7
Queue length after processing: 6
Queue length after processing: 5
Queue length after processing: 4
Queue length after processing: 3
Queue length after processing: 2
Queue length after processing: 1
Queue length after processing: 0
Initial queue length: 10
Queue length after processing: 9
Queue length after processing: 8
Queue length after processing: 7
Queue length after processing: 6
Queue length after processing: 5
Queue length after processing: 4
Queue length after processing: 3
Queue length after processing: 2
Queue length after processing: 1
Queue length after processing: 0
Initial queue length: 10
Queue length after processing: 9
Queue length after processing: 8
Queue length after processing: 7
Queue length after processing: 6
Queue length after processing: 5
Q

Queue length after processing: 2
Queue length after processing: 1
Queue length after processing: 0
Initial queue length: 9
Queue length after processing: 8
Queue length after processing: 7
Queue length after processing: 6
Queue length after processing: 5
Queue length after processing: 4
Queue length after processing: 3
Queue length after processing: 2
Queue length after processing: 1
Queue length after processing: 0
Initial queue length: 8
Queue length after processing: 7
Queue length after processing: 6
Queue length after processing: 5
Queue length after processing: 4
Queue length after processing: 3
Queue length after processing: 2
Queue length after processing: 1
Queue length after processing: 0
Initial queue length: 10
Queue length after processing: 9
Queue length after processing: 8
Queue length after processing: 7
Queue length after processing: 6
Queue length after processing: 5
Queue length after processing: 4
Queue length after processing: 3
Queue length after processing: 2
Que

Queue length after processing: 0
Initial queue length: 4
Queue length after processing: 3
Queue length after processing: 2
Queue length after processing: 1
Queue length after processing: 0
Initial queue length: 8
Queue length after processing: 7
Queue length after processing: 6
Queue length after processing: 5
Queue length after processing: 4
Queue length after processing: 3
Queue length after processing: 2
Queue length after processing: 1
Queue length after processing: 0
Initial queue length: 9
Queue length after processing: 8
Queue length after processing: 7
Queue length after processing: 6
Queue length after processing: 5
Queue length after processing: 4
Queue length after processing: 3
Queue length after processing: 2
Queue length after processing: 1
Queue length after processing: 0
Initial queue length: 10
Queue length after processing: 9
Queue length after processing: 8
Queue length after processing: 7
Queue length after processing: 6
Queue length after processing: 5
Queue length

Queue length after processing: 5
Queue length after processing: 4
Queue length after processing: 3
Queue length after processing: 2
Queue length after processing: 1
Queue length after processing: 0
Initial queue length: 10
Queue length after processing: 9
Queue length after processing: 8
Queue length after processing: 7
Queue length after processing: 6
Queue length after processing: 5
Queue length after processing: 4
Queue length after processing: 3
Queue length after processing: 2
Queue length after processing: 1
Queue length after processing: 0
Initial queue length: 7
Queue length after processing: 6
Queue length after processing: 5
Queue length after processing: 4
Queue length after processing: 3
Queue length after processing: 2
Queue length after processing: 1
Queue length after processing: 0
Initial queue length: 10
Queue length after processing: 9
Queue length after processing: 8
Queue length after processing: 7
Queue length after processing: 6
Queue length after processing: 5
Qu