# Agent Learning Project

**This document is specific to running the Graphical Interface**

## Imports 

We start by importing the required libraries. Uncomment & run the next cell in case some of the libraries aren't installed in your environment. 

In [1]:
# !pip install numpy matplotlib

In [2]:
import copy # Used to give each simulation its own copies of the initial agents
import tkinter as tk # Graphical Interface library 

import numpy as np # Useful for computations and probabilistic distributions

## Defining the Agent class

We define a type of agent with the following attributes: 
- Agent name : Name of the agent
- Service probability : Probability that this agent completes service during a step while it is at the counter.
- Reward : Reward the counter gets for servicing this agent
- Penalty : Penalty subtracted from the reward at every step this agent spends waiting in the queue
- Time in queue : Number of steps for which the agent has been in the queue. 

In [3]:
class Agent:
    """A customer of the queue, configured from the module-level ``agents`` table.

    Attributes:
        agent_name: Key of this agent in the ``agents`` table.
        service_prob: First entry of the agent's row in ``agents``.
        reward: Second entry of the agent's row in ``agents``.
        penalty: Third entry of the agent's row in ``agents``.
        time_in_queue: Counter starting at 0; the simulation increments it.
    """

    def __init__(self, agent_name):
        """Look up ``agent_name`` in ``agents`` and copy its first three fields."""
        self.agent_name = agent_name
        # Only the first three fields describe the agent itself; the fourth
        # (the arrival weight) is not stored on the instance.
        service_prob, reward, penalty = agents[agent_name][:3]
        self.service_prob = service_prob
        self.reward = reward
        self.penalty = penalty
        self.time_in_queue = 0

    def __str__(self):
        """Return the label shown for this agent: its name and waiting time."""
        name, waited = self.agent_name, self.time_in_queue
        return f"Agent {name} ({waited})"

## Defining the Queue mechanism 

In [4]:
class QueueSimulation:
    """Discrete-time simulation of one counter serving a queue of agents.

    Attributes:
        agents: Mapping ``name -> [service_probability, reward, penalty,
            distrib_weight]``.
        agent_distribution: The ``distrib_weight`` column normalized to sum
            to 1; used as the arrival distribution over agent names.
        arrival_prob: Probability that a new agent joins at each step.
        init: The initial-queue specification exactly as given by the caller
            (kept untouched so an identical simulation can be rebuilt).
        alpha: Parameter forwarded to ``regularization``.
        fixed_discount: The per-step discount factor given by the caller.
        discount: Running discount, i.e. ``fixed_discount ** time``.
        queue: List of agents currently waiting.
        current_agent: Agent at the counter, or ``None`` when it is idle.
        time, total_reward, discounted_reward: Running statistics.
    """

    def __init__(self, agents, arrival_prob, init, alpha, discount):
        """Set up the simulation.

        Args:
            agents: Agent table (see class docstring).
            arrival_prob: Per-step probability of an arrival.
            init: Either an integer (that many randomly drawn agents are
                created) or an iterable of agents to start from.
            alpha: Regularization parameter.
            discount: Per-step discount factor for the discounted reward.
        """
        # Define the agents and their arrival distribution
        self.agents = agents
        weights = np.array([spec[3] for spec in agents.values()])
        self.agent_distribution = weights / np.sum(weights)

        # Define the hyper-parameters of the simulation
        self.arrival_prob, self.init, self.alpha = arrival_prob, init, alpha
        # Optimization trick: keep a running product instead of recomputing
        # fixed_discount ** time at each step.
        self.fixed_discount, self.discount = discount, 1

        # Define the queue
        if isinstance(init, (int, np.integer)):
            self.queue = [self.create_agent() for _ in range(init)]
        else:
            # Work on per-agent copies: the caller's list (also stored in
            # self.init) must stay pristine so that a reset really restarts
            # from the initial state, and entries that alias one object
            # become independent agents.
            self.queue = [copy.copy(agent) for agent in init]

        # The head of the queue moves to the counter; it must leave the queue,
        # otherwise the same agent would be both waiting and in service.
        self.current_agent = self.queue.pop(0) if self.queue else None

        # Variables that are used to analyze our code
        self.time, self.total_reward, self.discounted_reward = 0, 0, 0

    def create_agent(self):
        """Draw an agent name from ``agent_distribution`` and build the agent."""
        names = list(self.agents)
        agent_name = np.random.choice(names, p=self.agent_distribution)
        return Agent(agent_name)

    def step(self):
        """
        This function is the core of the simulation. It defines what happens at every step. The order in which 
        the computations are made is important so we have to be careful. In our case, we proceed as follows:
        
        1. Determine whether an agent is joining the queue
        2. Determine if the agent in service is going to complete service or not and potentially add reward 
        3. If the counter is idle, move the head of the queue to the counter
        4. Add penalty to the reward
        """
        self.time += 1
        self.discount *= self.fixed_discount

        # 1. Add new agent to the queue based on arrival probability
        if np.random.rand() < self.arrival_prob:
            self.queue.append(self.create_agent())

        # 2. Check if the current agent is done being served
        served = self.current_agent
        if served is not None and np.random.rand() < served.service_prob:
            self.total_reward += served.reward
            self.discounted_reward += served.reward * self.discount
            self.current_agent = None

        # 3. An idle counter takes the first waiting agent
        if self.current_agent is None and self.queue:
            self.swap_agents(0)

        # 4. Apply penalty on reward: every waiting agent costs its penalty,
        # plus a regularization term depending on the queue length.
        for agent in self.queue:
            agent.time_in_queue += 1
            self.total_reward -= agent.penalty
            self.discounted_reward -= agent.penalty * self.discount
        queue_cost = regularization(self.alpha, len(self.queue))
        self.total_reward -= queue_cost
        self.discounted_reward -= queue_cost * self.discount

    def swap_agents(self, queue_index):
        """Bring the agent at ``queue_index`` to the counter.

        If the counter is busy, the agent in service takes the freed queue
        slot; if it is idle, the selected agent is simply removed from the
        queue. An index outside ``[0, len(queue))`` is ignored.
        """
        if not 0 <= queue_index < len(self.queue):
            return
        if self.current_agent is None:
            self.current_agent = self.queue.pop(queue_index)
        else:
            self.queue[queue_index], self.current_agent = (
                self.current_agent,
                self.queue[queue_index],
            )


In [5]:
# Compute average service probability
def service():
    """Return the weighted mean service probability and its reciprocal.

    Reads the module-level ``agents`` table: field 0 of every row is the
    service probability and field 3 the distribution weight. The weights are
    normalized to sum to 1 before averaging.

    Returns:
        A pair ``(mean_probability, 1 / mean_probability)``.
    """
    rows = list(agents.values())
    probabilities = np.array([row[0] for row in rows])
    weights = np.array([row[3] for row in rows])
    distribution = weights / weights.sum()
    mean_probability = np.dot(probabilities, distribution)
    return mean_probability, 1 / mean_probability

# Create the initial list
def create(l, shuffle=False):
    """Build an explicit initial queue.

    Args:
        l: Iterable of ``(name, quantity)`` pairs; ``quantity`` agents named
            ``name`` are appended for each pair, in order.
        shuffle: When true, the resulting list is shuffled in place with
            ``np.random.shuffle``.

    Returns:
        A list of ``Agent`` instances.
    """
    queue = []
    for name, quantity in l:
        # One fresh Agent per slot. ``[Agent(name)] * quantity`` would repeat
        # a single object, so every copy would share (and multiply-increment)
        # the same ``time_in_queue``.
        queue.extend(Agent(name) for _ in range(quantity))
    if shuffle:
        np.random.shuffle(queue)
    return queue

## Designing our Graphical User Interface

In [6]:
class QueueSimulationGUI:
    """Tkinter front-end to step through a ``QueueSimulation`` by hand.

    The window shows the elapsed time, the waiting queue (horizontally
    scrollable), the agent at the counter and the rewards, plus buttons to
    advance one step, reset, or bring a chosen queue position to the counter.
    """

    def __init__(self, master, simulation):
        """Build all widgets inside ``master``.

        Args:
            master: The Tk root (or parent) window.
            simulation: Simulation providing the parameters. Note that the
                constructor ends with ``reset_simulation()``, so a fresh
                simulation with the same parameters replaces this object.
        """
        self.master = master
        self.simulation = simulation
        self.master.title("Queue Simulation")

        title_font = ("Helvetica", 14)

        # Frame for displaying the time
        self.time_frame = tk.Frame(master)
        self.time_frame.pack(pady=10)

        self.time_label = tk.Label(self.time_frame, text="Time: 0", font=title_font)
        self.time_label.pack()

        # Frame for displaying the queue with scrollable canvas
        self.queue_frame = tk.Frame(master)
        self.queue_frame.pack(pady=10, fill=tk.BOTH, expand=True)

        self.queue_label = tk.Label(
            self.queue_frame,
            text=f"Queue - Length: {len(self.simulation.queue)}",
            font=title_font,
        )
        self.queue_label.pack()

        self.queue_canvas = tk.Canvas(self.queue_frame, width=600, height=150, bg="white")
        self.queue_canvas.pack(side=tk.TOP, fill=tk.BOTH, expand=True)

        # The scrollbar is packed right under the canvas (side=TOP).
        self.scrollbar = tk.Scrollbar(
            self.queue_frame, orient=tk.HORIZONTAL, command=self.queue_canvas.xview
        )
        self.scrollbar.pack(side=tk.TOP, fill=tk.X)

        self.queue_canvas.config(xscrollcommand=self.scrollbar.set)

        self.inner_queue_frame = tk.Frame(self.queue_canvas)
        self.queue_canvas.create_window((0, 0), window=self.inner_queue_frame, anchor='nw')

        # Frame for displaying the current agent at the counter
        self.counter_frame = tk.Frame(master)
        self.counter_frame.pack(pady=10)

        self.counter_label = tk.Label(self.counter_frame, text="Counter", font=title_font)
        self.counter_label.pack()

        self.counter_canvas = tk.Canvas(self.counter_frame, width=200, height=100, bg="white")
        self.counter_canvas.pack()

        # Frame for displaying the total reward
        self.reward_frame = tk.Frame(master)
        self.reward_frame.pack(pady=10)

        self.reward_label = tk.Label(self.reward_frame, text="Total Reward: 0", font=title_font)
        self.reward_label.pack()

        # Frame for action buttons
        self.action_frame = tk.Frame(master)
        self.action_frame.pack(pady=10)

        self.next_step_button = tk.Button(self.action_frame, text="Next Step", command=self.next_step)
        self.next_step_button.pack(side=tk.LEFT, padx=10)

        self.reset_button = tk.Button(self.action_frame, text="Reset", command=self.reset_simulation)
        self.reset_button.pack(side=tk.LEFT, padx=10)

        # Frame for input and switch button
        self.input_frame = tk.Frame(master)
        self.input_frame.pack(pady=10)

        self.index_label = tk.Label(self.input_frame, text="Enter Queue Index:", font=title_font)
        self.index_label.pack(side=tk.LEFT)

        self.index_entry = tk.Entry(self.input_frame, width=5)
        self.index_entry.pack(side=tk.LEFT, padx=5)

        self.switch_button = tk.Button(self.input_frame, text="Switch", command=self.switch_agent)
        self.switch_button.pack(side=tk.LEFT, padx=10)

        self.reset_simulation()

    def next_step(self):
        """Advance the simulation by one step and refresh the display."""
        self.simulation.step()
        self.update_labels()

    def switch_agent(self):
        """Bring the agent at the typed queue index to the counter.

        Input that is not an integer is ignored; an out-of-range index is
        ignored by ``swap_agents`` itself.
        """
        try:
            agent_index = int(self.index_entry.get())
        except ValueError:
            return  # We could imagine adding an error message here
        self.simulation.swap_agents(agent_index)
        self.update_labels()

    def update_labels(self):
        """Redraw every label and canvas from the current simulation state."""
        sim = self.simulation
        self.time_label.config(text=f"Time: {sim.time}")
        self.reward_label.config(text=f"Total & Discounted Reward: {sim.total_reward} & {sim.discounted_reward}")
        self.queue_label.config(text=f"Queue - Length: {len(sim.queue)}")

        # Update queue visualization: one box per waiting agent, left to right
        self.queue_canvas.delete("all")
        box_width = 150
        for idx, agent in enumerate(sim.queue):
            left = box_width * idx
            self.queue_canvas.create_rectangle(10 + left, 10, box_width + left, 60, fill="lightblue")
            self.queue_canvas.create_text(10 + left + box_width / 2, 35, text=str(agent))

        # Let the scrollbar cover everything that was just drawn
        self.queue_canvas.config(scrollregion=self.queue_canvas.bbox("all"))

        # Update counter visualization
        self.counter_canvas.delete("all")
        if sim.current_agent:
            self.counter_canvas.create_rectangle(50, 10, 150, 60, fill="lightgreen")
            self.counter_canvas.create_text(100, 35, text=str(sim.current_agent.agent_name))
        else:
            self.counter_canvas.create_text(100, 35, text="Empty")

    def reset_simulation(self):
        """Replace the simulation with a fresh one using the same parameters."""
        previous = self.simulation
        self.simulation = QueueSimulation(
            previous.agents,
            previous.arrival_prob,
            previous.init,
            previous.alpha,
            # ``discount`` is the running product (1 at start, then gamma**t);
            # the per-step factor the constructor expects is ``fixed_discount``.
            previous.fixed_discount,
        )
        self.update_labels()


## Choosing base parameters

In [7]:
# Defining Agents
# This table is read as a module-level global by Agent and service().
agents = {
    ### Agent name : [service_probability, reward, penalty, distrib_weight]
    # service_probability: float in [0,1]
    # reward, penalty: float (unconstrained)
    # distrib_weight: relative arrival weight; the weights are normalized into
    #   the probability distribution used to draw new agents, so they should be
    #   non-negative with a positive sum
    "Alex" : [0.7, 1, 0.1, 6], 
    "Ben" : [0.5, 20, 0.4, 3],
    "Cameron" : [0.3, 30, 0.2, 4],
    "Dennis" : [0.4, 20, 0.01, 2],
    "Eric": [0.8, 4, 0.2, 3],
    "Fabien": [0.5, 50, 0.4, 1],
}

# Expected average service probability & its reciprocal (expected service time)
print(service())

# Probability that a new agent joins the queue at each step
arrival_prob = 0.55

# Number of agents in the queue at t = 0
# We either choose a random weighted queue or a specific queue
start = 5  # Integer mode: this many agents are drawn at random
queue = create([("Fabien", 20), ("Alex", 10)], True) # [(name, quantity),(name, quantity),...], shuffle=True/False 
init = start # Choose a mode here: `start` (random queue) or `queue` (specific queue)

# Choose a discount factor for discounted reward computation
discount = 0.99

# Regularization parameter and function
alpha = 0.00
def regularization(alpha, length):
    """Return the regularization cost: ``alpha`` times the queue ``length``."""
    queue_cost = alpha * length
    return queue_cost

(0.5578947368421051, 1.7924528301886797)


## Playing the game manually

Execute the main code manually and play the game by making your own decisions through the graphical user interface.

In [8]:
# Build the simulation from the base parameters chosen above.
simulation = QueueSimulation(
    agents=agents,
    arrival_prob=arrival_prob,
    init=init,
    alpha=alpha,
    discount=discount,
)

# Open the window and hand control to Tk until it is closed.
root = tk.Tk()
gui = QueueSimulationGUI(master=root, simulation=simulation)
root.mainloop()