In [1]:
import gymnasium as gym
import numpy as np
from gymnasium import spaces
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from pyworkforce.queuing import ErlangC
import torch
import math

## Scheduling environment vs. random uniform actions

In [6]:
def genArrivals(lambda_per_hour=10, num_intervals=48, path="../data/arrivals.csv"):
    """
    Simulate call arrivals per 30-minute interval and save them to a CSV file.

    Each interval's volume is drawn independently from a Poisson distribution
    whose mean is half the hourly rate (an interval lasts 30 minutes).

    Parameters:
    - lambda_per_hour (float): Mean number of arrivals per hour.
    - num_intervals (int): Number of consecutive 30-minute intervals to
      generate, starting at 00:00 (48 covers a full day).
    - path (str): Destination CSV file. It is written with the columns
      `time` and `arrivals`, preceded by the DataFrame index column.

    Returns:
    - None. The result is only written to `path`.
    """
    # Two 30-minute intervals per hour, hence half the hourly rate per draw.
    arrivals = np.random.poisson(lambda_per_hour / 2, num_intervals)

    # "30min" replaces the "30T" alias, which recent pandas releases deprecate.
    time_intervals = pd.date_range(start="00:00:00", periods=num_intervals, freq="30min").time

    df = pd.DataFrame({'time': time_intervals, 'arrivals': arrivals})

    df.to_csv(path)

def get_agents_schedule(shifts, shift_duration):
    """
    Build the per-slot head count of a day from a list of shifts.

    Parameters:
    - shifts (iterable): (count, "HH:MM") pairs giving the number of agents
      and the start time of each shift.
    - shift_duration (int): Shift length in hours.

    Returns:
    - dict: "HH:MM" label of each of the 48 half-hour slots -> number of
      agents on duty, ordered by label. Slots that would fall after 23:30
      are ignored (shifts do not wrap past midnight).
    """
    n_slots = 48
    span = 2 * shift_duration  # shift length expressed in 30-minute slots

    def label(slot):
        # Slot number -> "HH:MM"
        hours, half = divmod(slot, 2)
        return f"{hours:02d}:{half * 30:02d}"

    # Start with every slot present and empty
    staffing = {label(slot): 0 for slot in range(n_slots)}

    for headcount, start in shifts:
        hh, mm = (int(part) for part in start.split(":"))
        first = 2 * hh + mm // 30
        for slot in range(first, first + span):
            if slot >= n_slots:
                break  # the remainder of the shift lies past the end of the day
            staffing[label(slot)] += headcount

    return dict(sorted(staffing.items()))

In [None]:

def calculate_reward(action,state,arrivals):
    """
    Calculate the service-level reward for staffing one 30-minute slot.

    Parameters:
    - action (int): Number of agents staffed for the slot.
    - state (int): Positional index of the slot in `arrivals`.
    - arrivals (DataFrame): Table whose `arrivals` column holds the call
      volume of every 30-minute slot.

    Returns:
    - The reward score: +30 when the service level reaches 80%, -30
      otherwise, minus 10 per staffed agent.

    Raises:
    - ValueError: If `action` or the slot's arrival volume is not a
      non-negative number.
    """
    arrival = arrivals.arrivals.values[state]

    # Reject negative (or NaN) inputs explicitly; previously they fell through
    # every branch and failed later with an UnboundLocalError on `sla`.
    if not (arrival >= 0 and action >= 0):
        raise ValueError(
            f"action and arrival must be non-negative, got action={action}, arrival={arrival}"
        )

    # Service Level: The expected percentage of transactions that don't wait in the queue longer than the target ASA
    # Average speed of answer (ASA): Average time that a transaction waits in the queue to be attended by a resource
    # Average handle time (AHT): Average time that takes to a single resource to attend a transaction
    # The service level required by the call center is 80%
    if action == 0:
        # Nobody staffed: nothing can be answered.
        sla = 0
    elif arrival == 0:
        # Agents staffed but no calls: the target is trivially met.
        sla = 1
    else:
        erlang = ErlangC(transactions=arrival, asa=20/60, aht=2, interval=30, shrinkage=0)
        sla = erlang.service_level(positions=action)

    # The reward score is arbitrary: +30 / -30 around the 80% target
    reward = -30 if sla < 0.80 else 30

    # The reward also carries the cost of the staffed agents
    reward += action * -10

    return reward
    

In [None]:
# Write a fresh set of simulated arrivals to ../data/arrivals.csv (the draws are not seeded, so the file changes on every run)
genArrivals()

In [None]:
# Load the arrivals file written by genArrivals() and preview its first rows
arrivals=pd.read_csv("../data/arrivals.csv")
arrivals.head()


In [None]:
# Line plot of the call volume of every 30-minute slot (x axis is the row index)
plt.plot(arrivals.arrivals)

In [None]:
# Spot check: reward for staffing 1 agent in the slot at position 4
calculate_reward(1,4,arrivals)

In [None]:
class scheduling(gym.Env):
    """
    Single-day staffing environment with one decision per 30-minute slot.

    The state is the index of the current slot (0 .. n_slots - 1) and the
    action is the number of agents staffed for that slot.

    NOTE: the current state is stored in `observation_space` as a plain int
    rather than a Gymnasium space, because the notebook cells read that
    attribute as the state. `step` also takes the arrivals table as an extra
    argument and returns a 3-tuple, so this class does not follow the
    standard Gymnasium step signature.
    """

    def time_slots(self):
        """Return a dict mapping a 1-based index to the "HH:MM" label of each of the 48 half-hour slots of a day."""
        midnight = datetime.strptime("00:00", "%H:%M")
        return {
            index + 1: (midnight + timedelta(minutes=30 * index)).strftime("%H:%M")
            for index in range(48)
        }

    def _build_action_space(self):
        """Return the action space matching the current `n_agents`."""
        return spaces.Tuple((
            spaces.Discrete(self.n_agents),  # number of agents
        ))

    def __init__(self,n_agents=11,n_services=1):
        """
        Parameters:
        - n_agents (int): Size of the discrete action space (agent counts).
        - n_services (int): Number of services; stored but not used by this class.
        """
        super().__init__()

        # number of 30-minute slots in an episode
        self.n_slots=48
        # number of agents
        self.n_agents=n_agents

        # time slots
        self.ts=self.time_slots()
        # number of services
        self.n_services=n_services

        # current state (slot index); see the class docstring
        self.observation_space=0

        # Action space
        self.action_space = self._build_action_space()

        # termination of episode condition
        self.terminated=False

        # list of the actions taken during the current episode
        self.gk=[]

    def step(self, action,arrivals):
        """
        Staff the current slot with `action` agents and move to the next slot.

        Parameters:
        - action (int): Number of agents for the current slot.
        - arrivals (DataFrame): Arrivals table passed on to `calculate_reward`.

        Returns:
        - (next_state, reward, terminated); `terminated` becomes True once the
          last slot (n_slots - 1) has been staffed.
        """
        reward=calculate_reward(action,self.observation_space,arrivals)
        self.gk.append(action)

        # Both branches of the original only differed by this flag.
        if self.observation_space==self.n_slots-1:
            self.terminated=True

        self.observation_space+=1

        return self.observation_space, reward, self.terminated

    def reset(self, n_agents=11,seed=None, options=None):
        """
        Start a new episode at slot 0.

        Parameters:
        - n_agents (int): New number of agents; the action space is rebuilt
          to match it.
        - seed: Forwarded to `gym.Env.reset` to seed the environment's RNG.
        - options: Unused.

        Returns:
        - (initial_state, info) with an empty info dict, as in the Gymnasium API.
        """
        super().reset(seed=seed)
        self.observation_space=0
        self.gk=[]
        self.n_agents=n_agents
        # keep the action space in sync with the (possibly new) number of agents
        self.action_space = self._build_action_space()

        self.terminated=False
        return self.observation_space, {}

    def render(self, V,mode='human'):
        """Rendering is not implemented."""
        pass


In [None]:
# Environment instance with 10 agents and a single service
s=scheduling(n_agents=10,n_services=1)



In [None]:
# Random baseline: run 1000 episodes with uniformly random staffing and
# collect the reward of every step (across all episodes) in reward_list
reward_list=[]
for _ in range(1000):
    s=scheduling(n_agents=10,n_services=1)
    t=False
    
    # Loop over the steps of one episode until the environment reports termination
    while not t:

        # pick a random number of agents
        # NOTE(review): randint includes its upper bound, so n_ag can equal s.n_agents,
        # which is outside Discrete(s.n_agents) (0 .. n_agents-1) - confirm the intent
        n_ag = random.randint(0, s.n_agents)


        print("############## The state is:",s.observation_space)
        print("number of agents dispatched:",n_ag)
       
        # step using the random action
        o,reward,t=s.step(action=n_ag,arrivals=arrivals)
        print("the reward is:", reward)
        
        reward_list.append(reward)
    print("##################################################################################################")


In [None]:
import matplotlib.pyplot as plt

# Data for the figure

# Slot labels ("HH:MM") and the actions taken during the last random episode
times = list(s.time_slots().values())
counts = s.gk

# Line chart of the arrivals per slot (the bar chart of counts is disabled below)
plt.figure(figsize=(10, 5))  # Set the figure size (optional)
# plt.bar(times, counts, color='blue')
plt.xticks(rotation=90)  # Rotate the x-axis labels for better readability
plt.xlabel('Time')
# The y label is French for "Incoming call volume"
plt.ylabel("Volume d'appels entrants")
plt.title('Counts per 30-minute interval over a 24-hour period')
plt.tight_layout()  # Adjust the layout to prevent clipping of labels
plt.plot(times,arrivals['arrivals'])
# NOTE(review): `reward` holds the reward of the last step executed, not the sum over the episode
print("The reward at the end of the episode is: ",reward)
# NOTE(review): 48*30 ignores the -10 per agent cost that calculate_reward subtracts
print("The maximum reward we could get is:",48*30)
plt.show()  # Display the chart

In [None]:
def moving_average(data, window_size=1000):
    """
    Compute the simple moving average of a sequence.

    Parameters:
    - data (list or sliceable sequence): The input values.
    - window_size (int): Number of consecutive values averaged per window.

    Returns:
    - list: One average per full window, in order; empty when the data is
      shorter than the window.
    """
    n_windows = len(data) - window_size + 1
    return [
        sum(data[start:start + window_size]) / window_size
        for start in range(n_windows)
    ]


# Smooth the per-step rewards with the default 1000-step window
averaged_data = moving_average(reward_list)

plt.plot(averaged_data)
# Last reward, mean and standard deviation of all collected rewards
reward_list[-1],np.mean(reward_list),np.std(reward_list)

In [None]:
# Histogram of the per-step rewards collected in reward_list
plt.hist(reward_list)

## On-policy control with SARSA


![Sarsa.PNG](attachment:Sarsa.PNG)

In [None]:
# Seed torch's RNG
# NOTE(review): pi() also draws from the `random` module, which is not seeded here
torch.manual_seed(42)
# Environment instance used for the Q-table dimensions
sc=scheduling(n_agents=10,n_services=1)
# Initialise Q(s,a) to zeros: one row per slot, one column per agent count
Q=torch.zeros((sc.n_slots,sc.n_agents))
# Display the shape of the table
Q.shape

In [None]:
### epsilon-greedy policy
def pi(s,Q,sc,eps=0.1):
    """
    Pick an action for state `s` with an epsilon-greedy rule.

    Parameters:
    - s (int): Row of `Q` for the current state.
    - Q (Tensor): Action-value table indexed as Q[state, action].
    - sc: Environment; only its `n_agents` attribute is read.
    - eps (float): Probability of picking a uniformly random action.

    Returns:
    - int: A random action in 0 .. sc.n_agents - 1 when exploring,
      otherwise the index of the largest value in Q[s].
    """
    # One uniform draw in [0, 1) decides between exploring and exploiting
    explore = torch.rand(1) < eps
    print("Are we exploring ?: ",explore)

    if not explore:
        return Q[s].argmax().item()

    return random.randint(0, sc.n_agents-1)


In [None]:
# SARSA (on-policy TD control) over 1000 one-day episodes.
# Q is updated in place; reward_list collects the reward of every step.
reward_list=[]
alpha=0.1  # learning rate
gamma=0.5  # discount factor
sc=scheduling(n_agents=10,n_services=1)
i_eps=1    # global step counter driving the exploration decay


def _exploration_rate(step):
    """Exploration probability 1/log(step), capped at 1 and defined for step 1 (where log is 0)."""
    if step < 2:
        return 1.0
    return min(1.0, 1/(np.log(step)))


for _ in range(1000):

    sc.reset(n_agents=10)
    t=False

    # SARSA picks the first action before entering the loop ...
    state=sc.observation_space
    action=pi(s=state,Q=Q,eps=_exploration_rate(i_eps),sc=sc)

    while not t:

        print("############## The state is:",state)
        print("number of agents dispatched:",action)

        # step using the action chosen by the epsilon-greedy policy
        state_prime,reward,t=sc.step(action=action,arrivals=arrivals)
        print("the reward is:", reward)

        i_eps+=1
        reward_list.append(reward)

        if t:
            # Terminal transition: there is no successor state, so the
            # target is the reward alone.
            Q[state,action]+=alpha*(reward-Q[state,action])
        else:
            # ... then picks A' in S' (not in S), bootstraps from Q(S',A')
            # and actually executes A' on the next iteration.
            action_prime=pi(s=state_prime,Q=Q,eps=_exploration_rate(i_eps),sc=sc)
            Q[state,action]+=alpha*(reward+(gamma*Q[state_prime,action_prime])-Q[state,action])
            state,action=state_prime,action_prime

    print("##################################### iteration:",i_eps)
    

In [None]:
import matplotlib.pyplot as plt

# Data for the figure

# Slot labels ("HH:MM") and the actions taken during the last SARSA episode
# NOTE(review): the labels come from `s` (the random-baseline environment), not from `sc`
times = list(s.time_slots().values())
counts = sc.gk

# Bar chart of the greedy action per slot (argmax over each row of Q), with the arrivals overlaid as a line
plt.figure(figsize=(10, 5))  # Set the figure size (optional)
plt.bar(times, list(Q.argmax(dim=1)), color='red')
plt.xticks(rotation=90)  # Rotate the x-axis labels for better readability
plt.xlabel('Time')
plt.ylabel('Count')
plt.title('Counts per 30-minute interval over a 24-hour period')
plt.tight_layout()  # Adjust the layout to prevent clipping of labels
plt.plot(arrivals['arrivals'])

plt.show()  # Display the chart

In [None]:
def moving_average(data, window_size=1000):
    """
    Compute the simple moving average of a sequence.

    Parameters:
    - data (list or sliceable sequence): The input values.
    - window_size (int): Number of consecutive values averaged per window.

    Returns:
    - list: One average per full window, in order; empty when the data is
      shorter than the window.
    """
    n_windows = len(data) - window_size + 1
    return [
        sum(data[start:start + window_size]) / window_size
        for start in range(n_windows)
    ]


# Smooth the per-step SARSA rewards with the default 1000-step window
averaged_data = moving_average(reward_list)

plt.plot(averaged_data)
# Last reward, mean and standard deviation of all collected rewards
reward_list[-1],np.mean(reward_list),np.std(reward_list)

In [None]:
# Histogram of the per-step rewards collected in reward_list
plt.hist(reward_list)

In [None]:
# Heat map of the Q table (rows: slots, columns: agent counts)
plt.imshow(Q)

In [None]:
# Greedy action (column with the largest Q-value) for the slot at index 43
Q[43].argmax()

In [None]:
# Sum of the recorded actions divided by 16
# NOTE(review): 8*2 is presumably the number of 30-minute slots in an 8-hour shift - confirm
sum(counts)/(8*2)

In [None]:
# Histogram of the values in counts (the actions recorded in sc.gk)
plt.hist(counts)

### 1. Environment modifications


- Arrivals from multiple services
- Each service has its own parameters (AHT, shrinkage, ...); see the calculate_reward function
- Some agents are capable of handling both services
- Limited number of agents (id, skills, workload, workload_left) --> (12, [french, english], 7h, 1h)

## Scheduling using the requirements

In [2]:
class Wscheduling(gym.Env):
    """
    Weekly shift-scheduling environment.

    An episode walks through every (shift, day-of-week) pair, from (1, 1) to
    (n_shifts, n_dow). Intermediate steps yield a reward of 0; the last step
    yields the reward computed by `week_reward` over all recorded actions.

    `reset()` must be called before `step()`: it replaces the Gymnasium
    space stored in `observation_space` by the current `(shift, dow)` tuple,
    which is how the state is tracked.
    """

    def time_slots(self):
        """Return a dict mapping a 0-based index to the "HH:MM" label of each of the 48 half-hour slots of a day."""
        midnight = datetime.strptime("00:00", "%H:%M")
        return {
            index: (midnight + timedelta(minutes=30 * index)).strftime("%H:%M")
            for index in range(48)
        }

    def _build_action_space(self):
        """Return the action space matching the current `n_agents`."""
        # NOTE(review): the notebook passes actions as (start_index, n_agents),
        # i.e. in the opposite order of the two components below - confirm.
        return spaces.Tuple((
            spaces.Discrete(self.n_agents),  # number of agents: 4
            spaces.Discrete(self.n_start_time),  # a shift start time from dict "ts"  : 14h30 (48 options)
        ))

    def __init__(self,n_agents=11):
        """
        Parameters:
        - n_agents (int): Number of agents available.
        """
        super().__init__()

        # number of possible start times
        self.n_start_time=48
        # number of days of the week
        self.n_dow=5
        # number of agents (previously hard-coded to 16, ignoring the argument)
        self.n_agents=n_agents
        # time slots: 48
        self.ts=self.time_slots()
        # number of shifts (e.g 3)
        self.n_shifts=3
        self.d_shift=7 # a shift duration is 7 hours
        self.between_shifts= 0*2 # 0 hours between shifts, in 30-minute slots

        # observation space
        self.observation_space = spaces.Tuple((spaces.Discrete(self.n_shifts), # shift 1
                                               spaces.Discrete(self.n_dow) # monday
                                              ))

        # Action space
        self.action_space = self._build_action_space()

        # termination of episode condition
        self.terminated=False

        # list of state/actions/rewards
        self.sar=[]

    def step(self, action,arrivals):
        """
        Record `action` for the current (shift, dow) state and advance.

        Parameters:
        - action: Forwarded untouched to `week_reward` through `self.sar`.
        - arrivals: Weekly arrivals passed to `week_reward` on the last step.

        Returns:
        - (state, reward, terminated). The reward is 0 until the terminal
          state (n_shifts, n_dow), where it is the reward of the whole week.
        """
        s,dow=self.observation_space

        if self.observation_space!=(self.n_shifts,self.n_dow):
            reward=0 # we don't observe a reward until the terminal state
            self.sar.append((self.observation_space,action,reward)) # (5 agents, shift start at 14h30, 0 reward)
            if s==self.n_shifts:
                # last shift of the day: move to the first shift of the next day
                dow+=1
                s=1
            else:
                s+=1

            self.observation_space=(s,dow)

        else:
            # terminal state (last shift of the last day)
            self.terminated=True
            # Include the final action in the evaluated week; it used to be
            # appended only after the reward had been computed.
            week=self.sar+[(self.observation_space,action,0)]
            reward=week_reward(week,arrivals,shift_duration=self.d_shift)
            self.sar.append((self.observation_space,action,reward))

        return self.observation_space, reward, self.terminated

    def reset(self, n_agents=11,seed=None, options=None):
        """
        Start a new episode at (shift 1, day 1).

        Parameters:
        - n_agents (int): New number of agents; the action space is rebuilt
          to match it.
        - seed: Forwarded to `gym.Env.reset` to seed the environment's RNG.
        - options: Unused.

        Returns:
        - (initial_state, info) with an empty info dict, as in the Gymnasium API.
        """
        super().reset(seed=seed)
        self.observation_space=(1,1)
        self.sar=[]
        self.n_agents=n_agents
        self.action_space = self._build_action_space()

        self.terminated=False
        return self.observation_space, {}

    def render(self, V,mode='human'):
        """Rendering is not implemented."""
        pass


In [130]:
############# weelky arrival ##################################
def weekly_arrivals(n_days=5):
    """
    Simulate the arrivals of several days.

    Every day is produced by calling genArrivals(), which writes
    ../data/arrivals.csv, and then reading that file back.

    Parameters:
    - n_days (int): Number of days to simulate.

    Returns:
    - list of lists: One list of per-slot arrival counts per day.
    """
    week = []
    for _ in range(n_days):
        genArrivals()  # overwrites the CSV with a fresh day
        day = pd.read_csv("../data/arrivals.csv")
        week.append(list(day.arrivals.values))

    return week

def split_list_equally(lst,split_size=3):
    """
    Split a list into consecutive, non-overlapping chunks of equal size.

    Parameters:
    - lst (list): The list to split.
    - split_size (int): Number of items per chunk (must be at least 1).

    Returns:
    - list of lists: len(lst) // split_size chunks of exactly `split_size`
      items each. Trailing items that do not fill a whole chunk are dropped.

    Raises:
    - ValueError: If `split_size` is smaller than 1.
    """
    if split_size < 1:
        raise ValueError(f"split_size must be at least 1, got {split_size}")
    # Chunk k starts at k * split_size. The previous version started it at k,
    # which produced overlapping sliding windows instead of chunks.
    return [
        lst[k * split_size:(k + 1) * split_size]
        for k in range(len(lst) // split_size)
    ]

################### transform sar (state/action/reward) of an episode to shifts (n_agents,start_time) ################
def shifts_from_sar(sar):
    """
    Convert the (state, action, reward) records of an episode into shifts.

    Each action is unpacked as (start_time_index, n_agents), where the index
    counts 30-minute slots from midnight (0 -> "00:00", 1 -> "00:30", ...).

    Parameters:
    - sar (list): (state, action, reward) tuples of one episode.

    Returns:
    - list of lists: The (n_agents, "HH:MM") shifts, grouped by
      split_list_equally with its default group size.

    Raises:
    - KeyError: If a start index is outside 0 .. 47.
    """
    shifts=[]
    for _state,action,_reward in sar:
        start_time_index,n_agents=action
        if not 0 <= start_time_index < 48:
            raise KeyError(start_time_index)
        # Build the label from the index itself rather than from the `ts`
        # table of whichever environment the global name `s` points to.
        start_label=f"{start_time_index // 2:02d}:{(start_time_index % 2) * 30:02d}"
        shifts.append((n_agents,start_label))

    return split_list_equally(shifts)
    
    
    
######## add duration to shifts #############    
def shifts_to_schedules(shifts,shift_duration=7):
    """
    Turn each group of shifts into a per-slot staffing schedule.

    Parameters:
    - shifts (iterable): Groups of (n_agents, "HH:MM") shifts.
    - shift_duration (int): Shift length in hours, passed to get_agents_schedule.

    Returns:
    - list: One schedule (as returned by get_agents_schedule) per group, in order.
    """
    return [get_agents_schedule(day_shifts, shift_duration) for day_shifts in shifts]


########## reward for a day #########################################
def day_reward(daily_arrival,daily_schedule_agents):
    """
    Sum the slot rewards of one day.

    Parameters:
    - daily_arrival (sequence): Arrival volume of each of the 48 slots.
    - daily_schedule_agents (dict): "HH:MM" slot label -> number of agents.

    Returns:
    - The sum of calculate_reward_v2 over the 48 slots.
    """
    total=0
    for i in range(48):
        # Build the label from the slot index rather than from the `ts` table
        # of whichever environment the global name `s` points to.
        slot_label=f"{i // 2:02d}:{(i % 2) * 30:02d}"
        total+=calculate_reward_v2(daily_schedule_agents[slot_label],daily_arrival[i])

    return total


############# reward for a week 
def week_reward(sar,weekly_arrival,shift_duration=7):
    """
    Compute the total reward of a week from an episode's records.

    Parameters:
    - sar (list): (state, action, reward) tuples of the episode.
    - weekly_arrival (list): One list of per-slot arrivals per day.
    - shift_duration (int): Shift length in hours.

    Returns:
    - The sum of day_reward over the days; days and schedules are paired in
      order and pairing stops at the shorter of the two lists.
    """
    daily_shifts=shifts_from_sar(sar)
    daily_schedules=shifts_to_schedules(daily_shifts,shift_duration=shift_duration)

    return sum(
        day_reward(day_arrivals,day_schedule)
        for day_arrivals,day_schedule in zip(weekly_arrival,daily_schedules)
    )

########################
# shifts=shifts_from_sar(sar)    
# weekly_arrival=weekly_arrivals(n_days=5)
# weekly_agents_schedules=shifts_to_schedules(shifts,shift_duration=3)

# print(weekly_arrival)
# week_reward(s.sar,weekly_arrival)

In [4]:
def calculate_reward_v2(action,arrival):
    """
    Calculate the service-level reward for staffing one 30-minute slot.

    Parameters:
    - action (int): Number of agents staffed for the slot.
    - arrival (int): Call volume of the slot.

    Returns:
    - The reward score: +30 when the service level reaches 80%, -30
      otherwise, minus 10 per staffed agent.

    Raises:
    - ValueError: If `action` or `arrival` is not a non-negative number.
    """
    # Reject negative (or NaN) inputs explicitly; previously they fell through
    # every branch and failed later with an UnboundLocalError on `sla`.
    if not (arrival >= 0 and action >= 0):
        raise ValueError(
            f"action and arrival must be non-negative, got action={action}, arrival={arrival}"
        )

    # Service Level: The expected percentage of transactions that don't wait in the queue longer than the target ASA
    # Average speed of answer (ASA): Average time that a transaction waits in the queue to be attended by a resource
    # Average handle time (AHT): Average time that takes to a single resource to attend a transaction
    # The service level required by the call center is 80%
    if action == 0:
        # Nobody staffed: nothing can be answered.
        sla = 0
    elif arrival == 0:
        # Agents staffed but no calls: the target is trivially met.
        sla = 1
    else:
        erlang = ErlangC(transactions=arrival, asa=20/60, aht=2, interval=30, shrinkage=0)
        sla = erlang.service_level(positions=action)

    # The reward score is arbitrary: +30 / -30 around the 80% target
    reward = -30 if sla < 0.80 else 30

    # The reward also carries the cost of the staffed agents
    reward += action * -10

    return reward

In [131]:
# looping over multiple episodes

def episode():
    """
    Play one week-long episode of Wscheduling with random actions.

    Each step draws a random number of agents and a random start slot. Within
    a day, a shift cannot start earlier than the previous shift of that day;
    the lower bound returns to slot 0 after every third shift.

    Returns:
    - list: The environment's (state, action, reward) records.
    """
    shifts_per_day = 3
    week = weekly_arrivals(n_days=5)
    env = Wscheduling()
    env.reset()

    earliest_start = 0
    step_no = 1
    done = False
    while not done:
        # random head count first, then a random start slot (same draw order as before)
        agents = random.randint(0, env.n_agents)
        start = random.randint(earliest_start, env.n_start_time-1)

        print("sampling_strt_min",earliest_start)

        # next shift of the same day starts no earlier than this one
        earliest_start = 0 if step_no % shifts_per_day == 0 else start

        print("############## The state is:",env.observation_space)
        print(f"number of agents dispatched at {env.ts[start]} is {agents}")

        _, reward, done = env.step(action=(start, agents), arrivals=week)
        print("the reward is:", reward)

        step_no += 1

    print("##################################################################################################")
    return env.sar

In [208]:
# Play one random episode and keep its (state, action, reward) records
sar=episode()

sampling_strt_min 0
############## The state is: (1, 1)
number of agents dispatched at 03:30 is 7
the reward is: 0
sampling_strt_min 7
############## The state is: (2, 1)
number of agents dispatched at 06:00 is 11
the reward is: 0
sampling_strt_min 12
############## The state is: (3, 1)
number of agents dispatched at 07:00 is 8
the reward is: 0
sampling_strt_min 0
############## The state is: (1, 2)
number of agents dispatched at 19:30 is 10
the reward is: 0
sampling_strt_min 39
############## The state is: (2, 2)
number of agents dispatched at 21:00 is 1
the reward is: 0
sampling_strt_min 42
############## The state is: (3, 2)
number of agents dispatched at 21:00 is 10
the reward is: 0
sampling_strt_min 0
############## The state is: (1, 3)
number of agents dispatched at 11:30 is 7
the reward is: 0
sampling_strt_min 23
############## The state is: (2, 3)
number of agents dispatched at 15:30 is 4
the reward is: 0
sampling_strt_min 31
############## The state is: (3, 3)
number of agents

In [210]:
# Module-level return accumulator
G=0
# Observed returns per (dow, shift, start_time, n_agents) key; every list starts with a single 0
# NOTE(review): the only module-level binding of `s` visible in this notebook is a `scheduling`
# instance, which defines none of n_dow / n_shifts / n_start_time - confirm which object `s` is here
dict_visit_action={(i,j,k,l):[0] for 
            i in range(s.n_dow) for j in range(s.n_shifts) for k in range(s.n_start_time) for l in range(s.n_agents)}
# print(dict_visit_action)

In [None]:
def Policy_Evaluation(pi,dim=4,num_episodes=100,gamma=0.5):
    """
    First-visit Monte Carlo estimate of action values from sampled episodes.

    Parameters:
    - pi: Unused; episodes are generated by `episode()`.
    - dim (int): Unused; kept so existing calls keep working.
    - num_episodes (int): Number of episodes to sample.
    - gamma (float): Discount factor. The default equals the value the SARSA
      cell assigns to the module-level `gamma`, which this function used to
      read implicitly.

    Returns:
    - dict: (shift, dow, start_time_index, n_agents) -> mean return over the
      first visits of that state-action pair. Only visited pairs appear.
    """
    # Returns are collected locally. The earlier version appended to the
    # module-level dict_visit_action under a nested key `s+(a,)` that never
    # matched its 4-int keys, and its lists were pre-seeded with a 0.
    returns={}
    for _ in range(num_episodes):
        G=0
        sar=episode()

        # Walk the episode backwards so G accumulates the discounted return
        for i in range(len(sar)-1,-1,-1):
            state,action,reward=sar[i]
            G=gamma*G+reward
            # first visit: the pair does not occur earlier in the episode
            first_visit=all((st,ac)!=(state,action) for st,ac,_ in sar[:i])
            if first_visit:
                returns.setdefault(tuple(state)+tuple(action),[]).append(G)

    return {key:np.mean(values) for key,values in returns.items()}