In [None]:
import gym
import numpy as np
import pandas as pd

In [None]:
# Module-level diary parameters; SchedulerEnv copies these onto the instance
# when it is constructed.
# num_gps: number of GPs -- one diary column per GP.
num_gps = 3
# num_slots: number of time slots in the day -- one diary row per slot.
num_slots = 3
# num_pre_booked: how many appointments reset() places at random in the diary.
num_pre_booked = 1

In [None]:
class SchedulerEnv(gym.Env):
    """Gym environment modelling one day's appointment diary for a GP practice.

    The diary (``self.state``) is a pandas DataFrame with one row per time
    slot and one column per GP; a cell holds 1 when the slot is booked and 0
    when it is free.

    Attributes:
        num_gps: number of GPs (diary columns).
        num_slots: number of time slots (diary rows).
        num_pre_booked: number of appointments placed at random by ``reset``.
        action_space: ``Discrete(num_gps + 1)``.
            NOTE(review): presumably the extra action means "book nobody" --
            confirm once ``step`` is written.
        observation_space: ``Box(0, 1, (num_slots, num_gps), int32)``,
            matching the array returned by ``reset``.

    NOTE(review): this class does not define ``step``; it has to be
    implemented before the environment can be stepped or validated.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self):
        """Copy the module-level parameters, build the spaces and the diary."""

        #set parameters for the day
        self.num_gps = num_gps
        self.num_slots = num_slots
        self.num_pre_booked = num_pre_booked

        #set action space this is the gp to book the appointment for
        #(built from the instance attributes so the spaces always agree
        #with the diary that reset() creates)
        self.action_space = gym.spaces.Discrete(self.num_gps + 1)

        #set observation space
        self.observation_space = gym.spaces.Box(
            low=0,
            high=1,
            shape=(self.num_slots, self.num_gps),
            dtype=np.int32,
        )

        #create a new diary for the day
        self.reset()

        #general prints for testing
        #a GP counts as free when every slot in their column is 0
        free_gps = self.state.isin([0]).all().sum()
        print("Number of GPs: ", self.num_gps)
        print("Number of GPs with no appointments: ", free_gps)

    #creates daily diary for each gp and randomly populates prebooked appointments
    def reset(self):
        """Start a new episode with a fresh diary.

        Exactly ``self.num_pre_booked`` distinct cells are set to 1 (capped at
        the number of cells in the diary); every other cell is 0.

        Returns:
            numpy.ndarray: the diary as an int32 array of shape
            ``(num_slots, num_gps)``.
        """

        total_cells = self.num_slots * self.num_gps
        #never ask for more distinct cells than the diary has (or fewer than 0)
        n_booked = min(max(self.num_pre_booked, 0), total_cells)

        #zero filled diary with row per time slot and column per gp
        diary = np.zeros((self.num_slots, self.num_gps), dtype=int)

        #sample flat cell indices WITHOUT replacement so two pre booked
        #appointments can never land on the same slot, and so that exactly
        #n_booked cells are filled
        if n_booked:
            booked_cells = np.random.choice(total_cells, size=n_booked, replace=False)
            diary.flat[booked_cells] = 1

        self.state = pd.DataFrame(diary)

        #resets parameters for new episode
        self.done = False
        self.reward = 0

        return self.state.to_numpy(dtype=np.int32)


In [None]:
# Build an environment instance; the constructor calls reset() and prints the
# number of GPs and how many of them have no appointments.
test = SchedulerEnv()

In [None]:
import stable_baselines
from stable_baselines.common.env_checker import check_env

In [None]:
# Validate the environment instance with stable-baselines' checker.
# NOTE(review): SchedulerEnv as defined in this notebook has no step() method,
# so this check is expected to fail until one is implemented -- confirm.
check_env(test)