In [42]:
import gym
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from tensorboardX import SummaryWriter

In [43]:
# --- Scenario configuration (read by SchedulerEnv and the helper cells) ---

# Diary dimensions: one column per GP, one row per time slot.
num_gps = 10
num_slots = 15

# Appointments already in the diary before the agent acts.
num_pre_booked = 17

# One entry per new appointment request still to be placed.
to_book = torch.tensor([2, 1, 4, 2, 3, 1])
num_to_book = to_book.shape[0]

# Entropy bonus coefficient (not used by any cell visible here).
entropy_beta = 0.01

# Default agent location as [row, col]; SchedulerEnv.reset() randomises it.
agent_pos = [0, 0]

In [44]:
class SchedulerEnv(gym.Env):
    """Single-step gym environment for filling a GP appointment diary.

    The diary is a ``(num_slots, num_gps)`` grid where ``1`` marks a booked
    slot and ``0`` a free one. ``reset`` builds a diary containing the
    pre-booked appointments; ``step`` scores a proposed final diary (the
    action) and ends the episode.

    Configuration is read from the notebook-level parameters ``num_gps``,
    ``num_slots``, ``num_pre_booked``, ``to_book``, ``num_to_book`` and
    ``agent_pos``.
    """

    def __init__(self):
        """Copy the day's parameters and declare the action/observation spaces."""

        #set parameters for the day
        self.num_gps = num_gps
        self.num_slots = num_slots
        self.num_pre_booked = num_pre_booked
        self.to_book = to_book
        self.num_to_book = num_to_book
        self.diary_slots = num_gps*num_slots
        # Copy so the environment never shares (and cannot mutate) the
        # module-level list.
        self.agent_pos = list(agent_pos)

        #set action space this format of the diary
        self.action_space = gym.spaces.Box(low=0, high=1, shape=(self.num_slots, self.num_gps), dtype=np.int32)

        #set observation space
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(self.num_slots, self.num_gps), dtype=np.int32)

    def reset(self):
        """Start a new episode with a freshly populated diary.

        Creates an empty ``(num_slots, num_gps)`` float diary, marks exactly
        ``num_pre_booked`` distinct random slots as booked, and places the
        agent on a random cell.

        Returns:
            The new diary array (also stored as ``self.state``).

        Raises:
            ValueError: if more pre-booked appointments are requested than
                the diary has slots.
        """

        #creates zero filled diary with row per time slot and column per gp
        self.state = np.zeros((self.num_slots, self.num_gps), dtype=float)

        # A non-positive count places nothing, as the original loop did.
        pre_booked = max(self.num_pre_booked, 0)
        if pre_booked > self.state.size:
            raise ValueError(
                "num_pre_booked ({}) exceeds the number of diary slots ({})".format(
                    pre_booked, self.state.size
                )
            )

        # Sample without replacement: independent random draws could land on
        # the same slot twice and silently leave fewer appointments in the
        # diary than num_pre_booked.
        flat_idx = np.random.choice(self.state.size, size=pre_booked, replace=False)
        rows, cols = np.unravel_index(flat_idx, self.state.shape)
        self.state[rows, cols] = 1

        #randomly sets the agent start space (bounds taken from this env, not globals)
        self.agent_pos = [np.random.randint(self.num_slots), np.random.randint(self.num_gps)]

        #resets parameters for new episode
        self.done = False
        self.reward = 0
        self.num_to_book = num_to_book
        self.num_pre_booked = num_pre_booked

        return self.state

    def step(self, action):
        """Score a proposed final diary and finish the episode.

        Reward for this step:
          * +1 for every pre-booked slot still booked in ``action``, -1 for
            every pre-booked slot that was dropped;
          * +5 if ``action.sum()`` equals ``num_pre_booked + num_to_book``,
            otherwise -5.

        ``reset`` must be called first, because scoring reads ``self.state``.

        Args:
            action: 2-D array indexable as ``action[row, col]`` and supporting
                ``.sum()``; presumably a 0/1 diary matching ``action_space``.

        Returns:
            ``(action, reward, done, info)``. The action is returned unchanged
            as the observation, ``done`` is always ``True`` and ``info`` is an
            empty dict.
        """

        # NOTE(review): this counts one slot per requested appointment. If the
        # entries of to_book are durations in slots, the expected total would
        # be num_pre_booked + to_book.sum() instead. Confirm the intent.
        tot_appts = self.num_pre_booked + self.num_to_book

        # Build the reward from zero so the returned value is this step's
        # reward even if step() is called again without an intervening reset().
        reward = 0

        #rewards if keeps original appointments in same place
        for row, col in np.argwhere(self.state):
            if action[row, col]:
                reward += 1
            else:
                reward -= 1

        #rewards if all new appts are booked
        if tot_appts == action.sum():
            reward += 5
        else:
            reward -= 5

        #TODO: reward if all longer appts are booked together

        self.reward = reward
        self.done = True
        info = {}

        return action, self.reward, self.done, info

In [45]:
# Set up the tensorboard writer and the scheduling environment.
# TODO: model and optimiser are not wired in yet. The intended setup was
#   model = Model(env.diary_slots + env.num_to_book, env.diary_slots)
#   optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999),
#                                eps=1e-08, weight_decay=0, amsgrad=False)
#   optimizer.zero_grad()
writer = SummaryWriter()
env = SchedulerEnv()

# Keep the first diary around; later cells inspect it.
start_state = env.reset()
print(f"{start_state} {env.agent_pos}")


[[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 1. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 1. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 1.]
 [0. 0. 1. 0. 0. 1. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] [3, 9]


In [46]:
#this needs to be copied into the Env and called by step
def move_agent(action, agent_pos, grid_shape=None):
    """Return the agent's position after one move, clamped to the grid.

    Args:
        action: 0 = up (row - 1), 1 = down (row + 1), 2 = left (col - 1),
            3 = right (col + 1). Any other value leaves the position
            unchanged.
        agent_pos: current ``[row, col]``; it is not modified.
        grid_shape: optional ``(rows, cols)`` of the diary. Defaults to the
            notebook-level ``(num_slots, num_gps)``, so existing calls behave
            as before. Passing it explicitly removes the dependency on
            globals, e.g. once this is moved into the environment.

    Returns:
        A new ``[row, col]`` list.
    """
    if grid_shape is None:
        grid_shape = (num_slots, num_gps)

    #set boundaries for the grid
    max_row = grid_shape[0] - 1
    max_col = grid_shape[1] - 1

    #start from the current co-ordinates
    new_row, new_col = agent_pos[0], agent_pos[1]

    #the moves are mutually exclusive, so at most one branch runs
    if action == 0:
        print('up')
        new_row = max(new_row - 1, 0)
    elif action == 1:
        print('down')
        new_row = min(new_row + 1, max_row)
    elif action == 2:
        print('left')
        new_col = max(new_col - 1, 0)
    elif action == 3:
        print('right')
        new_col = min(new_col + 1, max_col)

    return [new_row, new_col]

In [47]:
# Draw one of the four move codes understood by move_agent (0-3) and show it
# next to the agent's current position.
action = np.random.randint(4)
print(f"{action} {env.agent_pos}")

2 [3, 9]


In [48]:
# Apply the sampled move to the environment's current agent position.
# NOTE(review): this rebinds the notebook-level `agent_pos` from the parameter
# cell, which SchedulerEnv.__init__ reads; env.agent_pos itself is not updated.
# Consider a distinct name (and updating the later cell that reads it).
agent_pos = move_agent(action, env.agent_pos)
agent_pos

left


[3, 8]

In [49]:
#this needs to be copied into the Env and called by step
def check_bookable(agent_pos, diary=None):
    """Report whether the diary slot at ``agent_pos`` is free.

    Args:
        agent_pos: ``[row, col]`` of the slot to inspect.
        diary: optional 2-D diary array to check. Defaults to the
            notebook-level ``start_state`` so existing calls behave as before.
            Pass the environment's current diary explicitly to avoid reading
            a snapshot that goes stale after another ``reset``.

    Returns:
        Truthy when the slot holds ``0.0`` (unbooked), falsy otherwise.
    """
    if diary is None:
        diary = start_state
    return diary[agent_pos[0], agent_pos[1]] == 0.0

In [50]:
# Is the slot at the environment's current agent position free in start_state?
check_bookable(env.agent_pos)

True

In [51]:
# Same check for the notebook-level agent_pos (the position returned by move_agent).
check_bookable(agent_pos)

True