In [None]:
import gym
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from tensorboardX import SummaryWriter

In [None]:
class SchedulerEnv(gym.Env):
    """Single-step environment for filling a day's GP appointment diary.

    The diary is a ``num_slots`` x ``num_gps`` grid in which 0 marks a free
    cell and 1 a booked one. ``reset`` creates a diary with ``num_pre_booked``
    randomly placed appointments; ``step`` takes a complete proposed diary as
    the action, scores it, and ends the episode.
    """

    def __init__(self):
        """Set the fixed parameters of the day and declare the gym spaces."""

        #set parameters
        num_gps = 10
        num_slots = 15
        num_pre_booked = 75
        #length (in consecutive slots) of each appointment still to be booked
        to_book = [2,1,4,2,3,1]
        num_to_book = len(to_book)
        agent_pos = [0,0]

        #set parameters for the day
        self.num_gps = num_gps
        self.num_slots = num_slots
        self.num_pre_booked = num_pre_booked
        self.to_book = to_book
        self.num_to_book = num_to_book
        self.diary_slots = num_gps*num_slots
        self.agent_pos = agent_pos

        #set action space this format of the diary
        self.action_space = gym.spaces.Box(low=0, high=1, shape=(self.num_slots, self.num_gps), dtype=np.int32)

        #set observation space
        #NOTE(review): reset() returns a float array while this space declares int32 - confirm which is intended
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(self.num_slots, self.num_gps), dtype=np.int32)

    #creates daily diary for each gp and randomly populates prebooked appointments
    def reset(self):
        """Start a new episode and return the initial diary.

        Returns:
            The ``(num_slots, num_gps)`` float array stored in ``self.state``,
            with exactly ``num_pre_booked`` cells set to 1.
        """

        #creates zero filled array with row per time slot and column per gp
        self.state = np.zeros((self.num_slots, self.num_gps),dtype=float)

        #enters a 1 for each pre booked appointment; cells are drawn without
        #replacement so exactly num_pre_booked distinct cells are filled, and
        #self.num_pre_booked itself is left untouched so step() and later
        #resets still see the configured value
        booked_cells = np.random.choice(self.diary_slots, size=self.num_pre_booked, replace=False)
        self.state.flat[booked_cells] = 1

        #randomly sets the agent start space
        self.agent_pos = [np.random.randint(self.num_slots), np.random.randint(self.num_gps)]

        #resets parameters for new episode
        self.done = False
        self.reward = 0

        return self.state

    def step(self, action):
        """Score a proposed final diary and finish the episode.

        Args:
            action: array indexable as ``action[row, col]`` with the same
                layout as the diary; non-zero means the cell is booked.

        Returns:
            ``(action, reward, done, info)`` where ``reward`` is the running
            ``self.reward``, ``done`` is always True and ``info`` is empty.
        """

        #cells the finished diary should contain: every pre booked cell plus
        #one cell per slot of each appointment still to book (action.sum()
        #counts cells, so appointment lengths are summed rather than counted)
        tot_appts = self.num_pre_booked + sum(self.to_book)

        #rewards if keeps original appointments in same place
        for row, col in np.transpose(np.nonzero(self.state)):
            if action[row, col]:
                self.reward += 1
            else:
                self.reward -= 1

        #rewards if all new appts are booked
        if tot_appts == action.sum():
            self.reward += 5
        else:
            self.reward -= 5

        #TODO: rewards if all longer appts are booked together

        self.done = True
        info = {}

        return action, self.reward, self.done, info

In [None]:
class Model(nn.Module):
    """Actor-critic network: a shared trunk feeding a policy head and a value head."""

    def __init__(self, input_size, action_size):
        """Build the trunk and both heads.

        Args:
            input_size: number of input features per sample.
            action_size: number of outputs produced by the actor head.
        """
        super().__init__()

        trunk_hidden, trunk_out, head_hidden = 64, 128, 512

        def make_head(out_features):
            #both heads share the same two-layer shape and differ only in output width
            return nn.Sequential(
                nn.Linear(trunk_out, head_hidden),
                nn.ReLU(),
                nn.Linear(head_hidden, out_features),
            )

        #shared feature extractor
        self.net = nn.Sequential(
            nn.Linear(input_size, trunk_hidden),
            nn.ReLU(),
            nn.Linear(trunk_hidden, trunk_out),
        )
        #one output per action
        self.actor = make_head(action_size)
        #single scalar output
        self.critic = make_head(1)

    def forward(self, x):
        """Return ``(actor_output, critic_output)`` computed from the shared features of ``x``."""
        features = self.net(x)
        actor_out = self.actor(features)
        critic_out = self.critic(features)
        return actor_out, critic_out

In [None]:
#initialise the environment and the tensorboardX writer
#(model and optimiser construction below is currently commented out)
env = SchedulerEnv()
#model = Model((env.diary_slots+env.num_to_book), env.diary_slots)

#optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, 
#                             amsgrad=False)
#optimizer.zero_grad()
writer = SummaryWriter()

#reset the environment to get the initial diary, then show it with the agent's random start cell
start_state = env.reset()
print(start_state, env.agent_pos)


In [None]:
#this needs to be copied into the Env and called by step
def move_agent(action, agent_pos, num_slots=None, num_gps=None):
    """Return the agent's position after one move, clamped to the diary grid.

    Args:
        action: move code - 0 up (row - 1), 1 down (row + 1), 2 left
            (col - 1), 3 right (col + 1). Any other value leaves the
            position unchanged.
        agent_pos: current ``[row, col]``; it is not modified.
        num_slots: number of rows in the grid. Defaults to the notebook-level
            ``env.num_slots``.
        num_gps: number of columns in the grid. Defaults to the notebook-level
            ``env.num_gps``.

    Returns:
        A new ``[row, col]`` list.
    """
    #fall back to the notebook's environment only when no grid size is given,
    #so the function can be reused without a global env
    if num_slots is None:
        num_slots = env.num_slots
    if num_gps is None:
        num_gps = env.num_gps

    #set boundaries for the grid
    max_row = num_slots - 1
    max_col = num_gps - 1

    #start from the current co-ordinates
    new_row = agent_pos[0]
    new_col = agent_pos[1]

    #calculate what the new position may be (the moves are mutually exclusive)
    if action == 0:
        print('up')
        new_row = max(new_row - 1, 0)
    elif action == 1:
        print('down')
        new_row = min(new_row + 1, max_row)
    elif action == 2:
        print('left')
        new_col = max(new_col - 1, 0)
    elif action == 3:
        print('right')
        new_col = min(new_col + 1, max_col)

    return [new_row, new_col]

In [None]:
#draw one of the four move codes (0-3) at random and show it with the agent's current cell
action = np.random.randint(4)
print(action, env.agent_pos)

In [None]:
#apply the sampled move; the result is kept in the notebook-level agent_pos (env.agent_pos is not reassigned here)
agent_pos = move_agent(action, env.agent_pos)
agent_pos

In [None]:
#this needs to be copied into the Env and called by step
#checks if we can look to book appointment starting here
def check_bookable(agent_pos, state=None):
    """Report whether the cell at ``agent_pos`` is free (equal to 0).

    Args:
        agent_pos: ``[row, col]`` of the cell to test.
        state: 2-D diary array to look in. Defaults to the notebook-level
            ``start_state`` so existing calls keep working.

    Returns:
        Truthy when the cell holds 0.0, falsy otherwise.
    """
    #only fall back to the global diary when the caller did not supply one
    if state is None:
        state = start_state
    return state[agent_pos[0], agent_pos[1]] == 0.0

In [None]:
#is the agent's start cell free?
check_bookable(env.agent_pos)

In [None]:
#is the cell held in the notebook-level agent_pos free?
check_bookable(agent_pos)

In [None]:
#this needs to be copied into the Env and called by step
#checks if the appointment we're looking at fits here
def check_and_book(state, agent_pos, appt_idx, to_book=None):
    """Try to book one appointment in the diary, starting at the agent's cell.

    The appointment occupies ``to_book[appt_idx]`` consecutive slots (rows) in
    the agent's column, beginning at the agent's row. It is booked only when
    all of those cells lie inside the grid and are currently 0.

    Args:
        state: 2-D NumPy diary array (rows are slots, columns are GPs);
            0 = free. Modified in place when the booking succeeds.
        agent_pos: ``[row, col]`` of the first slot to book.
        appt_idx: index into ``to_book`` of the appointment to place.
        to_book: sequence of appointment lengths in slots. Defaults to the
            notebook-level ``env.to_book``.

    Returns:
        The same ``state`` object that was passed in (not a copy).
    """
    #only fall back to the notebook's environment when no list is supplied
    if to_book is None:
        to_book = env.to_book

    cells_to_check = to_book[appt_idx]
    row, col = agent_pos[0], agent_pos[1]

    #nothing to place for a non-positive length
    if cells_to_check < 1:
        return state

    #names used in the progress messages; longer appointments get a generic name
    names = {1: 'single', 2: 'double', 3: 'treble', 4: 'quad'}
    name = names.get(cells_to_check, '%d-slot' % cells_to_check)

    #check the whole appointment stays inside the grid; the bound is taken
    #from the array that is actually indexed rather than from a global
    last_row_needed = row + cells_to_check - 1
    if row < 0 or last_row_needed > state.shape[0] - 1:
        print('not for ' + name)
        return state

    if cells_to_check == 4:
        print('good for quad')
    else:
        print('good to check for ' + name)

    #checks every cell the appointment needs is still free
    wanted = slice(row, row + cells_to_check)
    if (state[wanted, col] == 0).all():
        state[wanted, col] = 1
        #single-slot bookings have never printed a confirmation
        if cells_to_check > 1:
            print('go ahead and book')
    else:
        print('already taken')

    return state


#appointment lengths, the same values as to_book in SchedulerEnv.__init__ (bare expression, so the cell displays it)
[2,1,4,2,3,1]

In [None]:
#pick the appointment at index 2 of env.to_book and a fixed start cell [row, col],
#then show how many cells are filled before the booking attempt
appt_idx = 2
agent_pos = [1,4]
print(start_state.sum())

In [None]:
#attempt the booking and display the resulting diary
#NOTE(review): `next` shadows the Python builtin of the same name for the rest of the kernel session
#NOTE(review): check_and_book writes into the array it is given and returns that same object,
#so start_state is changed too and re-running this cell will not give the same result
next = check_and_book(start_state, agent_pos, appt_idx)
next

In [None]:
#number of filled cells after the booking attempt
print(next.sum())