In [1]:
import numpy as np
import pandas
import pickle
from pickle import load
import random
from random import choices

import torch
import torch.nn as nn
import torch.nn.utils as nn_utils
import torch.nn.functional as F
import torch.optim as optim

In [2]:
df = pandas.read_csv('JanData.csv')
day_to_schedule = df[df['Date'].isin(['11-Jan-22'])].to_numpy().flatten()

In [3]:
tot_appt = float((day_to_schedule[1]).replace(',',''))
to_book = float((day_to_schedule[7]).replace(',',''))
pre_book = tot_appt-to_book
small_app = (float((day_to_schedule[2]).replace(',',''))+float((day_to_schedule[5]).replace(',','')))/tot_appt
med_app = float((day_to_schedule[4]).replace(',',''))/tot_appt
large_app = 1-small_app-med_app

In [4]:
surgery_to_book = round(to_book/float((day_to_schedule[8]).replace(',',''))*100)
surgery_pre_booked = round(pre_book/float((day_to_schedule[8]).replace(',',''))*100)

In [5]:
num_slots = 40
num_gps = 100

In [6]:
#create the scheduler with pre booked appointments
def create_starting_schedule(num_gps, num_slots, num_pre_booked): 
    
    #creates zero filled dataframe with row per time slot and column per gp
    state = np.zeros((num_slots, num_gps),dtype=float)

    #randomly enters a 1 for each pre booked appointments
    while num_pre_booked>0:
        num_pre_booked -= 1
        state[np.random.randint(num_slots), np.random.randint(num_gps)] = 1
    
    with np.printoptions(threshold=np.inf):
        print(state)
    
    return state

In [7]:
#create list of appointments to book based on split
def list_to_book(num_to_book, small_split, med_split, large_split):
    to_book = []
    for i in range(num_to_book):
        to_book.append(*choices([1,2,3],[small_split, med_split, large_split]))
    return to_book

In [8]:
#calculates new position of the agent based on the action
def move_agent(agent_pos, action):

    #set boundaries for the grid
    max_row = num_slots - 1
    max_col = num_gps - 1
    
    #setting new co-ordinates for the agent
    new_row = agent_pos[0]
    new_col = agent_pos[1]

    #calculate what the new position may be based on the action without going out the grid
    if action == 0:
        new_row = max(agent_pos[0] - 1, 0)
    if action == 1:
        new_row = min(agent_pos[0] + 1, max_row)
    if action == 2:
        new_col = max(agent_pos[1] - 1, 0)
    if action == 3:
        new_col = min(agent_pos[1] + 1, max_col)

    new_pos = [new_row, new_col]

    return new_pos

In [9]:
#checks if the appointment fits
def check_and_book(state, cells_to_check, agent_pos, appt_idx):

    max_row = num_slots - 1

    if cells_to_check==1:
        if state[agent_pos[0], agent_pos[1]] == 0:
            state[agent_pos[0], agent_pos[1]] = 1
            appt_idx+=1

    if cells_to_check==2:
        #check we're not at the bottom of the grid
        if agent_pos[0]<max_row:
            if state[agent_pos[0], agent_pos[1]] == 0 and \
            state[(agent_pos[0]+1), agent_pos[1]] == 0:
                state[agent_pos[0], agent_pos[1]] = 1
                state[(agent_pos[0]+1), agent_pos[1]] = 1

                agent_pos = [(agent_pos[0]+1), agent_pos[1]]
                appt_idx+=1

    if cells_to_check==3:
        #check we're not at the bottom of the grid
        if agent_pos[0]+1<max_row:
            if state[agent_pos[0], agent_pos[1]] == 0 and \
            state[(agent_pos[0]+1), agent_pos[1]] == 0 \
             and state[(agent_pos[0]+2), agent_pos[1]] == 0:
                state[agent_pos[0], agent_pos[1]] = 1
                state[(agent_pos[0]+1), agent_pos[1]] = 1
                state[(agent_pos[0]+2), agent_pos[1]] = 1

                agent_pos = [(agent_pos[0]+2), agent_pos[1]]
                appt_idx+=1

    next_state = state

    return next_state, agent_pos, appt_idx

In [10]:
class Model(nn.Module):
    def __init__(self, input_shape, n_actions):
        super(Model, self).__init__()

        self.net = nn.Sequential(
            nn.Linear(input_shape[0]*input_shape[1], 64),
            nn.ReLU(),
            nn.Linear(64, 128) 
        )
        
        self.actor = nn.Sequential(
            nn.Linear(128, 512),
            nn.ReLU(),
            nn.Linear(512, n_actions)
        )

        self.critic = nn.Sequential(
            nn.Linear(128, 512),
            nn.ReLU(),
            nn.Linear(512, 1)
        )


    def forward(self, x):
        # flatten the observation space Box to linear tensor
        x_flat = torch.flatten(x, 1,2).to(torch.float32)
        #print('x_flat', x_flat.size(), x_flat)
        init_out = self.net(x_flat)
        return self.actor(init_out), self.critic(init_out)

In [11]:
filename = 'final_a2c_model.sav'
loaded_A2Cmodel = pickle.load(open(filename, 'rb'))

In [12]:
state = create_starting_schedule(num_gps, num_slots, surgery_pre_booked)
to_book = list_to_book(surgery_to_book, small_app, med_app, large_app)
agent_pos = (0,0)
appt_idx = 0

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  0. 0. 0. 0.]
 [0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0.
  1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0.
  0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1.
  0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.

In [13]:
while appt_idx < len(to_book):
#for i in range(5):
    cells_to_check = to_book[appt_idx]
    state, agent_pos, appt_idx = check_and_book(state, cells_to_check, agent_pos, appt_idx) 
    agent_state = state.copy()
    agent_state[agent_pos[0], agent_pos[1]] = 5
    agent_state = torch.FloatTensor(np.array(agent_state, copy=False)).unsqueeze(dim=0)
    actor, critic = loaded_A2Cmodel(agent_state)
    action = torch.argmax(actor)
    print(actor)
    new_agent_pos = move_agent(agent_pos, action)
    
    if new_agent_pos == agent_pos:
        new_agent_pos = [np.random.randint(num_slots), np.random.randint(num_gps)]
    agent_pos = new_agent_pos

tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0

tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0

tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0

tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0.1061,  -9.5052, -11.0173,   2.6965]], grad_fn=<AddmmBackward>)
tensor([[  0

In [14]:
with np.printoptions(threshold=np.inf):
    print(state)

[[1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  0. 0. 0. 0.]
 [1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0.
  1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0.
  0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  0. 0. 0. 0.]
 [0. 0. 1. 1. 1. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1.
  0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 1. 0. 0. 0.