In [27]:
import gym
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from tensorboardX import SummaryWriter

In [28]:
class SchedulerEnv(gym.Env):

    def __init__(self):
        
        #starting parameters
        num_gps = 10
        num_slots = 15
        num_pre_booked = 75
        to_book = [2,1]
        num_to_book = len(to_book)
        agent_pos = [0,0]
        reward_decay = 0.95
        
        #set parameters for the day
        self.num_gps = num_gps
        self.num_slots = num_slots
        self.num_pre_booked = num_pre_booked
        self.to_book = to_book
        self.num_to_book = num_to_book
        self.diary_slots = num_gps*num_slots
        self.agent_pos = agent_pos
        self.reward_decay = reward_decay

        #set action space to move around the grid
        self.action_space = gym.spaces.Discrete(4) #up, down, left, right
        
        #set observation space 
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(self.num_slots, self.num_gps), dtype=np.int32)
   
    #creates daily diary for each gp, randomly populates prebooked appointments and resets parameters
    def reset(self):

        #creates zero filled dataframe with row per time slot and column per gp
        self.state = np.zeros((self.num_slots, self.num_gps),dtype=float)

        #randomly enters a 1 for each pre booked appointments
        pre_booked = self.num_pre_booked
        while pre_booked>0:
            pre_booked -= 1
            self.state[np.random.randint(self.num_slots), np.random.randint(self.num_gps)] = 1
            
        #randomly sets the agent start space
        self.agent_pos = [np.random.randint(self.num_slots), np.random.randint(self.num_gps)]

        #resets parameters for new episode
        self.done = False
        self.reward = 0
        self.appt_idx = 0
        self.decay_steps = 1
        
        print('starting state', self.state.sum(), self.state)

        return self.state
    
    #calculates new position of the agent based on the action
    def move_agent(self, action):

        #set boundaries for the grid
        max_row = env.num_slots - 1
        max_col = env.num_gps - 1

        #set new co-ordinates for the agent
        new_row = self.agent_pos[0]
        new_col = self.agent_pos[1]

        #calculate what the new position may be based on the action without going out the griid
        if action == 0:
            print('up')
            new_row = max(self.agent_pos[0] - 1, 0)
        if action == 1:
            print('down')
            new_row = min(self.agent_pos[0] + 1, max_row)
        if action == 2:
            print('left')
            new_col = max(self.agent_pos[1] - 1, 0)
        if action == 3:
            print('right')
            new_col = min(self.agent_pos[1] + 1, max_col)

        new_pos = [new_row, new_col]
        print('new pos', new_pos)

        return new_pos

    #checks if we can look to book appointment starting here
    def check_bookable(self):
        return self.state[self.agent_pos[0], self.agent_pos[1]] == 0.0
    
    #checks if the appointment fits
    def check_and_book(self):
        max_row = env.num_slots - 1

        #checks if the appointment fits
        cells_to_check = self.to_book[self.appt_idx]
        if cells_to_check==1:
            print('good to check for single')
            if self.state[self.agent_pos[0], self.agent_pos[1]] == 0:
                self.state[self.agent_pos[0], self.agent_pos[1]] = 1
                self.appt_idx += 1
                self.decay_steps = 1
            else:
                print('already taken')
                self.decay_steps += 1
        if cells_to_check==2:
            #check we're not at the bottom of the grid
            if self.agent_pos[0]<max_row:
                #check the next cells is also 0.0
                print('good to check for double')
                if self.state[self.agent_pos[0], self.agent_pos[1]] == 0 and \
                self.state[(self.agent_pos[0]+1), self.agent_pos[1]] == 0:
                    self.state[self.agent_pos[0], self.agent_pos[1]] = 1
                    self.state[(self.agent_pos[0]+1), self.agent_pos[1]] = 1
                    self.appt_idx += 1
                    print('go ahead and book')
                    self.decay_steps = 1
                else:
                    print('already taken')
                    self.decay_steps += 1
            else:
                print('not for double')
                self.decay_steps += 1
        if cells_to_check==3:
            #check we're not at the bottom of the grid
            if self.agent_pos[0]+1<max_row:
                print('good to check for treble')
                if self.state[agent_pos[0], self.agent_pos[1]] == 0 and \
                self.state[(self.agent_pos[0]+1), self.agent_pos[1]] == 0 \
                 and self.state[(self.agent_pos[0]+2), self.agent_pos[1]] == 0:
                    self.state[self.agent_pos[0], self.agent_pos[1]] = 1
                    self.state[(self.agent_pos[0]+1), self.agent_pos[1]] = 1
                    self.state[(self.agent_pos[0]+2), self.agent_pos[1]] = 1
                    self.appt_idx += 1
                    print('go ahead and book')
                    self.decay_steps = 1
                else:
                    print('already taken')
                    self.decay_steps += 1
            else:
                print('not for treble')
                self.decay_steps += 1
        if cells_to_check==4:
            #check we're not at the bottom of the grid
            if self.agent_pos[0]+2<max_row:
                #check the next cells is also 0.0
                print('good for quad')
                if self.state[self.agent_pos[0], self.agent_pos[1]] == 0 and \
                self.state[(agent_pos[0]+1), agent_pos[1]] == 0 \
                 and self.state[(self.agent_pos[0]+2), self.agent_pos[1]] == 0 and \
                self.state[(self.agent_pos[0]+3), self.agent_pos[1]] == 0:
                    self.state[self.agent_pos[0], self.agent_pos[1]] = 1
                    self.state[(self.agent_pos[0]+1), self.agent_pos[1]] = 1
                    self.state[(self.agent_pos[0]+2), self.agent_pos[1]] = 1
                    self.state[(self.agent_pos[0]+3), self.agent_pos[1]] = 1
                    self.appt_idx += 1
                    print('go ahead and book')
                    self.decay_steps = 1
                else:
                    print('already taken')
                    self.decay_steps += 1
            else:
                print('not for quad')
                self.decay_steps += 1

        next_state = self.state

        return next_state

    def step(self, action):

        print('start step' , self.decay_steps)
        #get new positioin of agent based on action
        self.agent_pos = self.move_agent(action)
        print('trying to book', self.to_book, self.appt_idx)
        
        #check if it's possible to book then book
        if self.check_bookable():
            self.state = self.check_and_book()
            print('checked here')
        else:
            print('not bookable')
            self.decay_steps += 1
        
        #work out if episode complete
        if self.appt_idx == len(self.to_book):
            print('all booked')
            self.done = True
            
        #work out rewards
        self.reward = self.reward_decay**self.decay_steps
        
        print('step', self.decay_steps)
        print('end step')

        info = {}

        return self.state, self.reward, self.done, info

In [29]:
class Model(nn.Module):
    def __init__(self, input_size, action_size):
        super(Model, self).__init__()

        self.net = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 128) 
        )

        self.actor = nn.Sequential(
            nn.Linear(128, 512),
            nn.ReLU(),
            nn.Linear(512, action_size)
        )

        self.critic = nn.Sequential(
            nn.Linear(128, 512),
            nn.ReLU(),
            nn.Linear(512, 1)
        )

    def forward(self, x):
        init_out = self.net(x)
        return self.actor(init_out), self.critic(init_out)

In [30]:
#convert numpy array to tensor for input
def tensor_convert(x):
    return torch.from_numpy(x).float()

In [31]:
#initialise environment, model and optimiser
env = SchedulerEnv()
model = Model((env.diary_slots), env.action_space.n)

#optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, 
#                             amsgrad=False)
#optimizer.zero_grad()

writer = SummaryWriter()

state = env.reset()
print('start pos', env.agent_pos)
done = False

while not done:

    #create model input from flattened grid
    nn_input = torch.flatten(tensor_convert(state))
    actor, critic = model(nn_input)
    
    print('get new action')
    action = torch.argmax(actor)

    print(F.softmax(actor), action)

    new_state, reward, done, info = env.step(action)
    state = new_state


starting state 61.0 [[0. 1. 0. 0. 0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 1. 0. 0. 0. 1. 1.]
 [0. 1. 1. 1. 0. 0. 0. 0. 1. 1.]
 [1. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0. 1. 0. 1. 1. 1.]
 [1. 0. 0. 0. 1. 0. 1. 0. 0. 1.]
 [0. 1. 0. 1. 1. 1. 1. 0. 1. 0.]
 [1. 1. 1. 0. 0. 1. 1. 0. 1. 1.]
 [0. 0. 0. 1. 1. 0. 0. 1. 1. 1.]
 [0. 1. 0. 0. 0. 0. 1. 1. 1. 0.]
 [1. 0. 1. 0. 1. 0. 0. 1. 1. 0.]
 [0. 0. 0. 0. 1. 1. 0. 0. 1. 0.]
 [1. 0. 1. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 1. 1. 1. 0.]]
start pos [3, 1]
get new action
tensor([0.2665, 0.2511, 0.2331, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1
up
new pos [2, 1]
trying to book [2, 1] 0
good to check for double
already taken
checked here
step 2
end step
get new action
tensor([0.2665, 0.2511, 0.2331, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2
up
new pos [1, 1]
trying to book [2, 1] 0
good to check for double
go ahead and book
checked here
step 1
end step
get new action
tensor([0.2658, 



get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 65
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 66
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 66
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 67
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 67
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 68
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 68
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 69
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 69
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 70
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 124
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 125
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 125
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 126
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 126
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 127
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 127
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 128
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 128
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 129
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 129
up

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 191
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 192
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 192
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 193
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 193
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 194
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 194
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 195
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 195
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 196
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
st

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 257
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 258
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 258
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 259
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 259
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 260
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 260
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 261
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 261
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 262
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 262
up

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 315
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 316
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 316
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 317
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 317
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 318
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 318
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 319
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 319
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 320
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 320
up

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 378
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 379
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 379
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 380
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 380
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 381
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 381
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 382
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 382
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 383
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
st

up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 441
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 441
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 442
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 442
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 443
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 443
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 444
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 444
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 445
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 445
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 446
end step
get new action
tensor(

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 505
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 506
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 506
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 507
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 507
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 508
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 508
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 509
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 509
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 510
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 510
up

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 562
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 563
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 563
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 564
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 564
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 565
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 565
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 566
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 566
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 567
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 567
up

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 618
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 619
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 619
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 620
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 620
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 621
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 621
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 622
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 622
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 623
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
st

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 672
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 673
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 673
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 674
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 674
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 675
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 675
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 676
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 676
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 677
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 677
up

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 727
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 728
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 728
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 729
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 729
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 730
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 730
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 731
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 731
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 732
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 732
up

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 787
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 788
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 788
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 789
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 789
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 790
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 790
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 791
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 791
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 792
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 792
up

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 855
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 855
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 856
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 856
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 857
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 857
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 858
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 858
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 859
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 859
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 860
end step
get new action
tensor([0.

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 925
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 926
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 926
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 927
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 927
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 928
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 928
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 929
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 929
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 930
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 930
up

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 991
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 992
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 992
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 993
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 993
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 994
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 994
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 995
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 995
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 996
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 996
up

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1046
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1047
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1047
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1048
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1048
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1049
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1049
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1050
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1050
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1051
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1104
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1105
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1105
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1106
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1106
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1107
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1107
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1108
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1108
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1109
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1170
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1171
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1171
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1172
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1172
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1173
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1173
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1174
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1174
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1175
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1229
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1230
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1230
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1231
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1231
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1232
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1232
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1233
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1233
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1234
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1287
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1288
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1288
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1289
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1289
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1290
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1290
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1291
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1291
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1292
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1344
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1344
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1345
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1345
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1346
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1346
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1347
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1347
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1348
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1348
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1349
end step
get new action

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1403
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1404
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1404
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1405
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1405
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1406
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1406
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1407
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1407
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1408
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1472
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1473
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1473
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1474
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1474
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1475
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1475
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1476
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1476
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1477
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1533
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1534
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1534
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1535
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1535
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1536
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1536
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1537
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1537
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1538
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1589
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1590
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1590
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1591
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1591
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1592
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1592
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1593
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1593
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1594
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1655
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1656
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1656
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1657
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1657
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1658
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1658
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1659
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1659
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1660
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1721
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1722
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1722
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1723
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1723
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1724
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1724
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1725
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1725
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1726
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1781
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1782
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1782
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1783
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1783
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1784
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1784
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1785
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1785
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1786
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1848
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1849
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1849
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1850
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1850
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1851
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1851
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1852
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1852
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1853
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1917
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1918
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1918
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1919
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1919
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1920
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1920
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1921
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1921
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1922
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1986
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1986
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1987
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1987
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1988
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1988
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1989
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1989
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1990
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 1990
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 1991
end step
get new action

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2051
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2051
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2052
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2052
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2053
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2053
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2054
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2054
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2055
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2055
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2056
end step
get new action

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2114
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2115
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2115
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2116
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2116
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2117
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2117
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2118
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2118
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2119
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2181
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2182
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2182
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2183
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2183
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2184
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2184
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2185
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2185
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2186
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2247
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2248
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2248
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2249
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2249
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2250
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2250
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2251
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2251
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2252
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2317
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2318
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2318
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2319
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2319
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2320
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2320
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2321
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2321
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2322
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2387
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2388
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2388
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2389
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2389
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2390
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2390
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2391
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2391
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2392
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2456
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2457
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2457
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2458
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2458
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2459
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2459
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2460
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2460
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2461
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2525
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2526
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2526
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2527
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2527
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2528
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2528
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2529
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2529
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2530
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2592
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2593
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2593
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2594
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2594
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2595
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2595
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2596
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2596
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2597
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2656
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2657
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2657
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2658
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2658
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2659
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2659
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2660
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2660
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2661
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2712
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2713
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2713
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2714
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2714
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2715
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2715
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2716
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2716
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2717
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2777
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2777
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2778
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2778
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2779
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2779
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2780
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2780
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2781
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2781
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2782
end step
get new act

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2844
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2845
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2845
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2846
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2846
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2847
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2847
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2848
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2848
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2849
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2901
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2901
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2902
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2902
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2903
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2903
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2904
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2904
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2905
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2905
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2906
end step
get new action

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2966
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2967
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2967
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2968
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2968
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2969
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2969
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2970
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 2970
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 2971
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3033
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3034
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3034
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3035
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3035
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3036
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3036
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3037
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3037
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3038
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3102
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3103
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3103
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3104
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3104
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3105
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3105
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3106
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3106
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3107
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3170
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3171
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3171
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3172
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3172
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3173
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3173
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3174
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3174
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3175
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3230
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3231
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3231
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3232
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3232
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3233
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3233
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3234
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3234
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3235
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3294
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3295
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3295
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3296
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3296
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3297
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3297
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3298
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3298
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3299
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3359
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3360
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3360
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3361
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3361
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3362
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3362
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3363
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3363
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3364
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3426
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3427
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3427
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3428
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3428
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3429
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3429
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3430
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3430
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3431
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3488
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3489
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3489
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3490
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3490
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3491
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3491
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3492
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3492
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3493
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3547
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3548
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3548
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3549
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3549
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3550
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3550
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3551
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3551
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3552
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3611
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3612
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3612
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3613
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3613
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3614
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3614
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3615
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3615
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3616
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3671
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3671
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3672
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3672
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3673
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3673
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3674
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3674
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3675
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3675
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3676
end step
get new action

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3733
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3734
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3734
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3735
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3735
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3736
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3736
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3737
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3737
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3738
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3798
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3799
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3799
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3800
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3800
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3801
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3801
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3802
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3802
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3803
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3854
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3855
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3855
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3856
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3856
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3857
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3857
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3858
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3858
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3859
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3909
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3909
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3910
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3910
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3911
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3911
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3912
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3912
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3913
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3913
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3914
end step
get new act

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3973
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3974
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3974
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3975
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3975
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3976
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3976
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3977
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 3977
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 3978
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4040
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4041
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4041
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4042
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4042
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4043
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4043
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4044
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4044
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4045
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4108
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4109
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4109
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4110
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4110
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4111
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4111
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4112
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4112
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4113
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4174
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4175
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4175
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4176
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4176
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4177
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4177
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4178
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4178
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4179
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4243
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4244
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4244
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4245
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4245
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4246
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4246
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4247
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4247
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4248
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4310
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4311
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4311
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4312
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4312
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4313
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4313
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4314
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4314
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4315
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4376
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4377
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4377
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4378
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4378
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4379
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4379
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4380
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4380
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4381
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4434
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4435
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4435
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4436
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4436
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4437
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4437
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4438
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4438
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4439
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4497
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4498
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4498
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4499
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4499
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4500
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4500
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4501
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4501
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4502
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4563
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4564
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4564
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4565
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4565
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4566
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4566
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4567
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4567
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4568
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4628
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4629
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4629
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4630
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4630
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4631
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4631
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4632
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4632
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4633
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4693
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4694
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4694
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4695
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4695
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4696
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4696
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4697
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4697
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4698
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4761
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4762
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4762
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4763
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4763
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4764
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4764
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4765
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4765
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4766
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4821
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4821
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4822
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4822
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4823
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4823
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4824
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4824
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4825
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4825
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4826
end step
get new action

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4877
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4878
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4878
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4879
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4879
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4880
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4880
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4881
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4881
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4882
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4934
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4935
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4935
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4936
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4936
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4937
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4937
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4938
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4938
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4939
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4992
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4992
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4993
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4993
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4994
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4994
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4995
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4995
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4996
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 4996
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 4997
end step
get new act

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5048
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5049
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5049
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5050
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5050
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5051
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5051
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5052
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5052
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5053
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5109
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5109
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5110
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5110
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5111
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5111
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5112
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5112
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5113
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5113
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5114
end step
get new action

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5172
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5173
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5173
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5174
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5174
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5175
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5175
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5176
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5176
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5177
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5240
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5241
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5241
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5242
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5242
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5243
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5243
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5244
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5244
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5245
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5299
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5300
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5300
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5301
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5301
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5302
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5302
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5303
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5303
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5304
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5355
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5356
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5356
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5357
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5357
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5358
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5358
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5359
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5359
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5360
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5410
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5411
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5411
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5412
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5412
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5413
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5413
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5414
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5414
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5415
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5466
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5467
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5467
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5468
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5468
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5469
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5469
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5470
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5470
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5471
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5525
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5526
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5526
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5527
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5527
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5528
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5528
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5529
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5529
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5530
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5585
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5586
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5586
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5587
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5587
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5588
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5588
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5589
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5589
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5590
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5644
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5645
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5645
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5646
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5646
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5647
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5647
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5648
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5648
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5649
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5701
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5702
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5702
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5703
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5703
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5704
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5704
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5705
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5705
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5706
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5768
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5769
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5769
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5770
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5770
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5771
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5771
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5772
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5772
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5773
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5836
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5837
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5837
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5838
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5838
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5839
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5839
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5840
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5840
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5841
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5891
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5892
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5892
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5893
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5893
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5894
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5894
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5895
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5895
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5896
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5945
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5946
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5946
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5947
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5947
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5948
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5948
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5949
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 5949
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 5950
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6000
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6001
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6001
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6002
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6002
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6003
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6003
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6004
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6004
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6005
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6056
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6057
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6057
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6058
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6058
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6059
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6059
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6060
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6060
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6061
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6112
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6113
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6113
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6114
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6114
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6115
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6115
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6116
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6116
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6117
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6173
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6173
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6174
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6174
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6175
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6175
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6176
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6176
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6177
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6177
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6178
end step
get new action

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6227
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6228
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6228
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6229
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6229
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6230
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6230
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6231
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6231
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6232
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6292
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6293
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6293
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6294
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6294
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6295
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6295
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6296
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6296
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6297
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6361
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6362
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6362
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6363
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6363
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6364
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6364
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6365
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6365
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6366
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6428
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6429
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6429
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6430
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6430
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6431
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6431
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6432
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6432
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6433
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6496
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6497
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6497
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6498
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6498
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6499
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6499
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6500
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6500
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6501
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6563
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6564
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6564
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6565
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6565
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6566
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6566
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6567
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6567
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6568
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6631
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6632
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6632
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6633
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6633
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6634
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6634
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6635
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6635
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6636
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6700
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6701
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6701
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6702
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6702
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6703
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6703
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6704
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6704
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6705
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6769
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6770
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6770
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6771
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6771
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6772
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6772
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6773
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6773
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6774
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6838
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6839
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6839
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6840
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6840
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6841
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6841
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6842
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6842
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6843
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6909
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6909
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6910
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6910
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6911
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6911
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6912
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6912
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6913
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6913
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6914
end step
get new action

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6973
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6974
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6974
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6975
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6975
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6976
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6976
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6977
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 6977
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 6978
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7042
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7043
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7043
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7044
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7044
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7045
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7045
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7046
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7046
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7047
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7110
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7111
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7111
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7112
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7112
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7113
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7113
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7114
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7114
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7115
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7180
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7181
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7181
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7182
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7182
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7183
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7183
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7184
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7184
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7185
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7249
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7250
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7250
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7251
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7251
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7252
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7252
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7253
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7253
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7254
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7316
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7317
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7317
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7318
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7318
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7319
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7319
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7320
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7320
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7321
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7372
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7373
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7373
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7374
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7374
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7375
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7375
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7376
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7376
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7377
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7433
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7434
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7434
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7435
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7435
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7436
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7436
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7437
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7437
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7438
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7493
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7494
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7494
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7495
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7495
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7496
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7496
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7497
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7497
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7498
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7557
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7558
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7558
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7559
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7559
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7560
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7560
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7561
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7561
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7562
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7626
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7627
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7627
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7628
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7628
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7629
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7629
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7630
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7630
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7631
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7693
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7694
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7694
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7695
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7695
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7696
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7696
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7697
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7697
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7698
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7747
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7748
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7748
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7749
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7749
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7750
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7750
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7751
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7751
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7752
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7806
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7807
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7807
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7808
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7808
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7809
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7809
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7810
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7810
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7811
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7868
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7869
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7869
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7870
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7870
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7871
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7871
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7872
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7872
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7873
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7930
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7931
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7931
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7932
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7932
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7933
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7933
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7934
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7934
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7935
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7998
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 7999
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 7999
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8000
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8000
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8001
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8001
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8002
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8002
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8003
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8066
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8067
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8067
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8068
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8068
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8069
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8069
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8070
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8070
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8071
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8133
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8134
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8134
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8135
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8135
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8136
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8136
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8137
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8137
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8138
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8199
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8200
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8200
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8201
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8201
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8202
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8202
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8203
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8203
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8204
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8266
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8267
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8267
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8268
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8268
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8269
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8269
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8270
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8270
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8271
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8336
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8337
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8337
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8338
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8338
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8339
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8339
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8340
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8340
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8341
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8405
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8406
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8406
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8407
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8407
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8408
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8408
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8409
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8409
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8410
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8476
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8477
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8477
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8478
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8478
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8479
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8479
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8480
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8480
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8481
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8543
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8544
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8544
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8545
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8545
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8546
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8546
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8547
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8547
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8548
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8602
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8603
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8603
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8604
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8604
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8605
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8605
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8606
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8606
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8607
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8663
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8663
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8664
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8664
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8665
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8665
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8666
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8666
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8667
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8667
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8668
end step
get new action

up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8729
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8729
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8730
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8730
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8731
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8731
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8732
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8732
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8733
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8733
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8734
end step
get new act

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8785
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8786
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8786
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8787
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8787
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8788
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8788
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8789
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8789
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8790
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8841
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8841
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8842
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8842
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8843
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8843
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8844
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8844
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8845
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8845
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8846
end step
get new action

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8899
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8900
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8900
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8901
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8901
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8902
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8902
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8903
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8903
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8904
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8955
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8956
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8956
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8957
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8957
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8958
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8958
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8959
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 8959
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 8960
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9012
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9013
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9013
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9014
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9014
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9015
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9015
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9016
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9016
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9017
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9070
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9071
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9071
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9072
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9072
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9073
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9073
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9074
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9074
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9075
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9129
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9130
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9130
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9131
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9131
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9132
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9132
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9133
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9133
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9134
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9192
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9193
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9193
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9194
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9194
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9195
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9195
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9196
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9196
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9197
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9249
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9250
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9250
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9251
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9251
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9252
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9252
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9253
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9253
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9254
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9310
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9311
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9311
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9312
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9312
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9313
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9313
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9314
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9314
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9315
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9371
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9371
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9372
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9372
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9373
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9373
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9374
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9374
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9375
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9375
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9376
end step
get new action

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9431
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9431
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9432
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9432
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9433
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9433
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9434
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9434
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9435
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9435
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9436
end step
get new action

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9497
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9498
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9498
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9499
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9499
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9500
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9500
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9501
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9501
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9502
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9566
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9567
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9567
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9568
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9568
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9569
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9569
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9570
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9570
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9571
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9623
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9624
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9624
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9625
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9625
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9626
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9626
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9627
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9627
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9628
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9681
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9682
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9682
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9683
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9683
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9684
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9684
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9685
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9685
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9686
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9740
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9741
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9741
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9742
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9742
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9743
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9743
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9744
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9744
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9745
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start s

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9802
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9802
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9803
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9803
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9804
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9804
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9805
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9805
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9806
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9806
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9807
end step
get new action

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9864
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9865
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9865
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9866
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9866
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9867
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9867
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9868
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9868
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9869
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) te

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9933
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9933
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9934
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9934
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9935
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9935
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9936
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9936
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9937
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 9937
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 9938
end step
get new action

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10001
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10002
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10002
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10003
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10003
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10004
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10004
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10005
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10005
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10006
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBac

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10068
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10069
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10069
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10070
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10070
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10071
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10071
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10072
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10072
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10073
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBac

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10128
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10129
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10129
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10130
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10130
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10131
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10131
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10132
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10132
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10133
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBac

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10188
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10189
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10189
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10190
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10190
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10191
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10191
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10192
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10192
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10193
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10249
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10250
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10250
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10251
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10251
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10252
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10252
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10253
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10253
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10254
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBac

up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10310
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10310
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10311
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10311
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10312
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10312
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10313
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10313
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10314
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10314
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10315
end step


get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10369
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10370
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10370
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10371
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10371
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10372
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10372
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10373
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10373
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10374
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBac

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10436
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10437
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10437
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10438
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10438
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10439
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10439
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10440
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10440
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10441
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBac

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10506
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10507
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10507
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10508
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10508
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10509
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10509
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10510
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10510
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10511
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBac

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10573
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10574
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10574
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10575
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10575
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10576
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10576
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10577
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10577
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10578
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10641
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10641
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10642
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10642
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10643
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10643
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10644
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10644
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10645
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10645
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10646
end step
get

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10707
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10708
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10708
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10709
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10709
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10710
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10710
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10711
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10711
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10712
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBac

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10772
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10773
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10773
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10774
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10774
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10775
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10775
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10776
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10776
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10777
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10840
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10841
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10841
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10842
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10842
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10843
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10843
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10844
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10844
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10845
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBac

trying to book [2, 1] 1
not bookable
step 10907
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10907
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10908
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10908
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10909
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10909
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10910
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10910
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10911
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10911
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10912
end step
get new action
ten

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10978
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10978
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10979
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10979
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10980
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10980
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10981
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10981
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10982
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 10982
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 10983
end step
get

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11046
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11047
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11047
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11048
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11048
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11049
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11049
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11050
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11050
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11051
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11113
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11114
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11114
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11115
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11115
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11116
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11116
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11117
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11117
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11118
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBac

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11184
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11185
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11185
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11186
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11186
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11187
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11187
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11188
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11188
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11189
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBac

new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11240
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11240
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11241
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11241
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11242
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11242
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11243
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11243
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11244
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11244
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11245
end step
get

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11304
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11305
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11305
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11306
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11306
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11307
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11307
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11308
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11308
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11309
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11367
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11368
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11368
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11369
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11369
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11370
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11370
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11371
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11371
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11372
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBac

tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11436
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11437
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11437
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11438
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11438
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11439
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11439
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11440
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11440
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11441
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11501
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11502
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11502
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11503
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11503
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11504
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11504
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11505
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11505
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11506
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBac

get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11569
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11570
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11570
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11571
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11571
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11572
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11572
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11573
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBackward>) tensor(0)
start step 11573
up
new pos [0, 1]
trying to book [2, 1] 1
not bookable
step 11574
end step
get new action
tensor([0.2658, 0.2534, 0.2315, 0.2493], grad_fn=<SoftmaxBac

KeyboardInterrupt: 

In [None]:
print(env.state.sum())

In [None]:
env.appt_idx