In [1]:
import findgaps
import generate_randomip
import gurobiutils
import gymenv_v2
# import gymenv
import solverutils
import testgymenv

In [2]:
import tensorflow as tf
import numpy as np
import random
from gymenv_v2 import make_multiple_env
import wandb
wandb.login()
run=wandb.init(project="finalproject", entity="ieor-4575", tags=["training-easy"])
#run=wandb.init(project="finalproject", entity="ieor-4575", tags=["training-hard"])
#run=wandb.init(project="finalproject", entity="ieor-4575", tags=["test"])

[34m[1mwandb[0m: Currently logged in as: [33mieor-4575[0m (use `wandb login --relogin` to force relogin)


In [3]:
# Setup: You may generate your own instances on which you train the cutting agent.
custom_config = {
    "load_dir"        : 'instances/randomip_n60_m60',   # this is the location of the randomly generated instances (you may specify a different directory)
    "idx_list"        : list(range(20)),                # take the first 20 instances from the directory
    "timelimit"       : 50,                             # the maximum horizon length is 50
    "reward_type"     : 'obj'                           # DO NOT CHANGE reward_type
}

# Easy Setup: Use the following environment settings. We will evaluate your agent with the same easy config below:
easy_config = {
    "load_dir"        : 'instances/train_10_n60_m60',
    "idx_list"        : list(range(10)),
    "timelimit"       : 50,
    "reward_type"     : 'obj'
}

# Hard Setup: Use the following environment settings. We will evaluate your agent with the same hard config below:
hard_config = {
    "load_dir"        : 'instances/train_100_n60_m60',
    "idx_list"        : list(range(99)),
    "timelimit"       : 50,
    "reward_type"     : 'obj'
}

In [4]:
def calS(hg):
    h = hg[0]
    g = hg[1]
    p = tf.linalg.matmul(g,tf.transpose(h))
    p = tf.reduce_mean(p,axis=1)
    p = tf.linalg.normalize(p, ord=1)
    p = tf.nn.softmax(p[0])
    return p

In [5]:
def discounted_rewards(r, gamma):
    discounted_r = np.zeros_like(r)
    running_sum = 0
    for i in reversed(range(0,len(r))):
        discounted_r[i] = running_sum * gamma + r[i]
        running_sum = discounted_r[i]
    return list(discounted_r)

In [6]:
class Policy(object):
    
    def __init__(self, lr):

        self.model = tf.keras.models.Sequential([
                    tf.keras.layers.Input(shape=(1, 61)),
#                     tf.keras.layers.Dense(64, activation = "tanh"),
                    tf.keras.layers.LSTM(32, activation = "tanh"),
                    tf.keras.layers.Dense(10)
                ])

        self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
        
    
    def compute_prob(self, s):
        C = np.concatenate((s[0],np.array([s[1]]).T),axis=1)
        D = np.concatenate((s[3],np.array([s[4]]).T),axis=1)
        C = C.reshape(C.shape[0],1,C.shape[1])
        D = D.reshape(D.shape[0],1,D.shape[1])
        h = self.model(C)
        g = self.model(D)
        prob = tf.keras.layers.Lambda(calS)([h, g])
        prob = tf.cast(prob, tf.double)
        return prob.numpy()
    
    def train(self, states, actions, Qs):

        with tf.GradientTape() as tape:
            
            prob_selected = []

            for i in range(len(states)):
                C = np.concatenate((states[i][0],np.array([states[i][1]]).T),axis=1)
                D = np.concatenate((states[i][3],np.array([states[i][4]]).T),axis=1)
                C = C.reshape(C.shape[0],1,C.shape[1])
                D = D.reshape(D.shape[0],1,D.shape[1])
                h = self.model(C)
                g = self.model(D)
                prob = tf.keras.layers.Lambda(calS)([h, g])
                prob = tf.cast(prob, tf.double)

                prob_selected += [prob[actions[i]]]

            prob_selected = tf.convert_to_tensor(prob_selected)

            prob_selected += 1e-8

            loss = -tf.reduce_mean(Qs * tf.math.log(prob_selected))

            gradients = tape.gradient(loss, self.model.trainable_variables)  

            self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
            
        return loss.numpy()

In [7]:
env = make_multiple_env(**easy_config) 

loading training instances, dir instances/train_10_n60_m60 idx 0
loading training instances, dir instances/train_10_n60_m60 idx 1
loading training instances, dir instances/train_10_n60_m60 idx 2
loading training instances, dir instances/train_10_n60_m60 idx 3
loading training instances, dir instances/train_10_n60_m60 idx 4
loading training instances, dir instances/train_10_n60_m60 idx 5
loading training instances, dir instances/train_10_n60_m60 idx 6
loading training instances, dir instances/train_10_n60_m60 idx 7
loading training instances, dir instances/train_10_n60_m60 idx 8
loading training instances, dir instances/train_10_n60_m60 idx 9


In [8]:
lr = 1e-2  
numtrajs = 5 
gamma = .99
sigma = 0.02
epsilon = 0.6
iterations = 40
actor = Policy(lr) 
actor.model.build()

In [9]:
rrecord = []

for e in range(iterations):
    OBS = [] 
    ACTS = [] 
    ADS = []  
    VAL = []
    
    vas = actor.model.trainable_variables # set trainable_vars same for every iteration
    grashape = np.random.multivariate_normal(np.zeros(len(vas)), np.identity(len(vas)), 1).T 
    gradients = np.zeros_like(grashape)
    t = 0
    
    for num in range(numtrajs):

        s = env.reset() # get new IP
        
        done = False
        obss = []  # states
        acts = []   # actions
        rs = []
        repisode = 0
        counter = 0
        while not done:
            
            prob = actor.compute_prob(s)

            rand1 = random.random()
            if rand1>epsilon:
                action = list(np.random.randint(0, s[-1].size, 1))  
            else:
                action = [int(np.argmax(actor.compute_prob(s)))]
            
            acts += [action]
            obss += [s]
            
            s, r, done, _ = env.step(action)
            done_ = 1 if done else 0
            rs += [r]
            
            repisode += r
            print("ite",e,"traj",num,"step",counter, "action", action, "argmax", int(np.argmax(actor.compute_prob(s))), repisode)
            t += 1
            counter += 1
            
        VAL += discounted_rewards(rs, gamma)
        OBS += obss
        ACTS += acts
            
        rrecord += [repisode]
        fixedWindow=20
        movingAverage=0
        if len(rrecord) >= fixedWindow:
            movingAverage=np.mean(rrecord[len(rrecord)-fixedWindow:len(rrecord)-1])
        wandb.log({ "Training reward" : rrecord[-1], "training reward moving average" : movingAverage})


    actor.train(OBS,ACTS,np.array(VAL))


Using license file /Users/lan/gurobi.lic
ite 0 traj 0 step 0 action [36] argmax 1 0.0003492276023280283
ite 0 traj 0 step 1 action [1] argmax 1 0.0006329453194666712
ite 0 traj 0 step 2 action [1] argmax 1 0.0009394214575877413
ite 0 traj 0 step 3 action [58] argmax 1 0.0012637171284950455
ite 0 traj 0 step 4 action [1] argmax 1 0.01572065219443175
ite 0 traj 0 step 5 action [1] argmax 1 0.052210136439498456
ite 0 traj 0 step 6 action [1] argmax 1 0.05538015366573745
ite 0 traj 0 step 7 action [50] argmax 1 0.05561895482696855
ite 0 traj 0 step 8 action [1] argmax 1 0.06014018590030901
ite 0 traj 0 step 9 action [57] argmax 1 0.060874226114265184
ite 0 traj 0 step 10 action [38] argmax 1 0.06234596262993364
ite 0 traj 0 step 11 action [45] argmax 1 0.06649476410075295
ite 0 traj 0 step 12 action [4] argmax 1 0.06717708095402486
ite 0 traj 0 step 13 action [45] argmax 1 0.07662485670380192
ite 0 traj 0 step 14 action [21] argmax 1 0.08060318980460579
ite 0 traj 0 step 15 action [1] argm

ite 0 traj 2 step 34 action [87] argmax 0 0.11534908185194581
ite 0 traj 2 step 35 action [88] argmax 0 0.11534908185240056
ite 0 traj 2 step 36 action [0] argmax 0 0.1153490818528553
ite 0 traj 2 step 37 action [0] argmax 0 0.1153490818528553
ite 0 traj 2 step 38 action [0] argmax 0 0.1153490818537648
ite 0 traj 2 step 39 action [11] argmax 0 0.11534908185421955
ite 0 traj 2 step 40 action [71] argmax 0 0.1153490818546743
ite 0 traj 2 step 41 action [0] argmax 0 0.11534908185512904
ite 0 traj 2 step 42 action [0] argmax 0 0.11534908185512904
ite 0 traj 2 step 43 action [10] argmax 0 0.11534908185512904
ite 0 traj 2 step 44 action [0] argmax 0 0.11534908185558379
ite 0 traj 2 step 45 action [0] argmax 0 0.11534908185603854
ite 0 traj 2 step 46 action [0] argmax 0 0.11534908185649329
ite 0 traj 2 step 47 action [0] argmax 0 0.11534908185694803
ite 0 traj 2 step 48 action [42] argmax 0 0.11534908185740278
ite 0 traj 2 step 49 action [0] argmax 0 0.11534908185785753
ite 0 traj 3 step 0 ac

ite 1 traj 0 step 18 action [1] argmax 1 0.11023152771258538
ite 1 traj 0 step 19 action [1] argmax 1 0.11028225744485098
ite 1 traj 0 step 20 action [2] argmax 1 0.11030185903064194
ite 1 traj 0 step 21 action [28] argmax 1 0.11045694037102294
ite 1 traj 0 step 22 action [1] argmax 1 0.11062026386662183
ite 1 traj 0 step 23 action [1] argmax 1 0.11070663425471139
ite 1 traj 0 step 24 action [71] argmax 1 0.11103264286430203
ite 1 traj 0 step 25 action [1] argmax 1 0.11104281345888012
ite 1 traj 0 step 26 action [1] argmax 1 0.1110483865650167
ite 1 traj 0 step 27 action [71] argmax 1 0.11107150446605374
ite 1 traj 0 step 28 action [1] argmax 1 0.11107977948176995
ite 1 traj 0 step 29 action [55] argmax 1 0.11109359614147252
ite 1 traj 0 step 30 action [1] argmax 1 0.11110514617030276
ite 1 traj 0 step 31 action [1] argmax 1 0.11111177033717468
ite 1 traj 0 step 32 action [50] argmax 1 0.11111597956278274
ite 1 traj 0 step 33 action [1] argmax 1 0.11111818290191877
ite 1 traj 0 step 34

ite 1 traj 3 step 3 action [0] argmax 0 0.10131109041594755
ite 1 traj 3 step 4 action [16] argmax 0 0.1013110904164023
ite 1 traj 3 step 5 action [58] argmax 0 0.10131109041685704
ite 1 traj 3 step 6 action [35] argmax 0 0.10131109041731179
ite 1 traj 3 step 7 action [22] argmax 0 0.10131109041822128
ite 1 traj 3 step 8 action [26] argmax 0 0.10131109041822128
ite 1 traj 3 step 9 action [7] argmax 0 0.10131109041913078
ite 1 traj 3 step 10 action [0] argmax 0 0.10131109041958553
ite 1 traj 3 step 11 action [0] argmax 0 0.10131109041958553
ite 1 traj 3 step 12 action [55] argmax 0 0.10131109042004027
ite 1 traj 3 step 13 action [48] argmax 0 0.10131109042049502
ite 1 traj 3 step 14 action [0] argmax 0 0.10131109042049502
ite 1 traj 3 step 15 action [0] argmax 0 0.10131109042140451
ite 1 traj 3 step 16 action [0] argmax 0 0.10131109042231401
ite 1 traj 3 step 17 action [0] argmax 0 0.10131109042276876
ite 1 traj 3 step 18 action [35] argmax 0 0.10637035270974593
ite 1 traj 3 step 19 act

ite 2 traj 0 step 39 action [0] argmax 0 0.60724780731789
ite 2 traj 0 step 40 action [0] argmax 0 0.6072478073185721
ite 2 traj 0 step 41 action [0] argmax 0 0.6072478073194816
ite 2 traj 0 step 42 action [70] argmax 0 0.6072478073206184
ite 2 traj 0 step 43 action [33] argmax 0 0.6072478073213006
ite 2 traj 0 step 44 action [0] argmax 0 0.6072478073215279
ite 2 traj 0 step 45 action [0] argmax 0 0.6072478073217553
ite 2 traj 0 step 46 action [90] argmax 0 0.6072478073217553
ite 2 traj 0 step 47 action [48] argmax 0 0.6072478073217553
ite 2 traj 0 step 48 action [27] argmax 0 0.6072478073222101
ite 2 traj 0 step 49 action [0] argmax 0 0.6072478073283492
ite 2 traj 1 step 0 action [0] argmax 0 0.7082653196207502
ite 2 traj 1 step 1 action [0] argmax 0 0.7082653196221145
ite 2 traj 1 step 2 action [0] argmax 0 0.7082653196225692
ite 2 traj 1 step 3 action [60] argmax 0 0.7082653196225692
ite 2 traj 1 step 4 action [0] argmax 0 0.7082653196225692
ite 2 traj 1 step 5 action [0] argmax 0 0

ite 2 traj 3 step 25 action [0] argmax 0 1.1150639396896622
ite 2 traj 3 step 26 action [0] argmax 0 1.115063939690117
ite 2 traj 3 step 27 action [13] argmax 0 1.115063939690117
ite 2 traj 3 step 28 action [68] argmax 0 1.115063939690117
ite 2 traj 3 step 29 action [72] argmax 0 1.1150639396905717
ite 2 traj 3 step 30 action [4] argmax 0 1.115063939690799
ite 2 traj 3 step 31 action [81] argmax 0 1.1150639396914812
ite 2 traj 3 step 32 action [0] argmax 0 1.1150639396921633
ite 2 traj 3 step 33 action [0] argmax 0 1.1150639396923907
ite 2 traj 3 step 34 action [0] argmax 0 1.115063939692618
ite 2 traj 3 step 35 action [0] argmax 0 1.1150639396928455
ite 2 traj 3 step 36 action [0] argmax 0 1.1150639396930728
ite 2 traj 3 step 37 action [56] argmax 0 1.1150639396933002
ite 2 traj 3 step 38 action [0] argmax 0 1.115063939693755
ite 2 traj 3 step 39 action [0] argmax 0 1.1150639396958013
ite 2 traj 3 step 40 action [0] argmax 0 1.1150639396969382
ite 2 traj 3 step 41 action [64] argmax 0

ite 3 traj 1 step 12 action [34] argmax 0 0.6151751669247005
ite 3 traj 1 step 13 action [0] argmax 0 0.6151751669249279
ite 3 traj 1 step 14 action [0] argmax 0 0.61517516692561
ite 3 traj 1 step 15 action [36] argmax 0 0.6151751669258374
ite 3 traj 1 step 16 action [20] argmax 0 0.6151751669258374
ite 3 traj 1 step 17 action [5] argmax 0 0.6151751669258374
ite 3 traj 1 step 18 action [0] argmax 0 0.6151751669258374
ite 3 traj 1 step 19 action [0] argmax 0 0.6151751669262921
ite 3 traj 1 step 20 action [0] argmax 0 0.6151751669269743
ite 3 traj 1 step 21 action [0] argmax 0 0.6151751669272016
ite 3 traj 1 step 22 action [43] argmax 0 0.615175166927429
ite 3 traj 1 step 23 action [13] argmax 0 0.6151751669276564
ite 3 traj 1 step 24 action [0] argmax 0 0.6151751669278838
ite 3 traj 1 step 25 action [0] argmax 0 0.6151751669281111
ite 3 traj 1 step 26 action [34] argmax 0 0.6151751669285659
ite 3 traj 1 step 27 action [0] argmax 0 0.6151751669287933
ite 3 traj 1 step 28 action [0] argma

ite 3 traj 3 step 48 action [80] argmax 0 0.886741328988137
ite 3 traj 3 step 49 action [0] argmax 0 0.886741328988137
ite 3 traj 4 step 0 action [0] argmax 0 0.9216160135674727
ite 3 traj 4 step 1 action [0] argmax 0 0.9216160135681548
ite 3 traj 4 step 2 action [0] argmax 0 0.9216160135690643
ite 3 traj 4 step 3 action [58] argmax 0 0.9216160135692917
ite 3 traj 4 step 4 action [15] argmax 0 0.921616013569519
ite 3 traj 4 step 5 action [23] argmax 0 0.9216160135699738
ite 3 traj 4 step 6 action [5] argmax 0 0.9216160135704285
ite 3 traj 4 step 7 action [0] argmax 0 0.9216160135704285
ite 3 traj 4 step 8 action [0] argmax 0 0.9216160135706559
ite 3 traj 4 step 9 action [33] argmax 0 0.9216160135708833
ite 3 traj 4 step 10 action [0] argmax 0 0.9216160135708833
ite 3 traj 4 step 11 action [0] argmax 0 0.9216160135711107
ite 3 traj 4 step 12 action [0] argmax 0 0.921616013571338
ite 3 traj 4 step 13 action [0] argmax 0 0.9216160135720202
ite 3 traj 4 step 14 action [0] argmax 0 0.921616

ite 4 traj 1 step 34 action [0] argmax 0 0.11534908186922621
ite 4 traj 1 step 35 action [0] argmax 0 0.11534908186968096
ite 4 traj 1 step 36 action [2] argmax 0 0.1153490818701357
ite 4 traj 1 step 37 action [41] argmax 0 0.1153490818701357
ite 4 traj 1 step 38 action [87] argmax 0 0.1153490818710452
ite 4 traj 1 step 39 action [0] argmax 0 0.1153490818710452
ite 4 traj 1 step 40 action [16] argmax 0 0.11534908187149995
ite 4 traj 1 step 41 action [0] argmax 0 0.11534908187149995
ite 4 traj 1 step 42 action [61] argmax 0 0.1153490818719547
ite 4 traj 1 step 43 action [49] argmax 0 0.11534908187240944
ite 4 traj 1 step 44 action [5] argmax 0 0.11534908187286419
ite 4 traj 1 step 45 action [0] argmax 0 0.11534908187286419
ite 4 traj 1 step 46 action [0] argmax 0 0.11534908187286419
ite 4 traj 1 step 47 action [0] argmax 0 0.11534908187331894
ite 4 traj 1 step 48 action [106] argmax 0 0.11534908187377368
ite 4 traj 1 step 49 action [0] argmax 0 0.11534908187468318
ite 4 traj 2 step 0 ac

ite 4 traj 4 step 20 action [0] argmax 0 0.7022608541865338
ite 4 traj 4 step 21 action [45] argmax 0 0.7022608541865338
ite 4 traj 4 step 22 action [22] argmax 0 0.7022608541869886
ite 4 traj 4 step 23 action [0] argmax 0 0.7022608541878981
ite 4 traj 4 step 24 action [36] argmax 0 0.7022608541883528
ite 4 traj 4 step 25 action [2] argmax 0 0.7022608541888076
ite 4 traj 4 step 26 action [80] argmax 0 0.7022608541892623
ite 4 traj 4 step 27 action [0] argmax 0 0.7022608541897171
ite 4 traj 4 step 28 action [0] argmax 0 0.7022608541901718
ite 4 traj 4 step 29 action [84] argmax 0 0.7022608541906266
ite 4 traj 4 step 30 action [0] argmax 0 0.702260854191536
ite 4 traj 4 step 31 action [36] argmax 0 0.7022608541919908
ite 4 traj 4 step 32 action [0] argmax 0 0.7022608541919908
ite 4 traj 4 step 33 action [20] argmax 0 0.7022608541919908
ite 4 traj 4 step 34 action [60] argmax 0 0.7022608541924455
ite 4 traj 4 step 35 action [0] argmax 0 0.7022608541929003
ite 4 traj 4 step 36 action [0] a

ite 5 traj 2 step 7 action [0] argmax 0 0.6151751669242458
ite 5 traj 2 step 8 action [0] argmax 0 0.6151751669244732
ite 5 traj 2 step 9 action [0] argmax 0 0.6151751669249279
ite 5 traj 2 step 10 action [0] argmax 0 0.6151751669253827
ite 5 traj 2 step 11 action [9] argmax 0 0.6151751669253827
ite 5 traj 2 step 12 action [8] argmax 0 0.61517516692561
ite 5 traj 2 step 13 action [64] argmax 0 0.6151751669258374
ite 5 traj 2 step 14 action [0] argmax 0 0.6151751669265195
ite 5 traj 2 step 15 action [12] argmax 0 0.6151751669272016
ite 5 traj 2 step 16 action [45] argmax 0 0.6151751669276564
ite 5 traj 2 step 17 action [38] argmax 0 0.6151751669276564
ite 5 traj 2 step 18 action [0] argmax 0 0.6151751669276564
ite 5 traj 2 step 19 action [0] argmax 0 0.6151751669281111
ite 5 traj 2 step 20 action [0] argmax 0 0.6151751669285659
ite 5 traj 2 step 21 action [0] argmax 0 0.6151751669287933
ite 5 traj 2 step 22 action [22] argmax 0 0.6151751669290206
ite 5 traj 2 step 23 action [23] argmax 

ite 5 traj 4 step 44 action [65] argmax 0 0.9216160135815699
ite 5 traj 4 step 45 action [2] argmax 0 0.9216160135820246
ite 5 traj 4 step 46 action [0] argmax 0 0.9216160135824794
ite 5 traj 4 step 47 action [20] argmax 0 0.9216160135824794
ite 5 traj 4 step 48 action [0] argmax 0 0.9216160135831615
ite 5 traj 4 step 49 action [3] argmax 0 0.9216160135838436
ite 6 traj 0 step 0 action [12] argmax 0 0.014623114538608206
ite 6 traj 0 step 1 action [0] argmax 0 0.11506393968397788
ite 6 traj 0 step 2 action [54] argmax 0 0.11506393968443263
ite 6 traj 0 step 3 action [0] argmax 0 0.11506393968466
ite 6 traj 0 step 4 action [29] argmax 0 0.11506393968488737
ite 6 traj 0 step 5 action [0] argmax 0 0.21701794461591817
ite 6 traj 0 step 6 action [5] argmax 0 0.2175354338191937
ite 6 traj 0 step 7 action [0] argmax 0 1.1150639396844326
ite 6 traj 0 step 8 action [0] argmax 0 1.1150639396844326
ite 6 traj 0 step 9 action [0] argmax 0 1.1150639396844326
ite 6 traj 0 step 10 action [0] argmax 0 

ite 6 traj 2 step 30 action [0] argmax 0 0.11534908186513348
ite 6 traj 2 step 31 action [0] argmax 0 0.11534908186513348
ite 6 traj 2 step 32 action [0] argmax 0 0.11534908186558823
ite 6 traj 2 step 33 action [17] argmax 0 0.11534908186604298
ite 6 traj 2 step 34 action [0] argmax 0 0.11534908186695247
ite 6 traj 2 step 35 action [0] argmax 0 0.11534908186786197
ite 6 traj 2 step 36 action [0] argmax 0 0.11534908186831672
ite 6 traj 2 step 37 action [0] argmax 0 0.11534908186968096
ite 6 traj 2 step 38 action [0] argmax 0 0.11534908187059045
ite 6 traj 2 step 39 action [0] argmax 0 0.11534908187149995
ite 6 traj 2 step 40 action [38] argmax 0 0.11534908187240944
ite 6 traj 2 step 41 action [0] argmax 0 0.11534908187240944
ite 6 traj 2 step 42 action [85] argmax 0 0.11534908187240944
ite 6 traj 2 step 43 action [0] argmax 0 0.11534908187286419
ite 6 traj 2 step 44 action [0] argmax 0 0.11534908187331894
ite 6 traj 2 step 45 action [0] argmax 0 0.11534908187331894
ite 6 traj 2 step 46 

ite 7 traj 0 step 17 action [0] argmax 0 0.6072478073078855
ite 7 traj 0 step 18 action [0] argmax 0 0.6072478073078855
ite 7 traj 0 step 19 action [0] argmax 0 0.6072478073085676
ite 7 traj 0 step 20 action [0] argmax 0 0.6072478073092498
ite 7 traj 0 step 21 action [37] argmax 0 0.6072478073092498
ite 7 traj 0 step 22 action [0] argmax 0 0.6072478073094771
ite 7 traj 0 step 23 action [18] argmax 0 0.6072478073103866
ite 7 traj 0 step 24 action [0] argmax 0 0.6072478073110688
ite 7 traj 0 step 25 action [0] argmax 0 0.6072478073115235
ite 7 traj 0 step 26 action [68] argmax 0 0.6072478073119782
ite 7 traj 0 step 27 action [0] argmax 0 0.6072478073122056
ite 7 traj 0 step 28 action [0] argmax 0 0.6072478073122056
ite 7 traj 0 step 29 action [0] argmax 0 0.6072478073122056
ite 7 traj 0 step 30 action [0] argmax 0 0.6072478073126604
ite 7 traj 0 step 31 action [0] argmax 0 0.6072478073133425
ite 7 traj 0 step 32 action [0] argmax 0 0.6072478073137972
ite 7 traj 0 step 33 action [21] argm

ite 7 traj 3 step 4 action [27] argmax 0 0.705846150922298
ite 7 traj 3 step 5 action [0] argmax 0 0.705846150922298
ite 7 traj 3 step 6 action [0] argmax 0 0.705846150922298
ite 7 traj 3 step 7 action [0] argmax 0 0.7058461509227527
ite 7 traj 3 step 8 action [0] argmax 0 0.7058461509227527
ite 7 traj 3 step 9 action [33] argmax 0 0.7058461509232075
ite 7 traj 3 step 10 action [20] argmax 0 0.7058461509232075
ite 7 traj 3 step 11 action [0] argmax 0 0.7058461509232075
ite 7 traj 3 step 12 action [32] argmax 0 0.7058461509236622
ite 7 traj 3 step 13 action [0] argmax 0 0.705846150924117
ite 7 traj 3 step 14 action [0] argmax 0 0.705846150924117
ite 7 traj 3 step 15 action [0] argmax 0 0.7058461509250264
ite 7 traj 3 step 16 action [0] argmax 0 0.705846150925936
ite 7 traj 3 step 17 action [52] argmax 0 0.7058461509263907
ite 7 traj 3 step 18 action [0] argmax 0 0.7058461509268454
ite 7 traj 3 step 19 action [0] argmax 0 0.7058461509268454
ite 7 traj 3 step 20 action [15] argmax 0 0.705

ite 8 traj 0 step 40 action [0] argmax 0 1.1150639396939823
ite 8 traj 0 step 41 action [0] argmax 0 1.1150639396942097
ite 8 traj 0 step 42 action [0] argmax 0 1.115063939694437
ite 8 traj 0 step 43 action [0] argmax 0 1.1150639396951192
ite 8 traj 0 step 44 action [0] argmax 0 1.115063939695574
ite 8 traj 0 step 45 action [0] argmax 0 1.1150639396958013
ite 8 traj 0 step 46 action [60] argmax 0 1.1150639396964834
ite 8 traj 0 step 47 action [59] argmax 0 1.1150639396967108
ite 8 traj 0 step 48 action [0] argmax 0 1.1150639396967108
ite 8 traj 0 step 49 action [0] argmax 0 1.1150639396967108
ite 8 traj 1 step 0 action [0] argmax 0 0.10131109041594755
ite 8 traj 1 step 1 action [0] argmax 0 0.10131109041594755
ite 8 traj 1 step 2 action [0] argmax 0 0.10131109041594755
ite 8 traj 1 step 3 action [0] argmax 0 0.10131109041594755
ite 8 traj 1 step 4 action [0] argmax 0 0.1013110904164023
ite 8 traj 1 step 5 action [29] argmax 0 0.1013110904164023
ite 8 traj 1 step 6 action [0] argmax 0 0

ite 8 traj 3 step 26 action [52] argmax 0 0.7082653196302999
ite 8 traj 3 step 27 action [0] argmax 0 0.7082653196312094
ite 8 traj 3 step 28 action [0] argmax 0 0.7082653196316642
ite 8 traj 3 step 29 action [0] argmax 0 0.7082653196316642
ite 8 traj 3 step 30 action [0] argmax 0 0.7082653196316642
ite 8 traj 3 step 31 action [0] argmax 0 0.7082653196316642
ite 8 traj 3 step 32 action [56] argmax 0 0.7082653196316642
ite 8 traj 3 step 33 action [0] argmax 0 0.7082653196321189
ite 8 traj 3 step 34 action [0] argmax 0 0.7082653196325737
ite 8 traj 3 step 35 action [0] argmax 0 0.7082653196330284
ite 8 traj 3 step 36 action [0] argmax 0 0.7082653196334832
ite 8 traj 3 step 37 action [49] argmax 0 0.7082653196339379
ite 8 traj 3 step 38 action [40] argmax 0 0.7082653196343927
ite 8 traj 3 step 39 action [0] argmax 0 0.7082653196343927
ite 8 traj 3 step 40 action [0] argmax 0 0.7082653196343927
ite 8 traj 3 step 41 action [0] argmax 0 0.7082653196343927
ite 8 traj 3 step 42 action [0] argm

ite 9 traj 1 step 13 action [0] argmax 0 0.6151751669258374
ite 9 traj 1 step 14 action [0] argmax 0 0.6151751669262921
ite 9 traj 1 step 15 action [0] argmax 0 0.6151751669262921
ite 9 traj 1 step 16 action [0] argmax 0 0.6151751669269743
ite 9 traj 1 step 17 action [0] argmax 0 0.6151751669272016
ite 9 traj 1 step 18 action [0] argmax 0 0.615175166927429
ite 9 traj 1 step 19 action [6] argmax 0 0.615175166927429
ite 9 traj 1 step 20 action [0] argmax 0 0.615175166927429
ite 9 traj 1 step 21 action [0] argmax 0 0.6151751669283385
ite 9 traj 1 step 22 action [0] argmax 0 0.6151751669290206
ite 9 traj 1 step 23 action [9] argmax 0 0.615175166929248
ite 9 traj 1 step 24 action [0] argmax 0 0.6151751669294754
ite 9 traj 1 step 25 action [0] argmax 0 0.6151751669299301
ite 9 traj 1 step 26 action [51] argmax 0 0.6151751669301575
ite 9 traj 1 step 27 action [62] argmax 0 0.6151751669308396
ite 9 traj 1 step 28 action [38] argmax 0 0.6151751669308396
ite 9 traj 1 step 29 action [0] argmax 0 

ite 9 traj 4 step 0 action [4] argmax 0 0.005615998892835705
ite 9 traj 4 step 1 action [49] argmax 0 0.014397868167861816
ite 9 traj 4 step 2 action [0] argmax 0 0.9216160135679274
ite 9 traj 4 step 3 action [0] argmax 0 0.9216160135681548
ite 9 traj 4 step 4 action [0] argmax 0 0.9216160135686096
ite 9 traj 4 step 5 action [16] argmax 0 0.9216160135688369
ite 9 traj 4 step 6 action [56] argmax 0 0.9216160135690643
ite 9 traj 4 step 7 action [0] argmax 0 0.921616013569519
ite 9 traj 4 step 8 action [0] argmax 0 0.9216160135699738
ite 9 traj 4 step 9 action [55] argmax 0 0.9216160135702012
ite 9 traj 4 step 10 action [0] argmax 0 0.9216160135708833
ite 9 traj 4 step 11 action [0] argmax 0 0.9216160135720202
ite 9 traj 4 step 12 action [0] argmax 0 0.9216160135724749
ite 9 traj 4 step 13 action [0] argmax 0 0.9216160135724749
ite 9 traj 4 step 14 action [49] argmax 0 0.9216160135729297
ite 9 traj 4 step 15 action [0] argmax 0 0.9216160135729297
ite 9 traj 4 step 16 action [0] argmax 0 0

ite 10 traj 1 step 35 action [0] argmax 0 0.6072478073133425
ite 10 traj 1 step 36 action [4] argmax 0 0.6072478073135699
ite 10 traj 1 step 37 action [48] argmax 0 0.6072478073135699
ite 10 traj 1 step 38 action [0] argmax 0 0.6072478073137972
ite 10 traj 1 step 39 action [91] argmax 0 0.6072478073140246
ite 10 traj 1 step 40 action [0] argmax 0 0.6072478073140246
ite 10 traj 1 step 41 action [0] argmax 0 0.6072478073140246
ite 10 traj 1 step 42 action [94] argmax 0 0.607247807314252
ite 10 traj 1 step 43 action [0] argmax 0 0.6072478073147067
ite 10 traj 1 step 44 action [56] argmax 0 0.6072478073149341
ite 10 traj 1 step 45 action [8] argmax 0 0.6072478073151615
ite 10 traj 1 step 46 action [0] argmax 0 0.6072478073156162
ite 10 traj 1 step 47 action [90] argmax 0 0.607247807316071
ite 10 traj 1 step 48 action [72] argmax 0 0.6072478073167531
ite 10 traj 1 step 49 action [80] argmax 0 0.6072478073174352
ite 10 traj 2 step 0 action [0] argmax 0 0.615175166921972
ite 10 traj 2 step 1 

ite 10 traj 4 step 19 action [0] argmax 0 0.1153490818633145
ite 10 traj 4 step 20 action [0] argmax 0 0.11534908186376924
ite 10 traj 4 step 21 action [0] argmax 0 0.11534908186422399
ite 10 traj 4 step 22 action [0] argmax 0 0.11534908186422399
ite 10 traj 4 step 23 action [0] argmax 0 0.11534908186422399
ite 10 traj 4 step 24 action [0] argmax 0 0.11534908186422399
ite 10 traj 4 step 25 action [0] argmax 0 0.11534908186422399
ite 10 traj 4 step 26 action [0] argmax 0 0.11534908186422399
ite 10 traj 4 step 27 action [74] argmax 0 0.11534908186422399
ite 10 traj 4 step 28 action [30] argmax 0 0.11534908186422399
ite 10 traj 4 step 29 action [0] argmax 0 0.11534908186422399
ite 10 traj 4 step 30 action [0] argmax 0 0.11534908186422399
ite 10 traj 4 step 31 action [11] argmax 0 0.11534908186422399
ite 10 traj 4 step 32 action [24] argmax 0 0.11534908186422399
ite 10 traj 4 step 33 action [0] argmax 0 0.11534908186467874
ite 10 traj 4 step 34 action [0] argmax 0 0.11534908186513348
ite 1

ite 11 traj 2 step 3 action [0] argmax 0 0.705846150922298
ite 11 traj 2 step 4 action [0] argmax 0 0.7058461509227527
ite 11 traj 2 step 5 action [0] argmax 0 0.7058461509227527
ite 11 traj 2 step 6 action [0] argmax 0 0.7058461509232075
ite 11 traj 2 step 7 action [0] argmax 0 0.705846150924117
ite 11 traj 2 step 8 action [1] argmax 0 0.7058461509250264
ite 11 traj 2 step 9 action [0] argmax 0 0.7058461509254812
ite 11 traj 2 step 10 action [0] argmax 0 0.7058461509254812
ite 11 traj 2 step 11 action [0] argmax 0 0.705846150925936
ite 11 traj 2 step 12 action [22] argmax 0 0.7058461509263907
ite 11 traj 2 step 13 action [0] argmax 0 0.7058461509268454
ite 11 traj 2 step 14 action [12] argmax 0 0.7058461509268454
ite 11 traj 2 step 15 action [36] argmax 0 0.7058461509268454
ite 11 traj 2 step 16 action [0] argmax 0 0.7058461509268454
ite 11 traj 2 step 17 action [26] argmax 0 0.7058461509268454
ite 11 traj 2 step 18 action [68] argmax 0 0.7058461509268454
ite 11 traj 2 step 19 action 

ite 11 traj 4 step 38 action [0] argmax 0 0.7082653196321189
ite 11 traj 4 step 39 action [0] argmax 0 0.7082653196325737
ite 11 traj 4 step 40 action [0] argmax 0 0.7082653196330284
ite 11 traj 4 step 41 action [0] argmax 0 0.7082653196339379
ite 11 traj 4 step 42 action [0] argmax 0 0.7082653196343927
ite 11 traj 4 step 43 action [26] argmax 0 0.7082653196348474
ite 11 traj 4 step 44 action [22] argmax 0 0.7082653196353021
ite 11 traj 4 step 45 action [0] argmax 0 0.7082653196357569
ite 11 traj 4 step 46 action [72] argmax 0 0.7082653196362116
ite 11 traj 4 step 47 action [0] argmax 0 0.7082653196362116
ite 11 traj 4 step 48 action [0] argmax 0 0.7082653196362116
ite 11 traj 4 step 49 action [0] argmax 0 0.7082653196362116
ite 12 traj 0 step 0 action [0] argmax 0 0.9216160135674727
ite 12 traj 0 step 1 action [10] argmax 0 0.9216160135681548
ite 12 traj 0 step 2 action [0] argmax 0 0.9216160135690643
ite 12 traj 0 step 3 action [0] argmax 0 0.921616013569519
ite 12 traj 0 step 4 acti

ite 12 traj 2 step 22 action [0] argmax 0 0.615175166927429
ite 12 traj 2 step 23 action [0] argmax 0 0.6151751669276564
ite 12 traj 2 step 24 action [0] argmax 0 0.6151751669278838
ite 12 traj 2 step 25 action [32] argmax 0 0.6151751669283385
ite 12 traj 2 step 26 action [0] argmax 0 0.6151751669287933
ite 12 traj 2 step 27 action [0] argmax 0 0.615175166929248
ite 12 traj 2 step 28 action [10] argmax 0 0.6151751669294754
ite 12 traj 2 step 29 action [42] argmax 0 0.6151751669299301
ite 12 traj 2 step 30 action [0] argmax 0 0.6151751669303849
ite 12 traj 2 step 31 action [0] argmax 0 0.6151751669303849
ite 12 traj 2 step 32 action [44] argmax 0 0.6151751669308396
ite 12 traj 2 step 33 action [0] argmax 0 0.6151751669312944
ite 12 traj 2 step 34 action [0] argmax 0 0.6151751669317491
ite 12 traj 2 step 35 action [0] argmax 0 0.6151751669319765
ite 12 traj 2 step 36 action [0] argmax 0 0.6151751669324312
ite 12 traj 2 step 37 action [3] argmax 0 0.6151751669326586
ite 12 traj 2 step 38 

ite 13 traj 0 step 6 action [0] argmax 0 0.7022608541842601
ite 13 traj 0 step 7 action [26] argmax 0 0.7022608541856243
ite 13 traj 0 step 8 action [0] argmax 0 0.7022608541860791
ite 13 traj 0 step 9 action [18] argmax 0 0.7022608541869886
ite 13 traj 0 step 10 action [24] argmax 0 0.7022608541874433
ite 13 traj 0 step 11 action [22] argmax 0 0.7022608541874433
ite 13 traj 0 step 12 action [27] argmax 0 0.7022608541878981
ite 13 traj 0 step 13 action [13] argmax 0 0.7022608541883528
ite 13 traj 0 step 14 action [45] argmax 0 0.7022608541888076
ite 13 traj 0 step 15 action [67] argmax 0 0.7022608541888076
ite 13 traj 0 step 16 action [0] argmax 0 0.7022608541892623
ite 13 traj 0 step 17 action [19] argmax 0 0.7022608541892623
ite 13 traj 0 step 18 action [63] argmax 0 0.7022608541892623
ite 13 traj 0 step 19 action [0] argmax 0 0.7022608541892623
ite 13 traj 0 step 20 action [73] argmax 0 0.7022608541897171
ite 13 traj 0 step 21 action [66] argmax 0 0.7022608541901718
ite 13 traj 0 st

ite 13 traj 2 step 41 action [99] argmax 0 0.6072478073174352
ite 13 traj 2 step 42 action [55] argmax 0 0.60724780731789
ite 13 traj 2 step 43 action [0] argmax 0 0.6072478073181173
ite 13 traj 2 step 44 action [0] argmax 0 0.6072478073181173
ite 13 traj 2 step 45 action [0] argmax 0 0.6072478073185721
ite 13 traj 2 step 46 action [0] argmax 0 0.6072478073190268
ite 13 traj 2 step 47 action [11] argmax 0 0.6072478073194816
ite 13 traj 2 step 48 action [0] argmax 0 0.6072478073201637
ite 13 traj 2 step 49 action [0] argmax 0 0.6072478073206184
ite 13 traj 3 step 0 action [0] argmax 0 0.11506393968329576
ite 13 traj 3 step 1 action [18] argmax 0 0.11506393968352313
ite 13 traj 3 step 2 action [0] argmax 0 0.18152694826108018
ite 13 traj 3 step 3 action [0] argmax 0 1.1150639396832958
ite 13 traj 3 step 4 action [2] argmax 0 1.1150639396837505
ite 13 traj 3 step 5 action [0] argmax 0 1.1150639396839779
ite 13 traj 3 step 6 action [0] argmax 0 1.1150639396842053
ite 13 traj 3 step 7 actio

ite 14 traj 0 step 26 action [62] argmax 0 0.6151751669326586
ite 14 traj 0 step 27 action [60] argmax 0 0.615175166932886
ite 14 traj 0 step 28 action [41] argmax 0 0.6151751669331134
ite 14 traj 0 step 29 action [26] argmax 0 0.6151751669335681
ite 14 traj 0 step 30 action [0] argmax 0 0.6151751669335681
ite 14 traj 0 step 31 action [0] argmax 0 0.6151751669335681
ite 14 traj 0 step 32 action [8] argmax 0 0.6151751669344776
ite 14 traj 0 step 33 action [40] argmax 0 0.6151751669344776
ite 14 traj 0 step 34 action [0] argmax 0 0.6151751669351597
ite 14 traj 0 step 35 action [0] argmax 0 0.6151751669353871
ite 14 traj 0 step 36 action [30] argmax 0 0.6151751669356145
ite 14 traj 0 step 37 action [61] argmax 0 0.6151751669358418
ite 14 traj 0 step 38 action [12] argmax 0 0.6151751669358418
ite 14 traj 0 step 39 action [61] argmax 0 0.6151751669358418
ite 14 traj 0 step 40 action [21] argmax 0 0.6151751669358418
ite 14 traj 0 step 41 action [48] argmax 0 0.6151751669360692
ite 14 traj 0 

ite 14 traj 3 step 10 action [23] argmax 0 0.7022608541828959
ite 14 traj 3 step 11 action [0] argmax 0 0.7022608541833506
ite 14 traj 3 step 12 action [8] argmax 0 0.7022608541838053
ite 14 traj 3 step 13 action [0] argmax 0 0.7022608541842601
ite 14 traj 3 step 14 action [9] argmax 0 0.7022608541851696
ite 14 traj 3 step 15 action [0] argmax 0 0.7022608541856243
ite 14 traj 3 step 16 action [0] argmax 0 0.7022608541856243
ite 14 traj 3 step 17 action [0] argmax 0 0.7022608541856243
ite 14 traj 3 step 18 action [0] argmax 0 0.7022608541856243
ite 14 traj 3 step 19 action [14] argmax 0 0.7022608541860791
ite 14 traj 3 step 20 action [0] argmax 0 0.7022608541865338
ite 14 traj 3 step 21 action [42] argmax 0 0.7022608541869886
ite 14 traj 3 step 22 action [0] argmax 0 0.7022608541869886
ite 14 traj 3 step 23 action [0] argmax 0 0.7022608541874433
ite 14 traj 3 step 24 action [0] argmax 0 0.7022608541878981
ite 14 traj 3 step 25 action [0] argmax 0 0.7022608541888076
ite 14 traj 3 step 26

ite 15 traj 0 step 44 action [24] argmax 0 1.1150639396939823
ite 15 traj 0 step 45 action [0] argmax 0 1.1150639396942097
ite 15 traj 0 step 46 action [0] argmax 0 1.1150639396948918
ite 15 traj 0 step 47 action [92] argmax 0 1.1150639396953466
ite 15 traj 0 step 48 action [30] argmax 0 1.115063939695574
ite 15 traj 0 step 49 action [0] argmax 0 1.1150639396960287
ite 15 traj 1 step 0 action [0] argmax 0 0.7082653196207502
ite 15 traj 1 step 1 action [7] argmax 0 0.7082653196221145
ite 15 traj 1 step 2 action [25] argmax 0 0.7082653196243882
ite 15 traj 1 step 3 action [0] argmax 0 0.7082653196257525
ite 15 traj 1 step 4 action [0] argmax 0 0.708265319626662
ite 15 traj 1 step 5 action [58] argmax 0 0.7082653196275714
ite 15 traj 1 step 6 action [0] argmax 0 0.7082653196280262
ite 15 traj 1 step 7 action [0] argmax 0 0.7082653196280262
ite 15 traj 1 step 8 action [32] argmax 0 0.7082653196284809
ite 15 traj 1 step 9 action [0] argmax 0 0.7082653196289357
ite 15 traj 1 step 10 action [

ite 15 traj 3 step 28 action [74] argmax 0 0.10343623826929615
ite 15 traj 3 step 29 action [0] argmax 0 0.1036208815557984
ite 15 traj 3 step 30 action [62] argmax 0 0.10636990522607448
ite 15 traj 3 step 31 action [0] argmax 0 0.10658215767170987
ite 15 traj 3 step 32 action [24] argmax 0 0.10845810882074147
ite 15 traj 3 step 33 action [0] argmax 0 0.10943633764372862
ite 15 traj 3 step 34 action [0] argmax 0 0.1107248158650691
ite 15 traj 3 step 35 action [0] argmax 0 0.11128346963096192
ite 15 traj 3 step 36 action [42] argmax 0 0.11134125468424827
ite 15 traj 3 step 37 action [0] argmax 0 1.1013110904245877
ite 15 traj 3 step 38 action [0] argmax 0 1.1013110904250425
ite 15 traj 3 step 39 action [76] argmax 0 1.1013110904254972
ite 15 traj 3 step 40 action [49] argmax 0 1.1013110904254972
ite 15 traj 3 step 41 action [0] argmax 0 1.1013110904254972
ite 15 traj 3 step 42 action [0] argmax 0 1.101311090425952
ite 15 traj 3 step 43 action [83] argmax 0 1.1013110904264067
ite 15 traj

ite 16 traj 1 step 13 action [70] argmax 0 0.7022608541819864
ite 16 traj 1 step 14 action [48] argmax 0 0.7022608541819864
ite 16 traj 1 step 15 action [32] argmax 0 0.7022608541819864
ite 16 traj 1 step 16 action [0] argmax 0 0.7022608541819864
ite 16 traj 1 step 17 action [0] argmax 0 0.7022608541819864
ite 16 traj 1 step 18 action [0] argmax 0 0.7022608541824411
ite 16 traj 1 step 19 action [0] argmax 0 0.7022608541828959
ite 16 traj 1 step 20 action [42] argmax 0 0.7022608541838053
ite 16 traj 1 step 21 action [21] argmax 0 0.7022608541842601
ite 16 traj 1 step 22 action [0] argmax 0 0.7022608541851696
ite 16 traj 1 step 23 action [0] argmax 0 0.7022608541856243
ite 16 traj 1 step 24 action [0] argmax 0 0.7022608541856243
ite 16 traj 1 step 25 action [33] argmax 0 0.7022608541856243
ite 16 traj 1 step 26 action [0] argmax 0 0.7022608541856243
ite 16 traj 1 step 27 action [22] argmax 0 0.7022608541856243
ite 16 traj 1 step 28 action [0] argmax 0 0.7022608541856243
ite 16 traj 1 ste

ite 16 traj 3 step 47 action [0] argmax 0 0.6072478073181173
ite 16 traj 3 step 48 action [0] argmax 0 0.6072478073183447
ite 16 traj 3 step 49 action [0] argmax 0 0.6072478073185721
ite 16 traj 4 step 0 action [0] argmax 0 0.9216160135674727
ite 16 traj 4 step 1 action [33] argmax 0 0.9216160135677001
ite 16 traj 4 step 2 action [0] argmax 0 0.9216160135677001
ite 16 traj 4 step 3 action [0] argmax 0 0.9216160135681548
ite 16 traj 4 step 4 action [19] argmax 0 0.9216160135683822
ite 16 traj 4 step 5 action [0] argmax 0 0.9216160135686096
ite 16 traj 4 step 6 action [0] argmax 0 0.9216160135690643
ite 16 traj 4 step 7 action [0] argmax 0 0.9216160135690643
ite 16 traj 4 step 8 action [0] argmax 0 0.9216160135690643
ite 16 traj 4 step 9 action [59] argmax 0 0.9216160135699738
ite 16 traj 4 step 10 action [54] argmax 0 0.9216160135706559
ite 16 traj 4 step 11 action [21] argmax 0 0.9216160135711107
ite 16 traj 4 step 12 action [0] argmax 0 0.9216160135717928
ite 16 traj 4 step 13 action 

ite 17 traj 1 step 32 action [34] argmax 0 1.101311090427771
ite 17 traj 1 step 33 action [30] argmax 0 1.1013110904286805
ite 17 traj 1 step 34 action [40] argmax 0 1.10131109042959
ite 17 traj 1 step 35 action [45] argmax 0 1.10131109042959
ite 17 traj 1 step 36 action [0] argmax 0 1.1013110904300447
ite 17 traj 1 step 37 action [0] argmax 0 1.1013110904300447
ite 17 traj 1 step 38 action [0] argmax 0 1.1013110904304995
ite 17 traj 1 step 39 action [0] argmax 0 1.1013110904309542
ite 17 traj 1 step 40 action [0] argmax 0 1.101311090431409
ite 17 traj 1 step 41 action [0] argmax 0 1.101311090431409
ite 17 traj 1 step 42 action [58] argmax 0 1.101311090431409
ite 17 traj 1 step 43 action [0] argmax 0 1.101311090431409
ite 17 traj 1 step 44 action [0] argmax 0 1.1013110904318637
ite 17 traj 1 step 45 action [62] argmax 0 1.1013110904323185
ite 17 traj 1 step 46 action [0] argmax 0 1.1013110904323185
ite 17 traj 1 step 47 action [0] argmax 0 1.1013110904327732
ite 17 traj 1 step 48 actio

ite 17 traj 4 step 16 action [18] argmax 0 0.11534908185785753
ite 17 traj 4 step 17 action [0] argmax 0 0.11534908185785753
ite 17 traj 4 step 18 action [0] argmax 0 0.11534908185785753
ite 17 traj 4 step 19 action [58] argmax 0 0.11534908185785753
ite 17 traj 4 step 20 action [0] argmax 0 0.11534908185831227
ite 17 traj 4 step 21 action [0] argmax 0 0.11534908185831227
ite 17 traj 4 step 22 action [0] argmax 0 0.11534908185831227
ite 17 traj 4 step 23 action [6] argmax 0 0.11534908185876702
ite 17 traj 4 step 24 action [0] argmax 0 0.11534908185967652
ite 17 traj 4 step 25 action [29] argmax 0 0.11534908186013126
ite 17 traj 4 step 26 action [0] argmax 0 0.11534908186013126
ite 17 traj 4 step 27 action [0] argmax 0 0.115349081862405
ite 17 traj 4 step 28 action [0] argmax 0 0.11534908186467874
ite 17 traj 4 step 29 action [0] argmax 0 0.11534908186513348
ite 17 traj 4 step 30 action [0] argmax 0 0.11534908186649773
ite 17 traj 4 step 31 action [0] argmax 0 0.11534908186740722
ite 17 

ite 18 traj 1 step 49 action [30] argmax 0 0.1153490818710452
ite 18 traj 2 step 0 action [0] argmax 0 0.615175166921972
ite 18 traj 2 step 1 action [31] argmax 0 0.6151751669233363
ite 18 traj 2 step 2 action [42] argmax 0 0.6151751669233363
ite 18 traj 2 step 3 action [0] argmax 0 0.6151751669233363
ite 18 traj 2 step 4 action [0] argmax 0 0.6151751669235637
ite 18 traj 2 step 5 action [40] argmax 0 0.615175166923791
ite 18 traj 2 step 6 action [0] argmax 0 0.6151751669240184
ite 18 traj 2 step 7 action [0] argmax 0 0.6151751669240184
ite 18 traj 2 step 8 action [0] argmax 0 0.6151751669240184
ite 18 traj 2 step 9 action [0] argmax 0 0.6151751669242458
ite 18 traj 2 step 10 action [9] argmax 0 0.6151751669244732
ite 18 traj 2 step 11 action [26] argmax 0 0.6151751669249279
ite 18 traj 2 step 12 action [0] argmax 0 0.6151751669249279
ite 18 traj 2 step 13 action [46] argmax 0 0.6151751669253827
ite 18 traj 2 step 14 action [49] argmax 0 0.6151751669258374
ite 18 traj 2 step 15 action 

ite 18 traj 4 step 34 action [35] argmax 0 0.9216160135815699
ite 18 traj 4 step 35 action [0] argmax 0 0.9216160135817972
ite 18 traj 4 step 36 action [93] argmax 0 0.9216160135820246
ite 18 traj 4 step 37 action [28] argmax 0 0.9216160135824794
ite 18 traj 4 step 38 action [44] argmax 0 0.9216160135829341
ite 18 traj 4 step 39 action [0] argmax 0 0.9216160135831615
ite 18 traj 4 step 40 action [72] argmax 0 0.9216160135833888
ite 18 traj 4 step 41 action [0] argmax 0 0.921616013584071
ite 18 traj 4 step 42 action [0] argmax 0 0.9216160135847531
ite 18 traj 4 step 43 action [0] argmax 0 0.9216160135849805
ite 18 traj 4 step 44 action [56] argmax 0 0.9216160135852078
ite 18 traj 4 step 45 action [0] argmax 0 0.9216160135854352
ite 18 traj 4 step 46 action [0] argmax 0 0.9216160135861173
ite 18 traj 4 step 47 action [0] argmax 0 0.9216160135861173
ite 18 traj 4 step 48 action [97] argmax 0 0.9216160135867995
ite 18 traj 4 step 49 action [0] argmax 0 0.9216160135874816
ite 19 traj 0 step

ite 19 traj 2 step 19 action [0] argmax 0 0.6072478073092498
ite 19 traj 2 step 20 action [0] argmax 0 0.6072478073094771
ite 19 traj 2 step 21 action [0] argmax 0 0.6072478073101593
ite 19 traj 2 step 22 action [0] argmax 0 0.6072478073108414
ite 19 traj 2 step 23 action [0] argmax 0 0.6072478073108414
ite 19 traj 2 step 24 action [0] argmax 0 0.6072478073112961
ite 19 traj 2 step 25 action [0] argmax 0 0.6072478073117509
ite 19 traj 2 step 26 action [0] argmax 0 0.6072478073119782
ite 19 traj 2 step 27 action [58] argmax 0 0.6072478073122056
ite 19 traj 2 step 28 action [46] argmax 0 0.6072478073122056
ite 19 traj 2 step 29 action [56] argmax 0 0.6072478073126604
ite 19 traj 2 step 30 action [0] argmax 0 0.6072478073131151
ite 19 traj 2 step 31 action [0] argmax 0 0.6072478073133425
ite 19 traj 2 step 32 action [0] argmax 0 0.6072478073137972
ite 19 traj 2 step 33 action [0] argmax 0 0.6072478073149341
ite 19 traj 2 step 34 action [0] argmax 0 0.6072478073153889
ite 19 traj 2 step 35

ite 20 traj 0 step 3 action [18] argmax 0 0.11506393968488737
ite 20 traj 0 step 4 action [18] argmax 0 0.1150639396855695
ite 20 traj 0 step 5 action [0] argmax 0 0.1150639396855695
ite 20 traj 0 step 6 action [0] argmax 0 0.11506393968579687
ite 20 traj 0 step 7 action [0] argmax 0 0.11506393968716111
ite 20 traj 0 step 8 action [24] argmax 0 0.1150639396880706
ite 20 traj 0 step 9 action [0] argmax 0 0.11506393968829798
ite 20 traj 0 step 10 action [0] argmax 0 0.11506393968829798
ite 20 traj 0 step 11 action [48] argmax 0 0.11506393968829798
ite 20 traj 0 step 12 action [0] argmax 0 0.11506393968852535
ite 20 traj 0 step 13 action [0] argmax 0 0.1150639396889801
ite 20 traj 0 step 14 action [7] argmax 0 0.11830626715959625
ite 20 traj 0 step 15 action [0] argmax 0 0.12493935441193571
ite 20 traj 0 step 16 action [0] argmax 0 0.1282978489391553
ite 20 traj 0 step 17 action [51] argmax 0 0.1299629905161055
ite 20 traj 0 step 18 action [0] argmax 0 1.1150639396885254
ite 20 traj 0 ste

ite 20 traj 2 step 38 action [0] argmax 0 0.921616013578614
ite 20 traj 2 step 39 action [0] argmax 0 0.921616013578614
ite 20 traj 2 step 40 action [69] argmax 0 0.921616013578614
ite 20 traj 2 step 41 action [0] argmax 0 0.921616013578614
ite 20 traj 2 step 42 action [0] argmax 0 0.9216160135788414
ite 20 traj 2 step 43 action [0] argmax 0 0.9216160135790687
ite 20 traj 2 step 44 action [4] argmax 0 0.9216160135790687
ite 20 traj 2 step 45 action [84] argmax 0 0.9216160135797509
ite 20 traj 2 step 46 action [7] argmax 0 0.9216160135799782
ite 20 traj 2 step 47 action [0] argmax 0 0.9216160135802056
ite 20 traj 2 step 48 action [0] argmax 0 0.9216160135802056
ite 20 traj 2 step 49 action [0] argmax 0 0.921616013580433
ite 20 traj 3 step 0 action [9] argmax 0 2.155465017494862e-05
ite 20 traj 3 step 1 action [0] argmax 0 0.11534908185149106
ite 20 traj 3 step 2 action [33] argmax 0 0.11534908185149106
ite 20 traj 3 step 3 action [30] argmax 0 0.11534908185194581
ite 20 traj 3 step 4 ac

ite 21 traj 0 step 22 action [0] argmax 0 0.7058461509313929
ite 21 traj 0 step 23 action [0] argmax 0 0.7058461509313929
ite 21 traj 0 step 24 action [14] argmax 0 0.7058461509313929
ite 21 traj 0 step 25 action [46] argmax 0 0.7058461509313929
ite 21 traj 0 step 26 action [0] argmax 0 0.7058461509313929
ite 21 traj 0 step 27 action [0] argmax 0 0.7058461509313929
ite 21 traj 0 step 28 action [0] argmax 0 0.7058461509313929
ite 21 traj 0 step 29 action [0] argmax 0 0.7058461509313929
ite 21 traj 0 step 30 action [0] argmax 0 0.7058461509318477
ite 21 traj 0 step 31 action [10] argmax 0 0.7058461509323024
ite 21 traj 0 step 32 action [0] argmax 0 0.7058461509323024
ite 21 traj 0 step 33 action [0] argmax 0 0.7058461509327572
ite 21 traj 0 step 34 action [0] argmax 0 0.7058461509336666
ite 21 traj 0 step 35 action [26] argmax 0 0.7058461509341214
ite 21 traj 0 step 36 action [0] argmax 0 0.7058461509341214
ite 21 traj 0 step 37 action [69] argmax 0 0.7058461509341214
ite 21 traj 0 step 

ite 21 traj 3 step 7 action [0] argmax 0 0.607247807306976
ite 21 traj 3 step 8 action [0] argmax 0 0.607247807306976
ite 21 traj 3 step 9 action [43] argmax 0 0.607247807306976
ite 21 traj 3 step 10 action [62] argmax 0 0.6072478073074308
ite 21 traj 3 step 11 action [0] argmax 0 0.6072478073078855
ite 21 traj 3 step 12 action [50] argmax 0 0.6072478073081129
ite 21 traj 3 step 13 action [0] argmax 0 0.6072478073081129
ite 21 traj 3 step 14 action [0] argmax 0 0.6072478073081129
ite 21 traj 3 step 15 action [51] argmax 0 0.6072478073083403
ite 21 traj 3 step 16 action [0] argmax 0 0.6072478073090224
ite 21 traj 3 step 17 action [0] argmax 0 0.6072478073090224
ite 21 traj 3 step 18 action [4] argmax 0 0.6072478073090224
ite 21 traj 3 step 19 action [9] argmax 0 0.6072478073094771
ite 21 traj 3 step 20 action [34] argmax 0 0.6072478073094771
ite 21 traj 3 step 21 action [76] argmax 0 0.6072478073094771
ite 21 traj 3 step 22 action [14] argmax 0 0.6072478073097045
ite 21 traj 3 step 23 a

ite 22 traj 0 step 41 action [0] argmax 0 0.6072478073203911
ite 22 traj 0 step 42 action [0] argmax 0 0.6072478073203911
ite 22 traj 0 step 43 action [0] argmax 0 0.6072478073206184
ite 22 traj 0 step 44 action [0] argmax 0 0.6072478073208458
ite 22 traj 0 step 45 action [46] argmax 0 0.6072478073210732
ite 22 traj 0 step 46 action [38] argmax 0 0.6072478073210732
ite 22 traj 0 step 47 action [78] argmax 0 0.6072478073213006
ite 22 traj 0 step 48 action [0] argmax 0 0.6072478073219827
ite 22 traj 0 step 49 action [78] argmax 0 0.6072478073231196
ite 22 traj 1 step 0 action [0] argmax 0 0.11506393968329576
ite 22 traj 1 step 1 action [38] argmax 0 0.11506393968329576
ite 22 traj 1 step 2 action [0] argmax 0 0.12531251765562956
ite 22 traj 1 step 3 action [58] argmax 0 0.1616072626627556
ite 22 traj 1 step 4 action [47] argmax 0 0.16234806305919847
ite 22 traj 1 step 5 action [51] argmax 0 0.16705406002574819
ite 22 traj 1 step 6 action [0] argmax 0 1.1150639396830684
ite 22 traj 1 step

ite 22 traj 3 step 26 action [0] argmax 0 0.7022608541874433
ite 22 traj 3 step 27 action [10] argmax 0 0.7022608541878981
ite 22 traj 3 step 28 action [54] argmax 0 0.7022608541883528
ite 22 traj 3 step 29 action [0] argmax 0 0.7022608541883528
ite 22 traj 3 step 30 action [0] argmax 0 0.7022608541883528
ite 22 traj 3 step 31 action [0] argmax 0 0.7022608541888076
ite 22 traj 3 step 32 action [3] argmax 0 0.7022608541897171
ite 22 traj 3 step 33 action [6] argmax 0 0.7022608541906266
ite 22 traj 3 step 34 action [0] argmax 0 0.702260854191536
ite 22 traj 3 step 35 action [0] argmax 0 0.7022608541919908
ite 22 traj 3 step 36 action [71] argmax 0 0.7022608541924455
ite 22 traj 3 step 37 action [45] argmax 0 0.702260854193355
ite 22 traj 3 step 38 action [88] argmax 0 0.702260854193355
ite 22 traj 3 step 39 action [83] argmax 0 0.7022608541938098
ite 22 traj 3 step 40 action [0] argmax 0 0.7022608541942645
ite 22 traj 3 step 41 action [0] argmax 0 0.702260854195174
ite 22 traj 3 step 42 

ite 23 traj 1 step 10 action [21] argmax 0 0.9216160135708833
ite 23 traj 1 step 11 action [0] argmax 0 0.9216160135717928
ite 23 traj 1 step 12 action [0] argmax 0 0.9216160135722475
ite 23 traj 1 step 13 action [0] argmax 0 0.9216160135722475
ite 23 traj 1 step 14 action [53] argmax 0 0.9216160135724749
ite 23 traj 1 step 15 action [0] argmax 0 0.9216160135729297
ite 23 traj 1 step 16 action [25] argmax 0 0.9216160135736118
ite 23 traj 1 step 17 action [4] argmax 0 0.9216160135736118
ite 23 traj 1 step 18 action [76] argmax 0 0.9216160135736118
ite 23 traj 1 step 19 action [41] argmax 0 0.9216160135738392
ite 23 traj 1 step 20 action [49] argmax 0 0.9216160135738392
ite 23 traj 1 step 21 action [0] argmax 0 0.9216160135742939
ite 23 traj 1 step 22 action [3] argmax 0 0.9216160135742939
ite 23 traj 1 step 23 action [45] argmax 0 0.9216160135745213
ite 23 traj 1 step 24 action [21] argmax 0 0.9216160135761129
ite 23 traj 1 step 25 action [50] argmax 0 0.9216160135779319
ite 23 traj 1 s

ite 23 traj 3 step 44 action [0] argmax 0 0.7058461509386689
ite 23 traj 3 step 45 action [0] argmax 0 0.7058461509386689
ite 23 traj 3 step 46 action [102] argmax 0 0.7058461509386689
ite 23 traj 3 step 47 action [0] argmax 0 0.7058461509386689
ite 23 traj 3 step 48 action [98] argmax 0 0.7058461509386689
ite 23 traj 3 step 49 action [0] argmax 0 0.7058461509386689
ite 23 traj 4 step 0 action [0] argmax 0 0.8867413289726755
ite 23 traj 4 step 1 action [0] argmax 0 0.8867413289731303
ite 23 traj 4 step 2 action [14] argmax 0 0.8867413289731303
ite 23 traj 4 step 3 action [60] argmax 0 0.886741328973585
ite 23 traj 4 step 4 action [29] argmax 0 0.886741328973585
ite 23 traj 4 step 5 action [0] argmax 0 0.886741328973585
ite 23 traj 4 step 6 action [0] argmax 0 0.886741328973585
ite 23 traj 4 step 7 action [0] argmax 0 0.886741328973585
ite 23 traj 4 step 8 action [17] argmax 0 0.8867413289740398
ite 23 traj 4 step 9 action [19] argmax 0 0.8867413289740398
ite 23 traj 4 step 10 action [0

ite 24 traj 1 step 29 action [0] argmax 0 0.6151751669299301
ite 24 traj 1 step 30 action [54] argmax 0 0.6151751669301575
ite 24 traj 1 step 31 action [0] argmax 0 0.6151751669306122
ite 24 traj 1 step 32 action [0] argmax 0 0.6151751669306122
ite 24 traj 1 step 33 action [0] argmax 0 0.615175166931067
ite 24 traj 1 step 34 action [0] argmax 0 0.6151751669315217
ite 24 traj 1 step 35 action [0] argmax 0 0.6151751669317491
ite 24 traj 1 step 36 action [0] argmax 0 0.6151751669317491
ite 24 traj 1 step 37 action [0] argmax 0 0.6151751669319765
ite 24 traj 1 step 38 action [0] argmax 0 0.6151751669322039
ite 24 traj 1 step 39 action [0] argmax 0 0.6151751669337955
ite 24 traj 1 step 40 action [1] argmax 0 0.6151751669351597
ite 24 traj 1 step 41 action [50] argmax 0 0.6151751669358418
ite 24 traj 1 step 42 action [0] argmax 0 0.6151751669360692
ite 24 traj 1 step 43 action [73] argmax 0 0.6151751669369787
ite 24 traj 1 step 44 action [0] argmax 0 0.6151751669374335
ite 24 traj 1 step 45 

ite 24 traj 4 step 14 action [17] argmax 0 0.8867413289758588
ite 24 traj 4 step 15 action [0] argmax 0 0.8867413289758588
ite 24 traj 4 step 16 action [59] argmax 0 0.8867413289763135
ite 24 traj 4 step 17 action [0] argmax 0 0.8867413289767683
ite 24 traj 4 step 18 action [0] argmax 0 0.8867413289767683
ite 24 traj 4 step 19 action [0] argmax 0 0.886741328977223
ite 24 traj 4 step 20 action [68] argmax 0 0.886741328977223
ite 24 traj 4 step 21 action [63] argmax 0 0.886741328977223
ite 24 traj 4 step 22 action [0] argmax 0 0.886741328977223
ite 24 traj 4 step 23 action [0] argmax 0 0.886741328977223
ite 24 traj 4 step 24 action [7] argmax 0 0.8867413289776778
ite 24 traj 4 step 25 action [65] argmax 0 0.8867413289785873
ite 24 traj 4 step 26 action [0] argmax 0 0.8867413289785873
ite 24 traj 4 step 27 action [15] argmax 0 0.886741328979042
ite 24 traj 4 step 28 action [0] argmax 0 0.886741328979042
ite 24 traj 4 step 29 action [57] argmax 0 0.886741328979042
ite 24 traj 4 step 30 act

ite 25 traj 1 step 48 action [0] argmax 0 0.702260854191536
ite 25 traj 1 step 49 action [0] argmax 0 0.702260854191536
ite 25 traj 2 step 0 action [0] argmax 0 0.6072478073024286
ite 25 traj 2 step 1 action [0] argmax 0 0.6072478073031107
ite 25 traj 2 step 2 action [0] argmax 0 0.6072478073037928
ite 25 traj 2 step 3 action [42] argmax 0 0.6072478073040202
ite 25 traj 2 step 4 action [15] argmax 0 0.6072478073042475
ite 25 traj 2 step 5 action [0] argmax 0 0.6072478073049297
ite 25 traj 2 step 6 action [0] argmax 0 0.607247807305157
ite 25 traj 2 step 7 action [0] argmax 0 0.6072478073058392
ite 25 traj 2 step 8 action [50] argmax 0 0.6072478073060665
ite 25 traj 2 step 9 action [3] argmax 0 0.6072478073067487
ite 25 traj 2 step 10 action [0] argmax 0 0.6072478073072034
ite 25 traj 2 step 11 action [0] argmax 0 0.6072478073074308
ite 25 traj 2 step 12 action [7] argmax 0 0.6072478073074308
ite 25 traj 2 step 13 action [0] argmax 0 0.6072478073074308
ite 25 traj 2 step 14 action [45] 

ite 25 traj 4 step 32 action [0] argmax 0 0.7058461509282097
ite 25 traj 4 step 33 action [0] argmax 0 0.7058461509286644
ite 25 traj 4 step 34 action [88] argmax 0 0.7058461509286644
ite 25 traj 4 step 35 action [51] argmax 0 0.7058461509286644
ite 25 traj 4 step 36 action [0] argmax 0 0.7058461509286644
ite 25 traj 4 step 37 action [64] argmax 0 0.7058461509295739
ite 25 traj 4 step 38 action [9] argmax 0 0.7058461509300287
ite 25 traj 4 step 39 action [0] argmax 0 0.7058461509304834
ite 25 traj 4 step 40 action [0] argmax 0 0.7058461509313929
ite 25 traj 4 step 41 action [77] argmax 0 0.7058461509323024
ite 25 traj 4 step 42 action [0] argmax 0 0.7058461509323024
ite 25 traj 4 step 43 action [90] argmax 0 0.7058461509327572
ite 25 traj 4 step 44 action [0] argmax 0 0.7058461509332119
ite 25 traj 4 step 45 action [9] argmax 0 0.7058461509332119
ite 25 traj 4 step 46 action [0] argmax 0 0.7058461509332119
ite 25 traj 4 step 47 action [0] argmax 0 0.7058461509332119
ite 25 traj 4 step 

ite 26 traj 2 step 16 action [17] argmax 0 0.9216160135720202
ite 26 traj 2 step 17 action [0] argmax 0 0.9216160135724749
ite 26 traj 2 step 18 action [71] argmax 0 0.9216160135727023
ite 26 traj 2 step 19 action [0] argmax 0 0.9216160135733844
ite 26 traj 2 step 20 action [37] argmax 0 0.9216160135740665
ite 26 traj 2 step 21 action [36] argmax 0 0.9216160135740665
ite 26 traj 2 step 22 action [16] argmax 0 0.9216160135740665
ite 26 traj 2 step 23 action [28] argmax 0 0.9216160135745213
ite 26 traj 2 step 24 action [19] argmax 0 0.921616013574976
ite 26 traj 2 step 25 action [52] argmax 0 0.9216160135752034
ite 26 traj 2 step 26 action [27] argmax 0 0.9216160135756581
ite 26 traj 2 step 27 action [0] argmax 0 0.9216160135758855
ite 26 traj 2 step 28 action [19] argmax 0 0.9216160135765676
ite 26 traj 2 step 29 action [0] argmax 0 0.9216160135770224
ite 26 traj 2 step 30 action [0] argmax 0 0.9216160135770224
ite 26 traj 2 step 31 action [0] argmax 0 0.9216160135772498
ite 26 traj 2 s

ite 27 traj 0 step 0 action [0] argmax 0 0.7022608541797126
ite 27 traj 0 step 1 action [0] argmax 0 0.7022608541801674
ite 27 traj 0 step 2 action [0] argmax 0 0.7022608541810769
ite 27 traj 0 step 3 action [0] argmax 0 0.7022608541810769
ite 27 traj 0 step 4 action [60] argmax 0 0.7022608541810769
ite 27 traj 0 step 5 action [7] argmax 0 0.7022608541810769
ite 27 traj 0 step 6 action [61] argmax 0 0.7022608541810769
ite 27 traj 0 step 7 action [0] argmax 0 0.7022608541810769
ite 27 traj 0 step 8 action [28] argmax 0 0.7022608541815316
ite 27 traj 0 step 9 action [0] argmax 0 0.7022608541819864
ite 27 traj 0 step 10 action [0] argmax 0 0.7022608541819864
ite 27 traj 0 step 11 action [0] argmax 0 0.7022608541824411
ite 27 traj 0 step 12 action [0] argmax 0 0.7022608541828959
ite 27 traj 0 step 13 action [0] argmax 0 0.7022608541828959
ite 27 traj 0 step 14 action [63] argmax 0 0.7022608541828959
ite 27 traj 0 step 15 action [0] argmax 0 0.7022608541833506
ite 27 traj 0 step 16 action [

ite 27 traj 2 step 35 action [0] argmax 0 1.1150639396971656
ite 27 traj 2 step 36 action [0] argmax 0 1.1150639396976203
ite 27 traj 2 step 37 action [0] argmax 0 1.1150639396978477
ite 27 traj 2 step 38 action [18] argmax 0 1.1150639396985298
ite 27 traj 2 step 39 action [27] argmax 0 1.1150639396989845
ite 27 traj 2 step 40 action [0] argmax 0 1.115063939699212
ite 27 traj 2 step 41 action [0] argmax 0 1.1150639396994393
ite 27 traj 2 step 42 action [92] argmax 0 1.1150639396994393
ite 27 traj 2 step 43 action [0] argmax 0 1.1150639396996667
ite 27 traj 2 step 44 action [65] argmax 0 1.1150639397001214
ite 27 traj 2 step 45 action [0] argmax 0 1.1150639397003488
ite 27 traj 2 step 46 action [69] argmax 0 1.1150639397005762
ite 27 traj 2 step 47 action [91] argmax 0 1.1150639397008035
ite 27 traj 2 step 48 action [80] argmax 0 1.1150639397008035
ite 27 traj 2 step 49 action [0] argmax 0 1.1150639397012583
ite 27 traj 3 step 0 action [41] argmax 0 0.0002707345920498483
ite 27 traj 3 s

ite 28 traj 0 step 19 action [0] argmax 0 0.11534908185603854
ite 28 traj 0 step 20 action [58] argmax 0 0.11534908185649329
ite 28 traj 0 step 21 action [23] argmax 0 0.11534908185740278
ite 28 traj 0 step 22 action [0] argmax 0 0.11534908185785753
ite 28 traj 0 step 23 action [15] argmax 0 0.11534908185831227
ite 28 traj 0 step 24 action [0] argmax 0 0.11534908185831227
ite 28 traj 0 step 25 action [0] argmax 0 0.11534908185876702
ite 28 traj 0 step 26 action [0] argmax 0 0.11534908185876702
ite 28 traj 0 step 27 action [0] argmax 0 0.11534908185967652
ite 28 traj 0 step 28 action [0] argmax 0 0.11534908186058601
ite 28 traj 0 step 29 action [0] argmax 0 0.115349081862405
ite 28 traj 0 step 30 action [0] argmax 0 0.11534908186422399
ite 28 traj 0 step 31 action [15] argmax 0 0.11534908186513348
ite 28 traj 0 step 32 action [8] argmax 0 0.11534908186604298
ite 28 traj 0 step 33 action [57] argmax 0 0.11534908186604298
ite 28 traj 0 step 34 action [0] argmax 0 0.11534908186649773
ite 2

ite 28 traj 3 step 3 action [37] argmax 0 0.1013110904164023
ite 28 traj 3 step 4 action [0] argmax 0 0.10131109041685704
ite 28 traj 3 step 5 action [0] argmax 0 0.10131109041685704
ite 28 traj 3 step 6 action [0] argmax 0 0.10131109041685704
ite 28 traj 3 step 7 action [23] argmax 0 0.10131109041685704
ite 28 traj 3 step 8 action [0] argmax 0 0.10131109041731179
ite 28 traj 3 step 9 action [15] argmax 0 0.10131109041776654
ite 28 traj 3 step 10 action [24] argmax 0 0.10131109041776654
ite 28 traj 3 step 11 action [0] argmax 0 0.10131109041776654
ite 28 traj 3 step 12 action [0] argmax 0 0.10131109041822128
ite 28 traj 3 step 13 action [0] argmax 0 0.10131109041913078
ite 28 traj 3 step 14 action [0] argmax 0 0.10131109042004027
ite 28 traj 3 step 15 action [57] argmax 0 0.10131109042049502
ite 28 traj 3 step 16 action [0] argmax 0 0.10131109042094977
ite 28 traj 3 step 17 action [0] argmax 0 0.10131109042140451
ite 28 traj 3 step 18 action [2] argmax 0 0.10131109042140451
ite 28 traj

ite 29 traj 0 step 37 action [77] argmax 0 1.1150639396930728
ite 29 traj 0 step 38 action [0] argmax 0 1.1150639396935276
ite 29 traj 0 step 39 action [0] argmax 0 1.1150639396942097
ite 29 traj 0 step 40 action [15] argmax 0 1.1150639396942097
ite 29 traj 0 step 41 action [0] argmax 0 1.1150639396948918
ite 29 traj 0 step 42 action [89] argmax 0 1.1150639396958013
ite 29 traj 0 step 43 action [29] argmax 0 1.1150639396967108
ite 29 traj 0 step 44 action [0] argmax 0 1.1150639396971656
ite 29 traj 0 step 45 action [0] argmax 0 1.1150639396971656
ite 29 traj 0 step 46 action [0] argmax 0 1.1150639396971656
ite 29 traj 0 step 47 action [61] argmax 0 1.1150639396971656
ite 29 traj 0 step 48 action [0] argmax 0 1.1150639396976203
ite 29 traj 0 step 49 action [0] argmax 0 1.1150639396985298
ite 29 traj 1 step 0 action [0] argmax 0 0.7058461509213885
ite 29 traj 1 step 1 action [0] argmax 0 0.7058461509213885
ite 29 traj 1 step 2 action [0] argmax 0 0.7058461509218432
ite 29 traj 1 step 3 a

ite 29 traj 3 step 22 action [55] argmax 0 0.6072478073081129
ite 29 traj 3 step 23 action [0] argmax 0 0.6072478073081129
ite 29 traj 3 step 24 action [0] argmax 0 0.6072478073085676
ite 29 traj 3 step 25 action [0] argmax 0 0.6072478073092498
ite 29 traj 3 step 26 action [0] argmax 0 0.6072478073094771
ite 29 traj 3 step 27 action [0] argmax 0 0.6072478073097045
ite 29 traj 3 step 28 action [0] argmax 0 0.6072478073097045
ite 29 traj 3 step 29 action [80] argmax 0 0.6072478073103866
ite 29 traj 3 step 30 action [0] argmax 0 0.6072478073112961
ite 29 traj 3 step 31 action [0] argmax 0 0.6072478073119782
ite 29 traj 3 step 32 action [0] argmax 0 0.6072478073156162
ite 29 traj 3 step 33 action [0] argmax 0 0.6072478073187995
ite 29 traj 3 step 34 action [0] argmax 0 0.6072478073194816
ite 29 traj 3 step 35 action [0] argmax 0 0.6072478073203911
ite 29 traj 3 step 36 action [0] argmax 0 0.607247807324029
ite 29 traj 3 step 37 action [0] argmax 0 0.6072478073283492
ite 29 traj 3 step 38 a

ite 30 traj 1 step 7 action [57] argmax 0 0.7022608541819864
ite 30 traj 1 step 8 action [16] argmax 0 0.7022608541819864
ite 30 traj 1 step 9 action [0] argmax 0 0.7022608541824411
ite 30 traj 1 step 10 action [2] argmax 0 0.7022608541828959
ite 30 traj 1 step 11 action [0] argmax 0 0.7022608541833506
ite 30 traj 1 step 12 action [12] argmax 0 0.7022608541833506
ite 30 traj 1 step 13 action [28] argmax 0 0.7022608541838053
ite 30 traj 1 step 14 action [0] argmax 0 0.7022608541838053
ite 30 traj 1 step 15 action [41] argmax 0 0.7022608541842601
ite 30 traj 1 step 16 action [13] argmax 0 0.7022608541847148
ite 30 traj 1 step 17 action [0] argmax 0 0.7022608541847148
ite 30 traj 1 step 18 action [0] argmax 0 0.7022608541851696
ite 30 traj 1 step 19 action [70] argmax 0 0.7022608541856243
ite 30 traj 1 step 20 action [0] argmax 0 0.7022608541856243
ite 30 traj 1 step 21 action [40] argmax 0 0.7022608541856243
ite 30 traj 1 step 22 action [1] argmax 0 0.7022608541860791
ite 30 traj 1 step 

ite 30 traj 3 step 42 action [87] argmax 0 0.6072478073172078
ite 30 traj 3 step 43 action [0] argmax 0 0.6072478073174352
ite 30 traj 3 step 44 action [0] argmax 0 0.6072478073181173
ite 30 traj 3 step 45 action [0] argmax 0 0.6072478073183447
ite 30 traj 3 step 46 action [0] argmax 0 0.6072478073187995
ite 30 traj 3 step 47 action [0] argmax 0 0.6072478073187995
ite 30 traj 3 step 48 action [17] argmax 0 0.6072478073192542
ite 30 traj 3 step 49 action [18] argmax 0 0.607247807319709
ite 30 traj 4 step 0 action [0] argmax 0 0.615175166921972
ite 30 traj 4 step 1 action [0] argmax 0 0.6151751669228815
ite 30 traj 4 step 2 action [34] argmax 0 0.6151751669233363
ite 30 traj 4 step 3 action [0] argmax 0 0.6151751669240184
ite 30 traj 4 step 4 action [0] argmax 0 0.6151751669242458
ite 30 traj 4 step 5 action [0] argmax 0 0.6151751669244732
ite 30 traj 4 step 6 action [56] argmax 0 0.6151751669244732
ite 30 traj 4 step 7 action [0] argmax 0 0.6151751669244732
ite 30 traj 4 step 8 action [

ite 31 traj 1 step 27 action [0] argmax 0 0.9216160135761129
ite 31 traj 1 step 28 action [68] argmax 0 0.9216160135763403
ite 31 traj 1 step 29 action [0] argmax 0 0.9216160135765676
ite 31 traj 1 step 30 action [0] argmax 0 0.921616013576795
ite 31 traj 1 step 31 action [0] argmax 0 0.9216160135777045
ite 31 traj 1 step 32 action [0] argmax 0 0.9216160135783866
ite 31 traj 1 step 33 action [0] argmax 0 0.921616013578614
ite 31 traj 1 step 34 action [0] argmax 0 0.9216160135790687
ite 31 traj 1 step 35 action [0] argmax 0 0.9216160135795235
ite 31 traj 1 step 36 action [0] argmax 0 0.9216160135802056
ite 31 traj 1 step 37 action [0] argmax 0 0.921616013580433
ite 31 traj 1 step 38 action [0] argmax 0 0.9216160135813425
ite 31 traj 1 step 39 action [0] argmax 0 0.9216160135817972
ite 31 traj 1 step 40 action [90] argmax 0 0.9216160135817972
ite 31 traj 1 step 41 action [0] argmax 0 0.9216160135817972
ite 31 traj 1 step 42 action [0] argmax 0 0.9216160135820246
ite 31 traj 1 step 43 act

ite 31 traj 4 step 11 action [19] argmax 0 0.8867413289763135
ite 31 traj 4 step 12 action [26] argmax 0 0.8867413289767683
ite 31 traj 4 step 13 action [0] argmax 0 0.886741328977223
ite 31 traj 4 step 14 action [0] argmax 0 0.886741328977223
ite 31 traj 4 step 15 action [68] argmax 0 0.8867413289776778
ite 31 traj 4 step 16 action [0] argmax 0 0.8867413289785873
ite 31 traj 4 step 17 action [0] argmax 0 0.886741328979042
ite 31 traj 4 step 18 action [19] argmax 0 0.8867413289794968
ite 31 traj 4 step 19 action [0] argmax 0 0.8867413289799515
ite 31 traj 4 step 20 action [0] argmax 0 0.8867413289804063
ite 31 traj 4 step 21 action [0] argmax 0 0.886741328980861
ite 31 traj 4 step 22 action [0] argmax 0 0.8867413289813157
ite 31 traj 4 step 23 action [0] argmax 0 0.8867413289813157
ite 31 traj 4 step 24 action [0] argmax 0 0.8867413289817705
ite 31 traj 4 step 25 action [0] argmax 0 0.8867413289817705
ite 31 traj 4 step 26 action [0] argmax 0 0.8867413289822252
ite 31 traj 4 step 27 ac

ite 32 traj 1 step 44 action [29] argmax 0 0.11534908186604298
ite 32 traj 1 step 45 action [0] argmax 0 0.11534908186604298
ite 32 traj 1 step 46 action [0] argmax 0 0.11534908186604298
ite 32 traj 1 step 47 action [75] argmax 0 0.11534908186649773
ite 32 traj 1 step 48 action [0] argmax 0 0.11534908186740722
ite 32 traj 1 step 49 action [0] argmax 0 0.11534908186786197
ite 32 traj 2 step 0 action [0] argmax 0 0.7082653196207502
ite 32 traj 2 step 1 action [24] argmax 0 0.7082653196216597
ite 32 traj 2 step 2 action [0] argmax 0 0.7082653196216597
ite 32 traj 2 step 3 action [0] argmax 0 0.7082653196221145
ite 32 traj 2 step 4 action [9] argmax 0 0.7082653196225692
ite 32 traj 2 step 5 action [0] argmax 0 0.7082653196225692
ite 32 traj 2 step 6 action [4] argmax 0 0.7082653196225692
ite 32 traj 2 step 7 action [0] argmax 0 0.708265319623024
ite 32 traj 2 step 8 action [18] argmax 0 0.708265319623024
ite 32 traj 2 step 9 action [0] argmax 0 0.7082653196234787
ite 32 traj 2 step 10 acti

ite 32 traj 4 step 29 action [67] argmax 0 0.7058461509336666
ite 32 traj 4 step 30 action [0] argmax 0 0.7058461509341214
ite 32 traj 4 step 31 action [0] argmax 0 0.7058461509350309
ite 32 traj 4 step 32 action [0] argmax 0 0.7058461509354856
ite 32 traj 4 step 33 action [0] argmax 0 0.7058461509359404
ite 32 traj 4 step 34 action [59] argmax 0 0.7058461509363951
ite 32 traj 4 step 35 action [0] argmax 0 0.7058461509363951
ite 32 traj 4 step 36 action [0] argmax 0 0.7058461509368499
ite 32 traj 4 step 37 action [45] argmax 0 0.7058461509368499
ite 32 traj 4 step 38 action [45] argmax 0 0.7058461509382141
ite 32 traj 4 step 39 action [30] argmax 0 0.7058461509400331
ite 32 traj 4 step 40 action [62] argmax 0 0.7058461509404879
ite 32 traj 4 step 41 action [4] argmax 0 0.7058461509404879
ite 32 traj 4 step 42 action [96] argmax 0 0.7058461509404879
ite 32 traj 4 step 43 action [0] argmax 0 0.7058461509404879
ite 32 traj 4 step 44 action [0] argmax 0 0.7058461509404879
ite 32 traj 4 ste

ite 33 traj 2 step 14 action [15] argmax 0 0.8867413289776778
ite 33 traj 2 step 15 action [63] argmax 0 0.8867413289785873
ite 33 traj 2 step 16 action [67] argmax 0 0.886741328979042
ite 33 traj 2 step 17 action [6] argmax 0 0.886741328979042
ite 33 traj 2 step 18 action [34] argmax 0 0.8867413289794968
ite 33 traj 2 step 19 action [23] argmax 0 0.8867413289799515
ite 33 traj 2 step 20 action [0] argmax 0 0.8867413289799515
ite 33 traj 2 step 21 action [20] argmax 0 0.8867413289799515
ite 33 traj 2 step 22 action [14] argmax 0 0.8867413289804063
ite 33 traj 2 step 23 action [77] argmax 0 0.886741328980861
ite 33 traj 2 step 24 action [0] argmax 0 0.8867413289817705
ite 33 traj 2 step 25 action [0] argmax 0 0.88674132898268
ite 33 traj 2 step 26 action [0] argmax 0 0.88674132898268
ite 33 traj 2 step 27 action [61] argmax 0 0.88674132898268
ite 33 traj 2 step 28 action [0] argmax 0 0.88674132898268
ite 33 traj 2 step 29 action [0] argmax 0 0.8867413289831347
ite 33 traj 2 step 30 acti

ite 33 traj 4 step 48 action [0] argmax 0 0.9216160135845257
ite 33 traj 4 step 49 action [0] argmax 0 0.9216160135845257
ite 34 traj 0 step 0 action [0] argmax 0 0.8867413289726755
ite 34 traj 0 step 1 action [0] argmax 0 0.8867413289731303
ite 34 traj 0 step 2 action [29] argmax 0 0.8867413289731303
ite 34 traj 0 step 3 action [0] argmax 0 0.886741328973585
ite 34 traj 0 step 4 action [0] argmax 0 0.886741328973585
ite 34 traj 0 step 5 action [0] argmax 0 0.8867413289740398
ite 34 traj 0 step 6 action [0] argmax 0 0.8867413289740398
ite 34 traj 0 step 7 action [23] argmax 0 0.8867413289749493
ite 34 traj 0 step 8 action [0] argmax 0 0.8867413289749493
ite 34 traj 0 step 9 action [46] argmax 0 0.8867413289749493
ite 34 traj 0 step 10 action [29] argmax 0 0.886741328975404
ite 34 traj 0 step 11 action [60] argmax 0 0.886741328975404
ite 34 traj 0 step 12 action [10] argmax 0 0.886741328975404
ite 34 traj 0 step 13 action [11] argmax 0 0.8867413289758588
ite 34 traj 0 step 14 action [38

ite 34 traj 2 step 33 action [0] argmax 0 0.6151751669367513
ite 34 traj 2 step 34 action [0] argmax 0 0.6151751669369787
ite 34 traj 2 step 35 action [0] argmax 0 0.6151751669376608
ite 34 traj 2 step 36 action [0] argmax 0 0.6151751669381156
ite 34 traj 2 step 37 action [0] argmax 0 0.6151751669381156
ite 34 traj 2 step 38 action [0] argmax 0 0.6151751669381156
ite 34 traj 2 step 39 action [6] argmax 0 0.6151751669385703
ite 34 traj 2 step 40 action [10] argmax 0 0.6151751669385703
ite 34 traj 2 step 41 action [0] argmax 0 0.6151751669392524
ite 34 traj 2 step 42 action [26] argmax 0 0.6151751669392524
ite 34 traj 2 step 43 action [0] argmax 0 0.6151751669394798
ite 34 traj 2 step 44 action [64] argmax 0 0.6151751669397072
ite 34 traj 2 step 45 action [0] argmax 0 0.6151751669401619
ite 34 traj 2 step 46 action [0] argmax 0 0.6151751669403893
ite 34 traj 2 step 47 action [99] argmax 0 0.6151751669406167
ite 34 traj 2 step 48 action [15] argmax 0 0.6151751669408441
ite 34 traj 2 step 

ite 35 traj 0 step 18 action [0] argmax 0 0.921616013573157
ite 35 traj 0 step 19 action [0] argmax 0 0.9216160135738392
ite 35 traj 0 step 20 action [59] argmax 0 0.9216160135740665
ite 35 traj 0 step 21 action [18] argmax 0 0.9216160135742939
ite 35 traj 0 step 22 action [57] argmax 0 0.9216160135742939
ite 35 traj 0 step 23 action [69] argmax 0 0.9216160135742939
ite 35 traj 0 step 24 action [16] argmax 0 0.9216160135742939
ite 35 traj 0 step 25 action [0] argmax 0 0.9216160135745213
ite 35 traj 0 step 26 action [0] argmax 0 0.921616013574976
ite 35 traj 0 step 27 action [0] argmax 0 0.9216160135752034
ite 35 traj 0 step 28 action [0] argmax 0 0.9216160135752034
ite 35 traj 0 step 29 action [84] argmax 0 0.9216160135756581
ite 35 traj 0 step 30 action [0] argmax 0 0.9216160135758855
ite 35 traj 0 step 31 action [0] argmax 0 0.9216160135763403
ite 35 traj 0 step 32 action [0] argmax 0 0.921616013576795
ite 35 traj 0 step 33 action [36] argmax 0 0.9216160135770224
ite 35 traj 0 step 3

ite 35 traj 3 step 2 action [58] argmax 0 0.6072478073035654
ite 35 traj 3 step 3 action [0] argmax 0 0.6072478073040202
ite 35 traj 3 step 4 action [0] argmax 0 0.6072478073042475
ite 35 traj 3 step 5 action [0] argmax 0 0.6072478073047023
ite 35 traj 3 step 6 action [0] argmax 0 0.6072478073049297
ite 35 traj 3 step 7 action [0] argmax 0 0.6072478073053844
ite 35 traj 3 step 8 action [59] argmax 0 0.6072478073058392
ite 35 traj 3 step 9 action [0] argmax 0 0.6072478073060665
ite 35 traj 3 step 10 action [3] argmax 0 0.6072478073067487
ite 35 traj 3 step 11 action [0] argmax 0 0.6072478073074308
ite 35 traj 3 step 12 action [49] argmax 0 0.6072478073076581
ite 35 traj 3 step 13 action [0] argmax 0 0.6072478073078855
ite 35 traj 3 step 14 action [13] argmax 0 0.6072478073085676
ite 35 traj 3 step 15 action [0] argmax 0 0.6072478073097045
ite 35 traj 3 step 16 action [23] argmax 0 0.6072478073108414
ite 35 traj 3 step 17 action [0] argmax 0 0.6072478073110688
ite 35 traj 3 step 18 actio

ite 36 traj 0 step 36 action [0] argmax 0 1.1150639396935276
ite 36 traj 0 step 37 action [0] argmax 0 1.1150639396935276
ite 36 traj 0 step 38 action [0] argmax 0 1.1150639396935276
ite 36 traj 0 step 39 action [31] argmax 0 1.115063939693755
ite 36 traj 0 step 40 action [0] argmax 0 1.1150639396939823
ite 36 traj 0 step 41 action [6] argmax 0 1.1150639396942097
ite 36 traj 0 step 42 action [0] argmax 0 1.1150639396942097
ite 36 traj 0 step 43 action [0] argmax 0 1.1150639396946644
ite 36 traj 0 step 44 action [0] argmax 0 1.1150639396948918
ite 36 traj 0 step 45 action [47] argmax 0 1.1150639396951192
ite 36 traj 0 step 46 action [0] argmax 0 1.1150639396953466
ite 36 traj 0 step 47 action [0] argmax 0 1.1150639396953466
ite 36 traj 0 step 48 action [1] argmax 0 1.1150639396958013
ite 36 traj 0 step 49 action [3] argmax 0 1.1150639396969382
ite 36 traj 1 step 0 action [0] argmax 0 0.615175166921972
ite 36 traj 1 step 1 action [14] argmax 0 0.6151751669231089
ite 36 traj 1 step 2 acti

ite 36 traj 3 step 21 action [0] argmax 0 0.6072478073097045
ite 36 traj 3 step 22 action [43] argmax 0 0.6072478073099319
ite 36 traj 3 step 23 action [75] argmax 0 0.6072478073099319
ite 36 traj 3 step 24 action [0] argmax 0 0.6072478073101593
ite 36 traj 3 step 25 action [55] argmax 0 0.6072478073103866
ite 36 traj 3 step 26 action [0] argmax 0 0.6072478073103866
ite 36 traj 3 step 27 action [31] argmax 0 0.6072478073110688
ite 36 traj 3 step 28 action [22] argmax 0 0.6072478073110688
ite 36 traj 3 step 29 action [0] argmax 0 0.6072478073112961
ite 36 traj 3 step 30 action [0] argmax 0 0.6072478073112961
ite 36 traj 3 step 31 action [0] argmax 0 0.6072478073115235
ite 36 traj 3 step 32 action [0] argmax 0 0.6072478073117509
ite 36 traj 3 step 33 action [0] argmax 0 0.6072478073119782
ite 36 traj 3 step 34 action [0] argmax 0 0.6072478073122056
ite 36 traj 3 step 35 action [0] argmax 0 0.6072478073126604
ite 36 traj 3 step 36 action [0] argmax 0 0.6072478073131151
ite 36 traj 3 step 

ite 37 traj 1 step 5 action [0] argmax 0 0.9216160135704285
ite 37 traj 1 step 6 action [34] argmax 0 0.9216160135704285
ite 37 traj 1 step 7 action [59] argmax 0 0.9216160135708833
ite 37 traj 1 step 8 action [66] argmax 0 0.9216160135708833
ite 37 traj 1 step 9 action [0] argmax 0 0.9216160135708833
ite 37 traj 1 step 10 action [22] argmax 0 0.9216160135708833
ite 37 traj 1 step 11 action [0] argmax 0 0.921616013571338
ite 37 traj 1 step 12 action [42] argmax 0 0.921616013571338
ite 37 traj 1 step 13 action [0] argmax 0 0.9216160135717928
ite 37 traj 1 step 14 action [19] argmax 0 0.9216160135720202
ite 37 traj 1 step 15 action [25] argmax 0 0.9216160135720202
ite 37 traj 1 step 16 action [0] argmax 0 0.9216160135720202
ite 37 traj 1 step 17 action [0] argmax 0 0.9216160135722475
ite 37 traj 1 step 18 action [0] argmax 0 0.9216160135724749
ite 37 traj 1 step 19 action [0] argmax 0 0.9216160135727023
ite 37 traj 1 step 20 action [72] argmax 0 0.921616013573157
ite 37 traj 1 step 21 ac

ite 37 traj 3 step 40 action [0] argmax 0 0.7082653196343927
ite 37 traj 3 step 41 action [16] argmax 0 0.7082653196343927
ite 37 traj 3 step 42 action [0] argmax 0 0.7082653196343927
ite 37 traj 3 step 43 action [67] argmax 0 0.7082653196343927
ite 37 traj 3 step 44 action [0] argmax 0 0.7082653196348474
ite 37 traj 3 step 45 action [0] argmax 0 0.7082653196353021
ite 37 traj 3 step 46 action [0] argmax 0 0.7082653196357569
ite 37 traj 3 step 47 action [42] argmax 0 0.7082653196362116
ite 37 traj 3 step 48 action [0] argmax 0 0.7082653196362116
ite 37 traj 3 step 49 action [11] argmax 0 0.7082653196366664
ite 37 traj 4 step 0 action [0] argmax 0 0.7022608541797126
ite 37 traj 4 step 1 action [0] argmax 0 0.7022608541801674
ite 37 traj 4 step 2 action [0] argmax 0 0.7022608541810769
ite 37 traj 4 step 3 action [0] argmax 0 0.7022608541810769
ite 37 traj 4 step 4 action [0] argmax 0 0.7022608541810769
ite 37 traj 4 step 5 action [0] argmax 0 0.7022608541810769
ite 37 traj 4 step 6 actio

ite 38 traj 1 step 24 action [0] argmax 0 0.11534908186058601
ite 38 traj 1 step 25 action [0] argmax 0 0.11534908186104076
ite 38 traj 1 step 26 action [10] argmax 0 0.11534908186195025
ite 38 traj 1 step 27 action [36] argmax 0 0.115349081862405
ite 38 traj 1 step 28 action [0] argmax 0 0.11534908186285975
ite 38 traj 1 step 29 action [0] argmax 0 0.1153490818633145
ite 38 traj 1 step 30 action [0] argmax 0 0.11534908186376924
ite 38 traj 1 step 31 action [72] argmax 0 0.11534908186422399
ite 38 traj 1 step 32 action [39] argmax 0 0.11534908186467874
ite 38 traj 1 step 33 action [0] argmax 0 0.11534908186513348
ite 38 traj 1 step 34 action [0] argmax 0 0.11534908186513348
ite 38 traj 1 step 35 action [0] argmax 0 0.11534908186558823
ite 38 traj 1 step 36 action [0] argmax 0 0.11534908186558823
ite 38 traj 1 step 37 action [0] argmax 0 0.11534908186831672
ite 38 traj 1 step 38 action [64] argmax 0 0.11534908187059045
ite 38 traj 1 step 39 action [82] argmax 0 0.11534908187059045
ite 3

ite 38 traj 4 step 8 action [0] argmax 0 0.10131109041731179
ite 38 traj 4 step 9 action [15] argmax 0 0.10131109041776654
ite 38 traj 4 step 10 action [0] argmax 0 0.10131109041776654
ite 38 traj 4 step 11 action [28] argmax 0 0.10131109041822128
ite 38 traj 4 step 12 action [0] argmax 0 0.10131109041867603
ite 38 traj 4 step 13 action [0] argmax 0 0.10131109041867603
ite 38 traj 4 step 14 action [17] argmax 0 0.10131109041867603
ite 38 traj 4 step 15 action [0] argmax 0 0.10131109041867603
ite 38 traj 4 step 16 action [0] argmax 0 0.10131109041913078
ite 38 traj 4 step 17 action [12] argmax 0 0.10131109041958553
ite 38 traj 4 step 18 action [0] argmax 0 0.10131109042004027
ite 38 traj 4 step 19 action [0] argmax 0 0.10131109042049502
ite 38 traj 4 step 20 action [0] argmax 0 0.10131109042049502
ite 38 traj 4 step 21 action [4] argmax 0 0.10131109042094977
ite 38 traj 4 step 22 action [0] argmax 0 0.10188203517327565
ite 38 traj 4 step 23 action [55] argmax 0 0.10543274512383505
ite 3

ite 39 traj 1 step 42 action [0] argmax 0 0.7022608541965383
ite 39 traj 1 step 43 action [0] argmax 0 0.702260854196993
ite 39 traj 1 step 44 action [66] argmax 0 0.702260854196993
ite 39 traj 1 step 45 action [27] argmax 0 0.7022608541979025
ite 39 traj 1 step 46 action [0] argmax 0 0.702260854198812
ite 39 traj 1 step 47 action [0] argmax 0 0.702260854198812
ite 39 traj 1 step 48 action [0] argmax 0 0.7022608541997215
ite 39 traj 1 step 49 action [51] argmax 0 0.7022608541997215
ite 39 traj 2 step 0 action [0] argmax 0 0.8867413289726755
ite 39 traj 2 step 1 action [43] argmax 0 0.8867413289726755
ite 39 traj 2 step 2 action [11] argmax 0 0.8867413289731303
ite 39 traj 2 step 3 action [0] argmax 0 0.886741328973585
ite 39 traj 2 step 4 action [0] argmax 0 0.8867413289744945
ite 39 traj 2 step 5 action [0] argmax 0 0.8867413289763135
ite 39 traj 2 step 6 action [0] argmax 0 0.886741328977223
ite 39 traj 2 step 7 action [6] argmax 0 0.886741328977223
ite 39 traj 2 step 8 action [40] a

ite 39 traj 4 step 27 action [0] argmax 0 0.6072478073110688
ite 39 traj 4 step 28 action [0] argmax 0 0.6072478073115235
ite 39 traj 4 step 29 action [0] argmax 0 0.6072478073119782
ite 39 traj 4 step 30 action [87] argmax 0 0.6072478073119782
ite 39 traj 4 step 31 action [0] argmax 0 0.6072478073122056
ite 39 traj 4 step 32 action [0] argmax 0 0.607247807312433
ite 39 traj 4 step 33 action [0] argmax 0 0.6072478073128877
ite 39 traj 4 step 34 action [76] argmax 0 0.6072478073133425
ite 39 traj 4 step 35 action [0] argmax 0 0.6072478073140246
ite 39 traj 4 step 36 action [0] argmax 0 0.607247807314252
ite 39 traj 4 step 37 action [0] argmax 0 0.6072478073144794
ite 39 traj 4 step 38 action [0] argmax 0 0.6072478073147067
ite 39 traj 4 step 39 action [0] argmax 0 0.6072478073149341
ite 39 traj 4 step 40 action [51] argmax 0 0.6072478073151615
ite 39 traj 4 step 41 action [0] argmax 0 0.6072478073151615
ite 39 traj 4 step 42 action [0] argmax 0 0.6072478073151615
ite 39 traj 4 step 43 a

In [10]:
run.finish()

0,1
Training reward,0.60725
training reward moving average,0.75529
_runtime,7380.0
_timestamp,1619195688.0
_step,199.0


0,1
Training reward,▂▁▅▅▁█▅▄█▅▄▅▁▄▆▇▅▅▁▄▅▆█▁▅▄▁██▆▄▁▄▆██▄▅▅▄
training reward moving average,▁▁▁▁▆▇▇▇████▇▇▇▇▇████▇█▇▇█▇▇▇▇██▇▇▇███▇█
_runtime,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
