In [1]:
import evebox as box
import pandas as pd
import tensorflow as tf
import cProfile
from tqdm.notebook import tqdm, trange

In [2]:
uni = box.Universe.from_esi(cache = 'universe.txt', tqdm = tqdm).range('Jita', min_sec = 0.5).with_market_types()

In [3]:
#orders = box.load_orders(uni, tqdm = tqdm)
orders = pd.read_csv('market/current.csv')

In [4]:
#orders.to_csv('market/current.csv')

In [4]:
# Filter all buy orders which are 50x above the median
median   = orders.groupby('type_id')[['price']].median().rename(columns = {'price' : 'median_price'})

orders = orders[
    (50 * orders.merge(median, on = 'type_id')['median_price'] >= orders['price']) |
    ~orders['is_buy_order']
]

In [5]:
# Filter all orders that can not be handled profitably
max_buy  = orders[ orders['is_buy_order']].groupby('type_id')[['price']].max().rename(columns = {'price' : 'max_buy'})
min_sell = orders[~orders['is_buy_order']].groupby('type_id')[['price']].min().rename(columns = {'price' : 'min_sell'})

orders = orders.merge(max_buy, on = 'type_id').merge(min_sell, on = 'type_id')

orders = orders[
    (
        # Sell orders must be below max_buy
        orders['is_buy_order'] |
        (orders['price'] <= orders['max_buy'])
    ) & 
    (
        # Buy orders must be above min_sell
        ~orders['is_buy_order'] |
        (orders['price'] >= orders['min_sell'])
    )
]

In [6]:
# Reduce universe types to all types appearing in the order list

# Pre-filter types
def check_type(t):
    if t["volume"] > 155000:
        return False
    
    return t['type_id'] in orders['type_id']

# Reduce universe
types = [t for t, v in tqdm(uni.types.items()) if check_type(v)]
print('Reducing from {} to {} types'.format(len(uni.types), len(types)))
uni = uni.with_types(types)

# Reduce orders
orders = orders[orders['type_id'].isin(uni.types)]

HBox(children=(FloatProgress(value=0.0, max=14472.0), HTML(value='')))


Reducing from 14472 to 4606 types


In [11]:
class Model(tf.keras.Model):
    def __init__(self, universe, bandwidth = 128, d_notes = 4, layers_start = [(16, 8)] * 2, layers_step = [(16, 8)] * 2):
        super().__init__()
        
        self.bandwidth = bandwidth
        self.d_notes  = d_notes
        
        kw = {
            'use_bias' : True
        }
        
        self.embedding = box.tf.Embedding(universe, d_notes = self.d_notes, tqdm = tqdm)
        
        self.input_transforms = {
            k : tf.keras.layers.Dense(self.bandwidth)
            for k in ['state', 'orders', 'cargo', 'systems']
        }
        
        def otf(n):
            return tf.keras.layers.Dense(n, **kw)
        
        self.output_transforms = {
            'actions' : otf(3),
            'move_targets' : otf(1),
            'buy_params' : otf(3),
            'sell_params' : otf(3),
            'value' : otf(1)
        }
        
        def stack(layer_info):
            return box.tf.StackedTogether(
                tf.keras.Sequential([
                    box.tf.MultiHeadedAttention(self.bandwidth, d_head, n_heads) for d_head, n_heads in layer_info
                ])
            )
        
        self.stack_start = stack(layers_start)
        self.stack_step = stack(layers_step)
        
        self.rnn_cell = tf.keras.layers.GRUCell(self.bandwidth)
            
    
    def _preprocess_input(self, input):
        # Pre-process input (adds "note" variables to types & systems, joins orders with their types & systems)
        input = self.embedding(input)
        
        # Expand state to have the same shape as the other stuff
        input['state'] = tf.expand_dims(input['state'], axis = -2)
        
        # Expand all items into a [...,bandwidth] shape
        input = {
            k : self.input_transforms[k](v)
            for k, v in input.items()
        }
        
        return input
        
    def get_initial_state(self, input):
        input = self._preprocess_input(input)
        
        input = self.stack_start(input)
        
        return [tf.reshape(input['state'], [-1, self.bandwidth])]
    
    def call(self, input):
        input, rnn_state = input
        
        batch_shape = tf.shape(input['state'])[:-1]
        
        input = self._preprocess_input(input)
        
        # Add RNN state to the mix
        input['rnn_state'] = tf.reshape(rnn_state, tf.concat([batch_shape, [1, self.bandwidth]], axis = 0))
        
        # We don't want orders in here, too costly (but probably not present anyway)
        if 'orders' in input:
            del input['orders']
        
        # Apply attention stack
        input = self.stack_step(input)
        
        # Extract rnn state & apply to cell
        rnn_in = tf.reshape(input['rnn_state'], [-1, self.bandwidth])
        _, rnn_state = self.rnn_cell(rnn_in, rnn_state)
        del input['rnn_state']
        
        output = {
            'actions' : tf.squeeze(
                self.output_transforms['actions'](input['state']),
                axis = -2
            ),
            
            'move_targets' : tf.squeeze(
                self.output_transforms['move_targets'](input['systems']),
                axis = -1
            ),
            
            'buy_params'  : self.output_transforms['buy_params'] (input['cargo']),
            'sell_params' : self.output_transforms['sell_params'](input['cargo']),
            
            'value' : tf.squeeze(
                self.output_transforms['value'](input['state']),
                axis = [-2, -1]
            )
        }
        
        return output, rnn_state

In [15]:
import optuna as tuna
from time import time

gym = box.tf.TradingGym(uni, orders)

jita = [s["system_id"] for s in uni.systems.values() if s["name"] == "Jita"][0]

state = box.MutableState()
state.universe  = uni
state.time_left = 100.0
state.system    = jita
state.wallet    = 1e7
state.collateral_limit = 1e6
state.volume_limit = 1e4

state = box.State(state)

def opt_fun(trial):   
    policy_weight = trial.suggest_loguniform('policy_weight', 1e-6, 1e2)
    entropy_weight = trial.suggest_loguniform('entropy_weight', 1e-6, 1e2)
    learning_rate = trial.suggest_loguniform('sgd_rate', 1e-8, 1)
    
    print('Policy weight:  {:.2e}'.format(policy_weight))
    print('Entropy weight: {:.2e}'.format(entropy_weight))
    print('Learning rate:  {:.2e}'.format(learning_rate))
    
    def make_layer_info(name, nmin, nmax):
        n = trial.suggest_int('n_layers_{}'.format(name), nmin, nmax)
        print('n_layers_{}: {}'.format(name, n))
        return [
            (
                trial.suggest_int('d_heads_{}_{}'.format(name, i), 1, 16),
                trial.suggest_int('n_heads_{}_{}'.format(name, i), 1, 16)
            )
            for i in range(n)
        ]
    
    model = Model(
        uni,
        bandwidth    = trial.suggest_int('bandwidth', 8, 128),
        d_notes      = trial.suggest_int('d_notes', 1, 8),
        layers_start = make_layer_info('start', 1, 4),
        layers_step  = make_layer_info('step' , 1, 8)
    )
    
    print('Bandwidth: {}'.format(model.bandwidth))
    
    unroller = gym.unroll_model(model, tqdm)
    
    # We have to unroll once to make sure the weights exist
    unroller(state, 2)
    
    loss_sideline = tf.Variable(0, dtype = tf.float32)
    
    def loss():
        result = unroller(state, 100)

        policy_loss, value_loss, entropy_loss = gym.losses(result)

        total_loss = value_loss + policy_weight * policy_loss + entropy_weight * entropy_loss
        
        print('Action 1: {}'.format(result[0][1]))
        
        #print('Final state:')
        #print(result[-1][0])
        
        #loss_sideline.assign(total_loss)

        tf.print('Policy loss:  ', policy_loss)
        tf.print('Value loss:   ', value_loss)
        tf.print('Entropy loss: ', entropy_loss) 
        tf.print('Total loss:   ', total_loss)
        tf.print('Gain:         ', result[-1][0].value - result[0][0].value)
        
        return total_loss
    
    def performance(n = 20):        
        def single_run():
            result = unroller(state, 100)
            return result[-1][0].value - state.value
        
        return sum([single_run() for i in range(n)]) / n
        #return sum([loss() for i in range(n)]) / n
    
    opt = tf.keras.optimizers.SGD(learning_rate)
    
    print('Trial info')
    print(trial)
    print('Beginning')
    
    last_time = time()
    counter = 0
    
    for i in trange(0, 100, desc = 'Iterations', leave = False):
        opt.minimize(loss, model.trainable_variables)
        
        loss_value = loss_sideline.read_value().numpy()
        
        # Report every minute for pruning
        if time() - last_time > 60:
            trial.report(-performance(n = 5), counter)
            counter += 1
            
            last_time = time()
        
        if trial.should_prune():
            raise tuna.exceptions.TrialPruned()
    
    return -performance()

In [16]:
study = tuna.create_study(
    pruner = tuna.pruners.HyperbandPruner(),
    sampler = tuna.samplers.TPESampler()
)



In [18]:
study.optimize(opt_fun, catch = (Exception,))

Policy weight:  2.54e+00
Entropy weight: 1.34e-06
Learning rate:  1.14e-01
n_layers_start: 3
n_layers_step: 1


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4606.0, style=ProgressStyle(descript…


Bandwidth: 40


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x000001939228E128>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…

W0310 16:25:24.210972  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:25:24.212969  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

W0310 16:25:25.383249  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:25:25.384248  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:25:25.385248  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


Action 1: Move to system 30003406
Policy loss:   -75.8385468
Value loss:    23.3012962
Entropy loss:  -14.8135567
Total loss:    -169.165741
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 334.0693359375 of Arkonor (22, 16.0 m3)
Policy loss:   -427756320
Value loss:    4.70442129e+14
Entropy loss:  -14.1898136
Total loss:    4.70441055e+14
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[W 2020-03-10 16:25:38,967] Setting status of trial#2 as TrialState.FAIL because of the following error: InvalidArgumentError()
Traceback (most recent call last):
  File "c:\program files\python36\lib\site-packages\optuna\study.py", line 648, in _run_trial
    result = func(trial)
  File "<ipython-input-15-6d0c2af9b8cd>", line 95, in opt_fun
    opt.minimize(loss, model.trainable_variables)
  File "c:\program files\python36\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 316, in minimize
    loss, var_list=var_list, grad_loss=grad_loss)
  File "c:\program files\python36\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 350, in _compute_gradients
    loss_value = loss()
  File "<ipython-input-15-6d0c2af9b8cd>", line 56, in loss
    result = unroller(state, 100)
  File "C:\Users\Knieps\Documents\repos\tf_playground\eveplan\evebox\tf\trading_gym.py", line 230, in run
    model_state, logp, value, sample = unroll_step(mode

Policy weight:  1.48e-04
Entropy weight: 1.57e-05
Learning rate:  4.59e-08
n_layers_start: 2
n_layers_step: 4


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4606.0, style=ProgressStyle(descript…


Bandwidth: 18


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x00000193933FBDA0>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…

W0310 16:25:48.888618  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:25:48.889616  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

W0310 16:25:50.916370  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:25:50.918367  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:25:50.919367  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


Action 1: Move to system 30002766
Policy loss:   76.9411621
Value loss:    20.1163845
Entropy loss:  -16.1337624
Total loss:    20.1274853
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000159
Policy loss:   61.3163185
Value loss:    15.3606501
Entropy loss:  -16.5783806
Total loss:    15.3694391
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002737
Policy loss:   31.2100983
Value loss:    6.68454266
Entropy loss:  -17.0413055
Total loss:    6.68888092
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002814
Policy loss:   95.6247
Value loss:    24.22369
Entropy loss:  -15.9047327
Total loss:    24.2375526
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000120
Policy loss:   72.8820496
Value loss:    18.566576
Entropy loss:  -15.5539303
Total loss:    18.5770874
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002775
Policy loss:   66.4725494
Value loss:    18.3646965
Entropy loss:  -16.7983551
Total loss:    18.3742428
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001374
Policy loss:   82.2626343
Value loss:    22.3498859
Entropy loss:  -17.7322464
Total loss:    22.3617477
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003870
Policy loss:   54.9796181
Value loss:    12.1263
Entropy loss:  -16.8492451
Total loss:    12.1341486
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002788
Policy loss:   72.4446793
Value loss:    19.7793
Entropy loss:  -16.9759083
Total loss:    19.7897263
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001669
Policy loss:   36.4244614
Value loss:    8.66783714
Entropy loss:  -17.528862
Total loss:    8.67293739
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003517
Policy loss:   39.0325279
Value loss:    8.56195545
Entropy loss:  -17.2218285
Total loss:    8.5674448
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005306
Policy loss:   46.250782
Value loss:    10.6304255
Entropy loss:  -16.6513386
Total loss:    10.6369896
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001362
Policy loss:   60.7491341
Value loss:    15.8465672
Entropy loss:  -16.0732841
Total loss:    15.8552799
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003489
Policy loss:   56.2877159
Value loss:    13.1573668
Entropy loss:  -15.8961325
Total loss:    13.1654243
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001407
Policy loss:   38.5805855
Value loss:    9.40627098
Entropy loss:  -17.4111099
Total loss:    9.41169167
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30004103
Policy loss:   51.5225945
Value loss:    12.3515434
Entropy loss:  -16.7578163
Total loss:    12.3588839
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002750
Policy loss:   54.5232353
Value loss:    13.4933348
Entropy loss:  -15.699522
Total loss:    13.5011349
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000153
Policy loss:   54.1862907
Value loss:    14.76126
Entropy loss:  -16.2195835
Total loss:    14.7690029
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001700
Policy loss:   64.8703079
Value loss:    17.1821
Entropy loss:  -16.3107796
Total loss:    17.1914177
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001397
Policy loss:   105.88324
Value loss:    29.6147099
Entropy loss:  -15.3244286
Total loss:    29.6300964
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000155
Policy loss:   75.5183792
Value loss:    19.484993
Entropy loss:  -16.790966
Total loss:    19.4958744
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001369
Policy loss:   58.5685081
Value loss:    15.1923456
Entropy loss:  -16.0862637
Total loss:    15.200737
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000138
Policy loss:   52.1827545
Value loss:    13.1895971
Entropy loss:  -15.3944693
Total loss:    13.1970568
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002744
Policy loss:   41.1013603
Value loss:    9.00750637
Entropy loss:  -16.3573074
Total loss:    9.0133152
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002818
Policy loss:   79.8888321
Value loss:    20.3287411
Entropy loss:  -16.7121181
Total loss:    20.3402691
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003520
Policy loss:   46.6895065
Value loss:    10.1326466
Entropy loss:  -16.1757565
Total loss:    10.1392822
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002750
Policy loss:   56.8883972
Value loss:    13.7774429
Entropy loss:  -16.7099113
Total loss:    13.7855759
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000119
Policy loss:   43.743988
Value loss:    11.1272154
Entropy loss:  -16.7228451
Total loss:    11.1334085
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005230
Policy loss:   48.3131142
Value loss:    10.8157959
Entropy loss:  -16.0047493
Total loss:    10.8226748
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003508
Policy loss:   92.9694672
Value loss:    22.4454517
Entropy loss:  -16.1921902
Total loss:    22.4589195
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003500
Policy loss:   56.6984673
Value loss:    12.7151346
Entropy loss:  -16.8747406
Total loss:    12.723237
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003878
Policy loss:   39.9742088
Value loss:    8.39098263
Entropy loss:  -17.5369492
Total loss:    8.39660645
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000157
Policy loss:   65.3885117
Value loss:    17.2649288
Entropy loss:  -16.6757565
Total loss:    17.2743187
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002764
Policy loss:   41.3796844
Value loss:    8.96733475
Entropy loss:  -16.2746735
Total loss:    8.97318649
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001412
Policy loss:   43.6922264
Value loss:    9.51265049
Entropy loss:  -16.7057381
Total loss:    9.51883698
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000135
Policy loss:   55.1739388
Value loss:    12.1512375
Entropy loss:  -16.719532
Total loss:    12.1591167
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30010141
Policy loss:   94.1541824
Value loss:    24.0574684
Entropy loss:  -15.7418079
Total loss:    24.0711155
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002768
Policy loss:   67.2421417
Value loss:    15.6693535
Entropy loss:  -16.59412
Total loss:    15.6790161
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005049
Policy loss:   90.2841263
Value loss:    21.0332298
Entropy loss:  -15.6808405
Total loss:    21.0463085
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002801
Policy loss:   58.1189651
Value loss:    12.8928604
Entropy loss:  -17.2625656
Total loss:    12.901166
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001391
Policy loss:   36.6688423
Value loss:    7.40554523
Entropy loss:  -16.1559563
Total loss:    7.41070271
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002816
Policy loss:   57.4531708
Value loss:    13.2087193
Entropy loss:  -16.3913727
Total loss:    13.2169409
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002642
Policy loss:   58.6394119
Value loss:    14.3173027
Entropy loss:  -16.4518394
Total loss:    14.3256989
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002801
Policy loss:   61.0865631
Value loss:    13.848999
Entropy loss:  -16.4151497
Total loss:    13.8577557
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002803
Policy loss:   52.7487183
Value loss:    11.3400412
Entropy loss:  -16.0387611
Total loss:    11.3475742
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000134
Policy loss:   57.3801422
Value loss:    12.3471565
Entropy loss:  -16.0483685
Total loss:    12.3553724
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002802
Policy loss:   61.1190758
Value loss:    15.2407312
Entropy loss:  -16.4743366
Total loss:    15.2494917
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001409
Policy loss:   32.5358772
Value loss:    6.52049208
Entropy loss:  -16.8849049
Total loss:    6.52502823
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002636
Policy loss:   55.449543
Value loss:    12.0925236
Entropy loss:  -16.0679455
Total loss:    12.1004543
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000122
Policy loss:   47.1362343
Value loss:    10.5653095
Entropy loss:  -16.9347935
Total loss:    10.5720005
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002988
Policy loss:   54.620636
Value loss:    11.2365837
Entropy loss:  -16.3073368
Total loss:    11.2443886
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002789
Policy loss:   67.2114258
Value loss:    15.3454046
Entropy loss:  -16.2866421
Total loss:    15.3550673
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002784
Policy loss:   46.717556
Value loss:    10.0900164
Entropy loss:  -16.175909
Total loss:    10.0966568
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001411
Policy loss:   45.5689697
Value loss:    9.74972248
Entropy loss:  -16.4698277
Total loss:    9.75618839
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30041407
Policy loss:   76.9983444
Value loss:    18.0886288
Entropy loss:  -14.8491354
Total loss:    18.0997601
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005042
Policy loss:   60.0603218
Value loss:    11.8660288
Entropy loss:  -16.1798649
Total loss:    11.8746386
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000140
Policy loss:   78.62957
Value loss:    17.7747822
Entropy loss:  -15.5608435
Total loss:    17.7861423
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001415
Policy loss:   48.6481094
Value loss:    10.8687725
Entropy loss:  -16.6670609
Total loss:    10.8756905
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002191
Policy loss:   33.4434509
Value loss:    6.45251656
Entropy loss:  -16.8251858
Total loss:    6.45718765
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001700
Policy loss:   84.8502579
Value loss:    18.1299763
Entropy loss:  -15.8192759
Total loss:    18.1422501
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000153
Policy loss:   56.9563675
Value loss:    11.8443012
Entropy loss:  -15.8687792
Total loss:    11.852457
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000140
Policy loss:   47.8916893
Value loss:    11.128087
Entropy loss:  -16.1011429
Total loss:    11.134902
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30004148
Policy loss:   49.2473145
Value loss:    9.91601276
Entropy loss:  -16.2876759
Total loss:    9.92302418
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30041407
Policy loss:   89.1165314
Value loss:    20.2017956
Entropy loss:  -16.5921249
Total loss:    20.2146873
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002777
Policy loss:   46.7740059
Value loss:    10.7863665
Entropy loss:  -16.5386181
Total loss:    10.7930088
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001395
Policy loss:   46.5074844
Value loss:    10.3745213
Entropy loss:  -16.0756302
Total loss:    10.3811321
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000167
Policy loss:   34.7330818
Value loss:    6.99942875
Entropy loss:  -16.9155293
Total loss:    7.00428867
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30034971
Policy loss:   49.3236
Value loss:    10.030736
Entropy loss:  -16.4414234
Total loss:    10.0377569
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000144
Policy loss:   38.5425339
Value loss:    9.05317116
Entropy loss:  -16.6965771
Total loss:    9.05859756
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000121
Policy loss:   46.8778305
Value loss:    10.3523273
Entropy loss:  -16.6393375
Total loss:    10.3589849
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001380
Policy loss:   59.7830963
Value loss:    12.4389715
Entropy loss:  -16.1912365
Total loss:    12.4475403
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001379
Policy loss:   54.8201561
Value loss:    12.1251583
Entropy loss:  -18.0368118
Total loss:    12.132966
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000181
Policy loss:   48.1764183
Value loss:    8.59894943
Entropy loss:  -17.8704529
Total loss:    8.60577869
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001381
Policy loss:   31.1348038
Value loss:    6.24718142
Entropy loss:  -16.0000153
Total loss:    6.25152445
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000136
Policy loss:   49.8287048
Value loss:    9.99941063
Entropy loss:  -16.3389454
Total loss:    10.0065079
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000150
Policy loss:   69.2459717
Value loss:    13.3579817
Entropy loss:  -15.5570087
Total loss:    13.3679562
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000119
Policy loss:   44.9416733
Value loss:    8.9184866
Entropy loss:  -16.9867477
Total loss:    8.92485237
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002774
Policy loss:   59.3320923
Value loss:    13.093338
Entropy loss:  -16.3256264
Total loss:    13.1018381
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000156
Policy loss:   62.9804344
Value loss:    14.1867304
Entropy loss:  -15.7880726
Total loss:    14.1957769
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000139
Policy loss:   59.0916748
Value loss:    13.3538151
Entropy loss:  -16.4146614
Total loss:    13.362278
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000125
Policy loss:   70.9958344
Value loss:    15.3685713
Entropy loss:  -14.5226688
Total loss:    15.3788214
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000119
Policy loss:   75.4670639
Value loss:    15.9403791
Entropy loss:  -14.9607754
Total loss:    15.9512815
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000140
Policy loss:   51.9649734
Value loss:    11.2934389
Entropy loss:  -16.6208096
Total loss:    11.3008471
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30004974
Policy loss:   43.4810333
Value loss:    7.92649126
Entropy loss:  -16.064167
Total loss:    7.93265581
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001386
Policy loss:   40.058
Value loss:    7.46459532
Entropy loss:  -16.286869
Total loss:    7.47025108
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000156
Policy loss:   48.367
Value loss:    9.37977219
Entropy loss:  -15.9641151
Total loss:    9.38666
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002772
Policy loss:   31.9817753
Value loss:    5.52778101
Entropy loss:  -16.2261982
Total loss:    5.53224611
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000125
Policy loss:   39.7086754
Value loss:    7.78380346
Entropy loss:  -16.9137516
Total loss:    7.78939772
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000130
Policy loss:   39.5283585
Value loss:    7.38617659
Entropy loss:  -15.9141045
Total loss:    7.39176
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000157
Policy loss:   41.2074966
Value loss:    8.03586674
Entropy loss:  -16.7010288
Total loss:    8.04168606
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002792
Policy loss:   56.363533
Value loss:    11.0115738
Entropy loss:  -15.8423748
Total loss:    11.0196428
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001371
Policy loss:   50.7091026
Value loss:    10.800643
Entropy loss:  -16.4871807
Total loss:    10.8078671
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30004151
Policy loss:   67.767395
Value loss:    12.8957081
Entropy loss:  -16.2529469
Total loss:    12.9054537
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001379
Policy loss:   69.4015121
Value loss:    14.3660603
Entropy loss:  -14.5418625
Total loss:    14.3760738
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000167
Policy loss:   56.0935326
Value loss:    10.882638
Entropy loss:  -15.6419516
Total loss:    10.8906708
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30021407
Policy loss:   42.2225647
Value loss:    7.7362175
Entropy loss:  -16.6622849
Total loss:    7.74218702
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002233
Policy loss:   33.6751671
Value loss:    5.90473557
Entropy loss:  -16.9543743
Total loss:    5.90943909
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001653
Policy loss:   40.0683823
Value loss:    6.74986506
Entropy loss:  -15.9648075
Total loss:    6.75552797
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000140
Policy loss:   33.1748276
Value loss:    6.51914692
Entropy loss:  -16.9038582
Total loss:    6.52377748
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000141
Policy loss:   39.4333534
Value loss:    7.15800524
Entropy loss:  -16.0052052
Total loss:    7.16357327
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[I 2020-03-10 16:31:15,512] Finished trial#3 resulted in value: -0.0. Current best value is -0.0 with parameters: {'policy_weight': 0.0001475851713313501, 'entropy_weight': 1.572401677335221e-05, 'sgd_rate': 4.585360398716473e-08, 'bandwidth': 18, 'd_notes': 3, 'n_layers_start': 2, 'd_heads_start_0': 10, 'n_heads_start_0': 9, 'd_heads_start_1': 3, 'n_heads_start_1': 16, 'n_layers_step': 4, 'd_heads_step_0': 1, 'n_heads_step_0': 10, 'd_heads_step_1': 4, 'n_heads_step_1': 11, 'd_heads_step_2': 16, 'n_heads_step_2': 12, 'd_heads_step_3': 11, 'n_heads_step_3': 14}.


Policy weight:  6.56e-03
Entropy weight: 2.89e+01
Learning rate:  5.51e-05
n_layers_start: 2
n_layers_step: 2


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4606.0, style=ProgressStyle(descript…


Bandwidth: 30


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x0000019393588470>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…

W0310 16:31:21.941475  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:31:21.942474  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

W0310 16:31:23.292643  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:31:23.294642  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:31:23.295641  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


Action 1: Move to system 30002780
Policy loss:   268.699524
Value loss:    339.621613
Entropy loss:  -13.4634752
Total loss:    -47.5416565
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002752
Policy loss:   -372.027557
Value loss:    596.795105
Entropy loss:  -13.1990919
Total loss:    213.065
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000134
Policy loss:   280.259857
Value loss:    357.920441
Entropy loss:  -13.4460106
Total loss:    -28.6624451
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002752
Policy loss:   -46.4087143
Value loss:    9.90032864
Entropy loss:  -13.732151
Total loss:    -387.091949
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000132
Policy loss:   -33.4102936
Value loss:    4.2299118
Entropy loss:  -14.2532778
Total loss:    -407.73114
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001407
Policy loss:   3.62989068
Value loss:    0.0574517213
Entropy loss:  -14.560379
Total loss:    -420.531952
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001365
Policy loss:   -1.27070701
Value loss:    0.00728883222
Entropy loss:  -14.521945
Total loss:    -419.504
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001386
Policy loss:   -1.3862797
Value loss:    0.00749582658
Entropy loss:  -14.6531696
Total loss:    -423.295288
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000129
Policy loss:   0.736946344
Value loss:    0.00203423086
Entropy loss:  -15.1256056
Total loss:    -436.934326
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001407
Policy loss:   -0.417139918
Value loss:    0.000812280341
Entropy loss:  -15.248311
Total loss:    -440.487793
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002754
Policy loss:   1.02338398
Value loss:    0.00424792944
Entropy loss:  -14.1323404
Total loss:    -408.237274
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001404
Policy loss:   -2.31504941
Value loss:    0.0246391632
Entropy loss:  -14.967164
Total loss:    -432.354767
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000144
Policy loss:   -0.623054087
Value loss:    0.00178903167
Entropy loss:  -13.7136173
Total loss:    -396.154663
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002754
Policy loss:   -0.0700062886
Value loss:    4.05986066e-05
Entropy loss:  -14.3694925
Total loss:    -415.099396
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002780
Policy loss:   -0.991176665
Value loss:    0.00450266246
Entropy loss:  -14.4180012
Total loss:    -416.502289
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000165
Policy loss:   1.14351225
Value loss:    0.00491100131
Entropy loss:  -14.594964
Total loss:    -421.599854
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003488
Policy loss:   -1.18957663
Value loss:    0.00629848
Entropy loss:  -14.3482628
Total loss:    -414.487183
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000145
Policy loss:   -0.708588481
Value loss:    0.00218851957
Entropy loss:  -13.7672548
Total loss:    -397.704285
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001387
Policy loss:   0.0982926786
Value loss:    2.31240865e-05
Entropy loss:  -14.5571375
Total loss:    -420.51889
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001443
Policy loss:   -0.170951724
Value loss:    0.000196061679
Entropy loss:  -14.0743294
Total loss:    -406.573364
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001386
Policy loss:   -1.31634057
Value loss:    0.00741665717
Entropy loss:  -13.4177027
Total loss:    -387.605347
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001404
Policy loss:   0.771052182
Value loss:    0.00233615912
Entropy loss:  -14.145813
Total loss:    -408.630035
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001375
Policy loss:   -0.420017034
Value loss:    0.000953695795
Entropy loss:  -14.4835663
Total loss:    -418.396088
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002642
Policy loss:   -1.08713758
Value loss:    0.00603136141
Entropy loss:  -13.1983662
Total loss:    -381.269135
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002780
Policy loss:   -0.42528218
Value loss:    0.00106004905
Entropy loss:  -13.3278284
Total loss:    -385.009613
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000142
Policy loss:   -1.15101409
Value loss:    0.00595201831
Entropy loss:  -13.7276297
Total loss:    -396.558746
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000136
Policy loss:   -0.879743159
Value loss:    0.00329524
Entropy loss:  -13.7442427
Total loss:    -397.03952
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001376
Policy loss:   -1.08939826
Value loss:    0.00493457122
Entropy loss:  -14.864253
Total loss:    -429.393616
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000138
Policy loss:   -1.10897648
Value loss:    0.005809105
Entropy loss:  -13.7110538
Total loss:    -396.079773
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000143
Policy loss:   -0.086138241
Value loss:    0.000108523156
Entropy loss:  -14.0649681
Total loss:    -406.30249
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000145
Policy loss:   0.54953289
Value loss:    0.00120819907
Entropy loss:  -13.5222788
Total loss:    -390.620239
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001389
Policy loss:   -0.99494648
Value loss:    0.00482526375
Entropy loss:  -14.5019712
Total loss:    -418.927673
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001364
Policy loss:   0.130685568
Value loss:    4.39394535e-05
Entropy loss:  -14.0998659
Total loss:    -407.309204
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001389
Policy loss:   -0.407393396
Value loss:    0.000747914484
Entropy loss:  -13.5557537
Total loss:    -391.594
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30021407
Policy loss:   -1.26944089
Value loss:    0.00737636397
Entropy loss:  -13.6535902
Total loss:    -394.419281
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002780
Policy loss:   -1.17848325
Value loss:    0.00746894674
Entropy loss:  -13.3794546
Total loss:    -386.499512
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002804
Policy loss:   9.72561073
Value loss:    0.411946803
Entropy loss:  -14.3362617
Total loss:    -413.663239
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000143
Policy loss:   0.609845
Value loss:    0.00131777616
Entropy loss:  -14.1171904
Total loss:    -407.805267
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000141
Policy loss:   -2.98811865
Value loss:    0.035135068
Entropy loss:  -14.9001751
Total loss:    -430.413544
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30030141
Policy loss:   -3.03994489
Value loss:    0.0356716551
Entropy loss:  -14.0809212
Total loss:    -406.747131
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000119
Policy loss:   -0.268266499
Value loss:    0.000539858534
Entropy loss:  -14.3463068
Total loss:    -414.43042
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001362
Policy loss:   3.117167
Value loss:    0.0411485843
Entropy loss:  -13.966958
Total loss:    -403.409149
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30011407
Policy loss:   -1.38848448
Value loss:    0.0083549805
Entropy loss:  -14.299942
Total loss:    -413.090607
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001374
Policy loss:   -0.663226664
Value loss:    0.00241010217
Entropy loss:  -13.5583725
Total loss:    -391.669678
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002772
Policy loss:   0.742687643
Value loss:    0.00191821984
Entropy loss:  -13.7870836
Total loss:    -398.267822
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001368
Policy loss:   -1.44178045
Value loss:    0.00948460214
Entropy loss:  -14.4614992
Total loss:    -417.756775
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002798
Policy loss:   -0.857271135
Value loss:    0.00393706094
Entropy loss:  -14.0345821
Total loss:    -405.425934
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001368
Policy loss:   -1.22171772
Value loss:    0.00708715
Entropy loss:  -13.9619751
Total loss:    -403.327728
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002780
Policy loss:   0.509052694
Value loss:    0.00107984489
Entropy loss:  -12.6399832
Total loss:    -365.133301
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000134
Policy loss:   -3.37132573
Value loss:    0.0586484373
Entropy loss:  -14.0169678
Total loss:    -404.878876
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30021392
Policy loss:   -0.0746870935
Value loss:    0.000136199276
Entropy loss:  -14.2046251
Total loss:    -410.336731
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000121
Policy loss:   -0.612869442
Value loss:    0.00265635853
Entropy loss:  -14.1969252
Total loss:    -410.115295
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30041407
Policy loss:   -0.922932804
Value loss:    0.00416189712
Entropy loss:  -13.2361412
Total loss:    -382.361145
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002754
Policy loss:   -0.665109217
Value loss:    0.00193737436
Entropy loss:  -13.7505121
Total loss:    -397.220612
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000145
Policy loss:   1.43990409
Value loss:    0.00825586449
Entropy loss:  -13.1934347
Total loss:    -381.10788
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000132
Policy loss:   -0.747944832
Value loss:    0.00272569293
Entropy loss:  -12.4573584
Total loss:    -359.864349
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000143
Policy loss:   -1.87691915
Value loss:    0.0163653065
Entropy loss:  -13.4498501
Total loss:    -388.528717
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002804
Policy loss:   0.0270878673
Value loss:    3.92661e-05
Entropy loss:  -13.3619671
Total loss:    -385.993835
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000134
Policy loss:   -0.00305948965
Value loss:    6.18401755e-05
Entropy loss:  -13.4122715
Total loss:    -387.447205
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001379
Policy loss:   -8.22113895
Value loss:    0.294474244
Entropy loss:  -13.9321499
Total loss:    -402.224701
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000130
Policy loss:   -2.91584349
Value loss:    0.0324224196
Entropy loss:  -14.9693727
Total loss:    -432.414734
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000140
Policy loss:   -3.46126151
Value loss:    0.0545062385
Entropy loss:  -13.8947229
Total loss:    -401.352264
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002776
Policy loss:   -0.586425543
Value loss:    0.00130086741
Entropy loss:  -15.0198536
Total loss:    -433.888824
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000135
Policy loss:   4.30488253
Value loss:    0.069958441
Entropy loss:  -13.7624025
Total loss:    -397.46344
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000121
Policy loss:   -1.21632791
Value loss:    0.00655867299
Entropy loss:  -14.4298859
Total loss:    -416.845032
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002643
Policy loss:   -0.628804
Value loss:    0.00264455401
Entropy loss:  -14.033288
Total loss:    -405.388367
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000121
Policy loss:   0.514445186
Value loss:    0.000744698278
Entropy loss:  -15.2518988
Total loss:    -440.585388
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002753
Policy loss:   0.187718078
Value loss:    9.08132861e-05
Entropy loss:  -14.2385168
Total loss:    -411.314087
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000119
Policy loss:   1.41765785
Value loss:    0.0079258047
Entropy loss:  -12.6432991
Total loss:    -365.216278
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001362
Policy loss:   -0.709313333
Value loss:    0.00320881559
Entropy loss:  -12.8378611
Total loss:    -370.855377
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000139
Policy loss:   -0.604362547
Value loss:    0.00220260792
Entropy loss:  -13.3870335
Total loss:    -386.71994
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001407
Policy loss:   -0.763553083
Value loss:    0.00297730579
Entropy loss:  -13.1789455
Total loss:    -380.709076
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001363
Policy loss:   -0.724719763
Value loss:    0.00338559
Entropy loss:  -13.9199886
Total loss:    -402.115295
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000153
Policy loss:   -0.239469796
Value loss:    0.000669925415
Entropy loss:  -13.2564592
Total loss:    -382.947113
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000165
Policy loss:   -0.836006284
Value loss:    0.00410717167
Entropy loss:  -12.9775324
Total loss:    -374.890076
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000156
Policy loss:   -0.756098092
Value loss:    0.00291908556
Entropy loss:  -13.1101761
Total loss:    -378.722504
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000132
Policy loss:   -0.820042133
Value loss:    0.00371268368
Entropy loss:  -13.0588303
Total loss:    -377.238861
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000125
Policy loss:   -0.572777331
Value loss:    0.00219701929
Entropy loss:  -13.8961449
Total loss:    -401.426697
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002776
Policy loss:   -0.500703275
Value loss:    0.00206793519
Entropy loss:  -13.9037313
Total loss:    -401.645508
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000132
Policy loss:   -0.570413172
Value loss:    0.00272226962
Entropy loss:  -14.2526712
Total loss:    -411.725311
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000120
Policy loss:   -0.878234565
Value loss:    0.00423397962
Entropy loss:  -13.0352821
Total loss:    -376.558472
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000142
Policy loss:   -0.677882373
Value loss:    0.00253748451
Entropy loss:  -12.6495562
Total loss:    -365.416199
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002779
Policy loss:   -0.682268918
Value loss:    0.00279266946
Entropy loss:  -14.2619238
Total loss:    -411.993256
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000136
Policy loss:   -0.728439391
Value loss:    0.00273638056
Entropy loss:  -14.1240797
Total loss:    -408.011658
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001378
Policy loss:   -0.662353456
Value loss:    0.00322479848
Entropy loss:  -13.7950668
Total loss:    -398.506378
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002791
Policy loss:   -0.573468208
Value loss:    0.00227777194
Entropy loss:  -14.4417076
Total loss:    -417.186584
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002783
Policy loss:   -0.504250228
Value loss:    0.0019634706
Entropy loss:  -13.6495657
Total loss:    -394.303406
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30021407
Policy loss:   -0.684367418
Value loss:    0.00278272689
Entropy loss:  -13.3659143
Total loss:    -386.109802
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001393
Policy loss:   -0.541216612
Value loss:    0.00183296192
Entropy loss:  -13.8232117
Total loss:    -399.319977
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30021392
Policy loss:   -0.790100396
Value loss:    0.00409496576
Entropy loss:  -14.1435308
Total loss:    -408.572601
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001381
Policy loss:   -0.965493739
Value loss:    0.00550095318
Entropy loss:  -14.246727
Total loss:    -411.553406
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001364
Policy loss:   -0.997274816
Value loss:    0.00616658898
Entropy loss:  -13.8700113
Total loss:    -400.670563
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000143
Policy loss:   -0.715199
Value loss:    0.00369175035
Entropy loss:  -14.5552378
Total loss:    -420.465698
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001368
Policy loss:   -0.961096704
Value loss:    0.00511927763
Entropy loss:  -14.565134
Total loss:    -420.75177
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001368
Policy loss:   1.49534023
Value loss:    0.00860843156
Entropy loss:  -13.8098955
Total loss:    -398.915161
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001408
Policy loss:   -0.580077648
Value loss:    0.00205660774
Entropy loss:  -13.3587837
Total loss:    -385.903839
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001365
Policy loss:   -0.536820292
Value loss:    0.00269474019
Entropy loss:  -13.8312283
Total loss:    -399.55069
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000138
Policy loss:   -0.818780065
Value loss:    0.00348799024
Entropy loss:  -14.0912638
Total loss:    -407.063538
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000119
Policy loss:   -0.718065
Value loss:    0.00309367967
Entropy loss:  -13.549118
Total loss:    -391.402
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002775
Policy loss:   -0.202443704
Value loss:    0.000763459306
Entropy loss:  -14.6524754
Total loss:    -423.274231
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[I 2020-03-10 16:37:30,897] Finished trial#4 resulted in value: -0.0. Current best value is -0.0 with parameters: {'policy_weight': 0.0001475851713313501, 'entropy_weight': 1.572401677335221e-05, 'sgd_rate': 4.585360398716473e-08, 'bandwidth': 18, 'd_notes': 3, 'n_layers_start': 2, 'd_heads_start_0': 10, 'n_heads_start_0': 9, 'd_heads_start_1': 3, 'n_heads_start_1': 16, 'n_layers_step': 4, 'd_heads_step_0': 1, 'n_heads_step_0': 10, 'd_heads_step_1': 4, 'n_heads_step_1': 11, 'd_heads_step_2': 16, 'n_heads_step_2': 12, 'd_heads_step_3': 11, 'n_heads_step_3': 14}.


Policy weight:  1.29e-03
Entropy weight: 1.41e-03
Learning rate:  3.14e-03
n_layers_start: 3
n_layers_step: 3


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4606.0, style=ProgressStyle(descript…


Bandwidth: 121


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x00000193815AE7B8>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…

W0310 16:37:39.731671  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:37:39.733671  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

W0310 16:37:41.704456  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:37:41.706455  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:37:41.707454  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


Action 1: Buy 27903.9921875 of Arbitrator (628, 120000.0 m3)
Policy loss:   -60.91716
Value loss:    5.72035933
Entropy loss:  -18.3065033
Total loss:    5.61568165
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.024058837443590164 of Women's 'Function' T-shirt (cream)
Policy loss:   28362.2773
Value loss:    2225039.5
Entropy loss:  -13.6856546
Total loss:    2225076.25
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[W 2020-03-10 16:38:06,521] Setting status of trial#5 as TrialState.FAIL because of the following error: InvalidArgumentError()
Traceback (most recent call last):
  File "c:\program files\python36\lib\site-packages\optuna\study.py", line 648, in _run_trial
    result = func(trial)
  File "<ipython-input-15-6d0c2af9b8cd>", line 95, in opt_fun
    opt.minimize(loss, model.trainable_variables)
  File "c:\program files\python36\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 316, in minimize
    loss, var_list=var_list, grad_loss=grad_loss)
  File "c:\program files\python36\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 350, in _compute_gradients
    loss_value = loss()
  File "<ipython-input-15-6d0c2af9b8cd>", line 56, in loss
    result = unroller(state, 100)
  File "C:\Users\Knieps\Documents\repos\tf_playground\eveplan\evebox\tf\trading_gym.py", line 230, in run
    model_state, logp, value, sample = unroll_step(mode

Policy weight:  2.14e-01
Entropy weight: 3.04e+01
Learning rate:  3.21e-03
n_layers_start: 4
n_layers_step: 1


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4606.0, style=ProgressStyle(descript…


Bandwidth: 55


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x0000019381753E10>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…

W0310 16:38:15.512964  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:38:15.514962  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

W0310 16:38:16.846143  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:38:16.847142  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:38:16.849140  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


Action 1: Move to system 30002535
Policy loss:   29.7545872
Value loss:    8.75901127
Entropy loss:  -18.0002708
Total loss:    -531.594055
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 5 of 26929
Action 1: Buy 28.045330047607422 of Large Liquid Cooled Electronics II (26348, 20.0 m3)
Policy loss:   1077.79932
Value loss:    3207.61548
Entropy loss:  -16.0168037
Total loss:    2951.49756
Gain:          240.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 6 of 12620
Transferred 7 of 237
Transferred 10 of 11690
Transferred 2 of 9945
Transferred 6 of 16001
Transferred 30 of 41
Transferred 28 of 210
Action 1: Buy 33.53786849975586 of Huginn Blue Tiger SKIN (36822, 0.01 m3)
Policy loss:   -331625568
Value loss:    1.7064348e+14
Entropy loss:  -18.26717
Total loss:    1.70643413e+14
Gain:          989.3866499997675


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[W 2020-03-10 16:38:28,862] Setting status of trial#6 as TrialState.FAIL because of the following error: InvalidArgumentError()
Traceback (most recent call last):
  File "c:\program files\python36\lib\site-packages\optuna\study.py", line 648, in _run_trial
    result = func(trial)
  File "<ipython-input-15-6d0c2af9b8cd>", line 95, in opt_fun
    opt.minimize(loss, model.trainable_variables)
  File "c:\program files\python36\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 316, in minimize
    loss, var_list=var_list, grad_loss=grad_loss)
  File "c:\program files\python36\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 350, in _compute_gradients
    loss_value = loss()
  File "<ipython-input-15-6d0c2af9b8cd>", line 56, in loss
    result = unroller(state, 100)
  File "C:\Users\Knieps\Documents\repos\tf_playground\eveplan\evebox\tf\trading_gym.py", line 230, in run
    model_state, logp, value, sample = unroll_step(mode

Policy weight:  1.44e-04
Entropy weight: 5.69e-01
Learning rate:  8.16e-05
n_layers_start: 3
n_layers_step: 4


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4606.0, style=ProgressStyle(descript…


Bandwidth: 117


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x000001938138A0F0>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…

W0310 16:38:39.484241  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:38:39.485240  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

W0310 16:38:41.640914  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:38:41.642913  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:38:41.643912  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


Action 1: Buy 26797.51171875 of Pilgrim (11965, 120000.0 m3)
Policy loss:   -121.196754
Value loss:    21.5402718
Entropy loss:  -18.8720722
Total loss:    10.793644
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30004142
Policy loss:   208.284836
Value loss:    283.781982
Entropy loss:  -17.283556
Total loss:    273.985779
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002199
Policy loss:   -66.5849152
Value loss:    23.0351105
Entropy loss:  -17.3497963
Total loss:    13.1617727
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003452
Policy loss:   7.67585373
Value loss:    0.549513698
Entropy loss:  -18.0033131
Total loss:    -9.68470478
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30004151
Policy loss:   2.96324873
Value loss:    0.0617494173
Entropy loss:  -16.9161472
Total loss:    -9.55506516
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005310
Policy loss:   -0.392562628
Value loss:    0.000703119091
Entropy loss:  -17.0545559
Total loss:    -9.69528198
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000081
Policy loss:   0.00731826061
Value loss:    8.0043111e-07
Entropy loss:  -17.0638142
Total loss:    -9.70119
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002200
Policy loss:   -0.0245272759
Value loss:    2.97105885e-06
Entropy loss:  -18.2055168
Total loss:    -10.3502789
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30004989
Policy loss:   0.000364046427
Value loss:    3.2482518e-07
Entropy loss:  -17.5921402
Total loss:    -10.0015583
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002644
Policy loss:   -0.0409817696
Value loss:    6.73205e-06
Entropy loss:  -17.6756229
Total loss:    -10.0490189
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000085
Policy loss:   -0.00237917039
Value loss:    5.90941795e-07
Entropy loss:  -17.3804722
Total loss:    -9.88122
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005314
Policy loss:   -0.00987610221
Value loss:    2.3425703e-07
Entropy loss:  -17.206213
Total loss:    -9.78215
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30004142
Policy loss:   2.38914871
Value loss:    0.0348493643
Entropy loss:  -16.7905884
Total loss:    -9.51066399
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002656
Policy loss:   1.29595351
Value loss:    0.00783549901
Entropy loss:  -17.5223885
Total loss:    -9.95388126
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003388
Policy loss:   -0.425829142
Value loss:    0.000860998116
Entropy loss:  -17.418541
Total loss:    -9.90206242
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002519
Policy loss:   -0.258503258
Value loss:    0.000214726388
Entropy loss:  -16.1526604
Total loss:    -9.18300152
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001363
Policy loss:   0.163578317
Value loss:    0.000185910612
Entropy loss:  -16.5999928
Total loss:    -9.43728828
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002761
Policy loss:   -0.18090035
Value loss:    0.000135243768
Entropy loss:  -17.1466618
Total loss:    -9.74818325
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30004083
Policy loss:   0.011424683
Value loss:    1.11887648e-05
Entropy loss:  -16.9693584
Total loss:    -9.64747906
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30022505
Policy loss:   -6.65215874
Value loss:    0.297449023
Entropy loss:  -17.2246265
Total loss:    -9.49612331
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003870
Policy loss:   -0.172313109
Value loss:    0.000144123609
Entropy loss:  -17.3531914
Total loss:    -9.86559105
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002545
Policy loss:   0.0324991122
Value loss:    1.81581636e-05
Entropy loss:  -17.2315807
Total loss:    -9.79654884
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002528
Policy loss:   -0.105579145
Value loss:    3.82944854e-05
Entropy loss:  -16.3331013
Total loss:    -9.28574085
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003018
Policy loss:   -0.0392348506
Value loss:    2.98181726e-06
Entropy loss:  -16.3373547
Total loss:    -9.28818512
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003517
Policy loss:   -0.053315334
Value loss:    4.83848771e-06
Entropy loss:  -16.9781399
Total loss:    -9.6524868
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000187
Policy loss:   -0.0238109455
Value loss:    1.80794018e-06
Entropy loss:  -16.7217255
Total loss:    -9.50670815
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001689
Policy loss:   -0.0988158584
Value loss:    2.05172564e-05
Entropy loss:  -16.5349407
Total loss:    -9.40050793
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003438
Policy loss:   0.0175571032
Value loss:    2.57875763e-05
Entropy loss:  -17.2836838
Total loss:    -9.82616425
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002671
Policy loss:   -0.0718742162
Value loss:    1.0151427e-05
Entropy loss:  -16.7397404
Total loss:    -9.51694775
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003532
Policy loss:   -0.0313942432
Value loss:    3.45893068e-06
Entropy loss:  -16.9791908
Total loss:    -9.65308285
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30004095
Policy loss:   -0.0599575676
Value loss:    6.74151579e-06
Entropy loss:  -16.5059853
Total loss:    -9.38405418
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005198
Policy loss:   -0.0532007366
Value loss:    5.42551925e-06
Entropy loss:  -16.3490543
Total loss:    -9.29483604
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003403
Policy loss:   -0.0524395369
Value loss:    5.00773103e-06
Entropy loss:  -16.9713631
Total loss:    -9.64863396
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003050
Policy loss:   -0.0543082058
Value loss:    5.5456394e-06
Entropy loss:  -16.4587
Total loss:    -9.35717201
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000135
Policy loss:   -0.0551083311
Value loss:    5.56456e-06
Entropy loss:  -16.931776
Total loss:    -9.62612724
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005052
Policy loss:   -0.0624421127
Value loss:    7.61809315e-06
Entropy loss:  -16.004631
Total loss:    -9.09902191
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005317
Policy loss:   -0.0426752307
Value loss:    4.64750156e-06
Entropy loss:  -16.6627274
Total loss:    -9.47316647
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002751
Policy loss:   -0.0601098575
Value loss:    6.50981656e-06
Entropy loss:  -16.4696922
Total loss:    -9.36342144
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002792
Policy loss:   -0.0521331839
Value loss:    5.56294617e-06
Entropy loss:  -16.6703625
Total loss:    -9.47750664
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001372
Policy loss:   -0.0522831492
Value loss:    5.70275324e-06
Entropy loss:  -16.4499569
Total loss:    -9.35220146
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003048
Policy loss:   -0.0618874729
Value loss:    7.04582135e-06
Entropy loss:  -16.419281
Total loss:    -9.33476162
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002527
Policy loss:   -0.0562948436
Value loss:    6.18809327e-06
Entropy loss:  -16.6475983
Total loss:    -9.46456528
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003395
Policy loss:   -0.0645705611
Value loss:    6.95255267e-06
Entropy loss:  -17.2904015
Total loss:    -9.83001423
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002754
Policy loss:   -0.0482864603
Value loss:    5.48492244e-06
Entropy loss:  -16.903059
Total loss:    -9.6098
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003392
Policy loss:   -0.0837330893
Value loss:    1.12256193e-05
Entropy loss:  -17.7574787
Total loss:    -10.0955582
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002682
Policy loss:   -0.0639329851
Value loss:    7.96470431e-06
Entropy loss:  -16.2226257
Total loss:    -9.22295666
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003036
Policy loss:   -0.0601942129
Value loss:    7.3778715e-06
Entropy loss:  -17.1392841
Total loss:    -9.7441
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005043
Policy loss:   -0.0716881603
Value loss:    9.85605129e-06
Entropy loss:  -16.158844
Total loss:    -9.18669415
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001413
Policy loss:   -0.0633743703
Value loss:    7.79083712e-06
Entropy loss:  -16.9942703
Total loss:    -9.66165543
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002747
Policy loss:   -0.0427953266
Value loss:    4.74355465e-06
Entropy loss:  -15.8086567
Total loss:    -8.98760509
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001376
Policy loss:   -0.0778307095
Value loss:    1.03446764e-05
Entropy loss:  -17.2510967
Total loss:    -9.80766773
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002383
Policy loss:   -0.0669027865
Value loss:    8.55850521e-06
Entropy loss:  -16.4946613
Total loss:    -9.37761593
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002658
Policy loss:   -0.0658539236
Value loss:    8.41898873e-06
Entropy loss:  -16.7568302
Total loss:    -9.52666473
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002198
Policy loss:   -0.0700798482
Value loss:    8.92400203e-06
Entropy loss:  -16.7800102
Total loss:    -9.53984356
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005217
Policy loss:   -0.0686961785
Value loss:    8.90789124e-06
Entropy loss:  -17.1953487
Total loss:    -9.77597332
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002268
Policy loss:   -0.0648062602
Value loss:    8.45245904e-06
Entropy loss:  -16.4736137
Total loss:    -9.36565
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003395
Policy loss:   -0.0716121197
Value loss:    9.13235453e-06
Entropy loss:  -17.3053703
Total loss:    -9.83852386
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005036
Policy loss:   -0.0296773538
Value loss:    5.93502864e-06
Entropy loss:  -16.0024643
Total loss:    -9.0977869
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005332
Policy loss:   -0.0901817
Value loss:    1.52972116e-05
Entropy loss:  -16.6120319
Total loss:    -9.44434071
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005308
Policy loss:   -0.0433340222
Value loss:    5.37366213e-06
Entropy loss:  -16.3961258
Total loss:    -9.32159615
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001665
Policy loss:   -0.0892249197
Value loss:    1.53611236e-05
Entropy loss:  -16.1441822
Total loss:    -9.17835617
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005325
Policy loss:   -0.0382924415
Value loss:    6.66330288e-06
Entropy loss:  -16.1807384
Total loss:    -9.1991415
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30044971
Policy loss:   -0.0890274048
Value loss:    1.49172447e-05
Entropy loss:  -16.4188843
Total loss:    -9.33453178
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002778
Policy loss:   -0.040685676
Value loss:    8.85069767e-06
Entropy loss:  -16.3488884
Total loss:    -9.29473686
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001362
Policy loss:   -0.118094116
Value loss:    2.94926267e-05
Entropy loss:  -16.0470676
Total loss:    -9.12313461
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002791
Policy loss:   0.0183297433
Value loss:    6.80724334e-05
Entropy loss:  -16.7969646
Total loss:    -9.54941082
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002648
Policy loss:   -0.139542967
Value loss:    4.02391888e-05
Entropy loss:  -17.4382095
Total loss:    -9.91402435
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002265
Policy loss:   -0.0434399247
Value loss:    1.64292433e-05
Entropy loss:  -16.7885494
Total loss:    -9.54468727
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003522
Policy loss:   -0.0653595179
Value loss:    1.02314425e-05
Entropy loss:  -16.2639923
Total loss:    -9.24647331
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001695
Policy loss:   -0.0829216465
Value loss:    1.28998381e-05
Entropy loss:  -16.5824451
Total loss:    -9.42752075
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002643
Policy loss:   -0.0802356
Value loss:    1.20263358e-05
Entropy loss:  -16.9074097
Total loss:    -9.61227131
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003871
Policy loss:   -0.0533245429
Value loss:    6.20292258e-06
Entropy loss:  -16.2208519
Total loss:    -9.22195
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002776
Policy loss:   -0.0492358804
Value loss:    9.06952937e-06
Entropy loss:  -16.4573288
Total loss:    -9.35638809
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30004090
Policy loss:   -0.104433127
Value loss:    2.06878685e-05
Entropy loss:  -16.5051651
Total loss:    -9.38358116
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000141
Policy loss:   -0.0784346
Value loss:    1.20074274e-05
Entropy loss:  -16.8111267
Total loss:    -9.55753231
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003492
Policy loss:   -0.0825652629
Value loss:    1.32251571e-05
Entropy loss:  -16.7504902
Total loss:    -9.52305889
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002248
Policy loss:   -0.0906460583
Value loss:    1.38675987e-05
Entropy loss:  -17.4849033
Total loss:    -9.94059086
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000077
Policy loss:   -0.0884656459
Value loss:    1.45209124e-05
Entropy loss:  -16.9135475
Total loss:    -9.61576
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003024
Policy loss:   -0.0824552402
Value loss:    1.39322201e-05
Entropy loss:  -16.0553513
Total loss:    -9.12785435
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005304
Policy loss:   -0.0650869533
Value loss:    8.90004776e-06
Entropy loss:  -17.0578499
Total loss:    -9.69780159
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003055
Policy loss:   -0.0911592692
Value loss:    1.53184174e-05
Entropy loss:  -16.6706448
Total loss:    -9.47766399
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30004134
Policy loss:   -12.2835569
Value loss:    0.32834059
Entropy loss:  -17.1465168
Total loss:    -9.42163372
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002766
Policy loss:   31.9381676
Value loss:    3.66249251
Entropy loss:  -16.8465405
Total loss:    -5.91058922
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001387
Policy loss:   -24.3618412
Value loss:    4.09768677
Entropy loss:  -16.5713501
Total loss:    -5.32702446
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003492
Policy loss:   1.47470379
Value loss:    0.00919380598
Entropy loss:  -16.5598888
Total loss:    -9.40529346
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003384
Policy loss:   -1.69399941
Value loss:    0.0146932071
Entropy loss:  -15.8054285
Total loss:    -8.9713192
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002231
Policy loss:   0.794026256
Value loss:    0.00242136954
Entropy loss:  -16.3055458
Total loss:    -9.26756191
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003504
Policy loss:   -1.39207649
Value loss:    0.00836288556
Entropy loss:  -16.5784187
Total loss:    -9.41706944
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000139
Policy loss:   2.41444969
Value loss:    0.0191893671
Entropy loss:  -16.7469234
Total loss:    -9.50149632
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30004968
Policy loss:   -3.16824317
Value loss:    0.0586292595
Entropy loss:  -16.5588
Total loss:    -9.35590458
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000120
Policy loss:   1.96875834
Value loss:    0.0257358644
Entropy loss:  -17.2531605
Total loss:    -9.78282166
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001379
Policy loss:   -1.56369674
Value loss:    0.0108419927
Entropy loss:  -16.3764935
Total loss:    -9.29981613
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000158
Policy loss:   2.58302331
Value loss:    0.0325317718
Entropy loss:  -16.1158085
Total loss:    -9.12932491
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000137
Policy loss:   -6.03412104
Value loss:    0.114581503
Entropy loss:  -16.8157711
Total loss:    -9.44645786
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001377
Policy loss:   11.3544464
Value loss:    0.567617
Entropy loss:  -16.3869343
Total loss:    -8.74712181
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30043410
Policy loss:   -6.74339628
Value loss:    0.378175
Entropy loss:  -16.6785145
Total loss:    -9.10493279
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002662
Policy loss:   -3.92972541
Value loss:    0.0796992
Entropy loss:  -16.9903889
Total loss:    -9.58031273
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30003389
Policy loss:   0.274999976
Value loss:    0.000676520634
Entropy loss:  -16.5841274
Total loss:    -9.42776203
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005192
Policy loss:   -0.10155309
Value loss:    1.8586019e-05
Entropy loss:  -17.0520515
Total loss:    -9.69450092
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30005024
Policy loss:   3.88777
Value loss:    0.0767124742
Entropy loss:  -16.9890366
Total loss:    -9.5814085
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[I 2020-03-10 16:48:11,316] Finished trial#7 resulted in value: -0.0. Current best value is -0.0 with parameters: {'policy_weight': 0.0001475851713313501, 'entropy_weight': 1.572401677335221e-05, 'sgd_rate': 4.585360398716473e-08, 'bandwidth': 18, 'd_notes': 3, 'n_layers_start': 2, 'd_heads_start_0': 10, 'n_heads_start_0': 9, 'd_heads_start_1': 3, 'n_heads_start_1': 16, 'n_layers_step': 4, 'd_heads_step_0': 1, 'n_heads_step_0': 10, 'd_heads_step_1': 4, 'n_heads_step_1': 11, 'd_heads_step_2': 16, 'n_heads_step_2': 12, 'd_heads_step_3': 11, 'n_heads_step_3': 14}.


Policy weight:  1.85e-06
Entropy weight: 2.01e+00
Learning rate:  6.47e-05
n_layers_start: 3
n_layers_step: 7


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4606.0, style=ProgressStyle(descript…


Bandwidth: 114


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x00000193C310D7F0>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…

W0310 16:48:24.218657  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:48:24.219656  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

W0310 16:48:27.450415  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:48:27.451415  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:48:27.453413  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


Action 1: Sell 1.4458706378936768 of Inherent Implants 'Squire' Capacitor Management EM-801
Policy loss:   -2035.86658
Value loss:    8773.4043
Entropy loss:  -15.6963415
Total loss:    8741.80371
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 1.0624477863311768 of Maller Ironblood SKIN
Policy loss:   12769557
Value loss:    3.54866954e+11
Entropy loss:  -15.4335899
Total loss:    3.54866954e+11
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[W 2020-03-10 16:49:27,799] Setting status of trial#8 as TrialState.FAIL because of the following error: InvalidArgumentError()
Traceback (most recent call last):
  File "c:\program files\python36\lib\site-packages\optuna\study.py", line 648, in _run_trial
    result = func(trial)
  File "<ipython-input-15-6d0c2af9b8cd>", line 101, in opt_fun
    trial.report(-performance(n = 5), counter)
  File "<ipython-input-15-6d0c2af9b8cd>", line 82, in performance
    return sum([single_run() for i in range(n)]) / n
  File "<ipython-input-15-6d0c2af9b8cd>", line 82, in <listcomp>
    return sum([single_run() for i in range(n)]) / n
  File "<ipython-input-15-6d0c2af9b8cd>", line 79, in single_run
    result = unroller(state, 100)
  File "C:\Users\Knieps\Documents\repos\tf_playground\eveplan\evebox\tf\trading_gym.py", line 230, in run
    model_state, logp, value, sample = unroll_step(model_state, model_input)
  File "c:\program files\python36\lib\site-packages\tensorflow_core\python\eager\def_func

Policy weight:  1.96e-03
Entropy weight: 1.99e-02
Learning rate:  4.21e-04
n_layers_start: 2
n_layers_step: 6


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4606.0, style=ProgressStyle(descript…


Bandwidth: 120


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x0000019363AFCB38>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…

W0310 16:49:39.197931  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:49:39.198930  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

W0310 16:49:41.785338  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:49:41.787346  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:49:41.788336  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


Action 1: Move to system 30000063
Policy loss:   -249.757568
Value loss:    239.015488
Entropy loss:  -12.5161839
Total loss:    238.276016
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002755
Policy loss:   8771.41
Value loss:    452235.375
Entropy loss:  -16.3071594
Total loss:    452252.281
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[W 2020-03-10 16:50:15,569] Setting status of trial#9 as TrialState.FAIL because of the following error: InvalidArgumentError()
Traceback (most recent call last):
  File "c:\program files\python36\lib\site-packages\optuna\study.py", line 648, in _run_trial
    result = func(trial)
  File "<ipython-input-15-6d0c2af9b8cd>", line 95, in opt_fun
    opt.minimize(loss, model.trainable_variables)
  File "c:\program files\python36\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 316, in minimize
    loss, var_list=var_list, grad_loss=grad_loss)
  File "c:\program files\python36\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 350, in _compute_gradients
    loss_value = loss()
  File "<ipython-input-15-6d0c2af9b8cd>", line 56, in loss
    result = unroller(state, 100)
  File "C:\Users\Knieps\Documents\repos\tf_playground\eveplan\evebox\tf\trading_gym.py", line 230, in run
    model_state, logp, value, sample = unroll_step(mode

Policy weight:  3.86e-01
Entropy weight: 6.98e-03
Learning rate:  2.34e-06
n_layers_start: 4
n_layers_step: 4


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4606.0, style=ProgressStyle(descript…


Bandwidth: 94


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x00000193555B03C8>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…

W0310 16:50:27.786755  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:50:27.787755  4272 backprop.py:1017] The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

W0310 16:50:30.162292  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:50:30.164291  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32
W0310 16:50:30.165291  4272 backprop.py:1003] The dtype of the target tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient, got tf.int32


Action 1: Buy 0.010377433151006699 of Raptor Matigu Seabeast SKIN (40591, 0.01 m3)
Policy loss:   -111.415482
Value loss:    41.9639816
Entropy loss:  -12.4066029
Total loss:    -1.16550696
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.018007252365350723 of Redeemer Cold Iron SKIN (42787, 0.01 m3)
Policy loss:   -80.9006882
Value loss:    20.4707737
Entropy loss:  -12.8053274
Total loss:    -10.8727732
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.04863159731030464 of Phased Plasma M (192, 0.0125 m3)
Policy loss:   -69.3059921
Value loss:    14.9317474
Entropy loss:  -12.862957
Total loss:    -11.9328451
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.05047941952943802 of Small Focused Pulse Laser I Blueprint (837, 0.01 m3)
Policy loss:   -64.5737762
Value loss:    13.0895376
Entropy loss:  -12.8798876
Total loss:    -11.9469852
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.11981619894504547 of Men's 'Precision' Boots (3966, 0.1 m3)
Policy loss:   -62.9215508
Value loss:    12.3419895
Entropy loss:  -12.8371983
Total loss:    -12.055934
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 2 of 11533


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 3687


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.0020043887197971344 of Sensor Optimization Charge (42835, 0.01 m3)
Policy loss:   -62.9647636
Value loss:    12.1121588
Entropy loss:  -13.0196152
Total loss:    -12.3037319
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.021507512778043747 of Gallente 10M Bounty Reimbursement Tag (33606, 0.01 m3)
Policy loss:   -61.3077736
Value loss:    12.1746922
Entropy loss:  -12.7999134
Total loss:    -11.5995235
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.11571341753005981 of Khanid Warbird Insignia (16187, 0.1 m3)
Policy loss:   -61.066124
Value loss:    11.7693167
Entropy loss:  -12.8538733
Total loss:    -11.9119205
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.03708565980195999 of Damnation Purity of the Throne SKIN (42582, 0.01 m3)
Policy loss:   -62.2864342
Value loss:    11.8023357
Entropy loss:  -13.0824547
Total loss:    -12.3519354
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 245
Action 1: Buy 0.023159215226769447 of Nova Light Missile Blueprint (814, 0.01 m3)
Policy loss:   -40.4113312
Value loss:    5.86034298
Entropy loss:  -12.8758678
Total loss:    -9.8415184
Gain:          3.799990000203252


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 3685


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 11695


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.01611940935254097 of Large Beam Laser Specialization (12205, 0.01 m3)
Policy loss:   -78.4350586
Value loss:    18.628561
Entropy loss:  -13.0369053
Total loss:    -11.76406
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.6003318428993225 of Zainou 'Gypsy' Weapon Disruption WD-905 (27234, 1.0 m3)
Policy loss:   -68.3458939
Value loss:    14.586915
Entropy loss:  -12.9064369
Total loss:    -11.9070683
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.013409238308668137 of Arbalest Compact XL Torpedo Launcher Blueprint (41183, 0.01 m3)
Policy loss:   -64.6595
Value loss:    12.93048
Entropy loss:  -12.923295
Total loss:    -12.1394625
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.0357951819896698 of Standup AXL-SR Missile Blueprint (37854, 0.01 m3)
Policy loss:   -63.1445313
Value loss:    12.3939095
Entropy loss:  -12.8971825
Total loss:    -12.0905752
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 12826
Transferred 2 of 12826
Action 1: Buy 2.0293266773223877 of Focused Warp Disruption Script (29003, 1.0 m3)
Policy loss:   1531.08521
Value loss:    60596
Entropy loss:  -13.6743937
Total loss:    61187.4063
Gain:          764.9849699996412


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.04112976789474487 of Guristas Mjolnir Torpedo (27337, 0.05 m3)
Policy loss:   -1201.78064
Value loss:    5029.98633
Entropy loss:  -12.2607393
Total loss:    4565.61914
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.04655760899186134 of Capital Radar Sensor Cluster Blueprint (29096, 0.01 m3)
Policy loss:   -533.321838
Value loss:    846.990906
Entropy loss:  -13.2122135
Total loss:    640.861511
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.11202387511730194 of Sin Spirit SKIN (44932, 0.01 m3)
Policy loss:   -366.753784
Value loss:    389.114868
Entropy loss:  -13.3735647
Total loss:    247.334259
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 28999
Transferred 139 of 28999
Action 1: Buy 1.6377997398376465 of Zainou 'Beancounter' Research RR-605 (27179, 1.0 m3)
Policy loss:   661.90509
Value loss:    33511.1367
Entropy loss:  -13.2880993
Total loss:    33766.7539
Gain:          994.1820000000298


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 3673
Action 1: Buy 0.08628718554973602 of Rodiva Red Forests Thunderbird SKIN (52443, 0.01 m3)
Policy loss:   -581.491516
Value loss:    991.584534
Entropy loss:  -13.2667198
Total loss:    766.845459
Gain:          0.10999999940395355


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 11691
Transferred 27 of 11691


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 3685
Transferred 2 of 249
Transferred 141 of 249


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.1108083724975586 of R.A.M.- Energy Tech (11482, 0.04 m3)
Policy loss:   -396.196259
Value loss:    462.848846
Entropy loss:  -13.2597647
Total loss:    309.69458
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 2.572943687438965 of Pure Synth Exile Booster (28689, 1.0 m3)
Policy loss:   -297.236847
Value loss:    253.804169
Entropy loss:  -13.4415026
Total loss:    138.879425
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 11691
Transferred 27 of 11691
Action 1: Buy 0.04474788159132004 of Thorax Police SKIN (52435, 0.01 m3)
Policy loss:   241.317886
Value loss:    18383.5762
Entropy loss:  -13.1379261
Total loss:    18476.7129
Gain:          977.1720000002533


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 44
Transferred 71 of 44
Action 1: Buy 0.291253924369812 of Republic Fleet Raid Leader Insignia (15663, 0.1 m3)
Policy loss:   261.486176
Value loss:    18564.1777
Entropy loss:  -13.5878429
Total loss:    18665.1016
Gain:          992.3731199987233


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.1387087106704712 of Men's 'Street' Shirt (urban camo) (4198, 0.1 m3)
Policy loss:   -556.353394
Value loss:    884.831177
Entropy loss:  -13.4272032
Total loss:    669.802551
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 11554


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 2 of 3777
Transferred 8 of 3777


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 12828
Transferred 8 of 4247
Action 1: Buy 0.11101614683866501 of Optimal Range Disruption Script Blueprint (29006, 0.01 m3)
Policy loss:   5769.20313
Value loss:    114026.961
Entropy loss:  -13.6532669
Total loss:    116255.672
Gain:          598.0809400007129


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.046987518668174744 of 1200mm Artillery Cannon I Blueprint (832, 0.01 m3)
Policy loss:   -2898.52466
Value loss:    27847.5508
Entropy loss:  -12.5301867
Total loss:    26727.6816
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.14453938603401184 of Mining Connections (3893, 0.01 m3)
Policy loss:   -447.932892
Value loss:    565.464172
Entropy loss:  -13.6235971
Total loss:    392.320068
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 12818
Transferred 1 of 12563
Action 1: Buy 0.5060168504714966 of Scourge Rage XL Torpedo (41271, 0.3 m3)
Policy loss:   2001.01331
Value loss:    12887.6641
Entropy loss:  -13.6063814
Total loss:    13660.6172
Gain:          199.69621999934316


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.01513778418302536 of Men's 'Form' T-shirt (brown) (4182, 0.1 m3)
Policy loss:   -786.736206
Value loss:    1751.12024
Entropy loss:  -13.5950394
Total loss:    1447.08704
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 28999
Transferred 1 of 3687
Transferred 1 of 47975


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 28999


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 3689


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.13428832590579987 of Vargur In Rust We Trust SKIN (52432, 0.01 m3)
Policy loss:   -608.633301
Value loss:    1022.37213
Entropy loss:  -13.6841908
Total loss:    787.144531
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 3715
Action 1: Buy 0.003927897661924362 of Medium Graviton Smartbomb I Blueprint (3942, 0.01 m3)
Policy loss:   -492.313599
Value loss:    694.062744
Entropy loss:  -13.4874716
Total loss:    503.774078
Gain:          0.094930000603199


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 3713
Action 1: Buy 0.005611300468444824 of Women's 'Function' T-shirt (green) (4208, 0.1 m3)
Policy loss:   -413.59848
Value loss:    455.36087
Entropy loss:  -13.8353653
Total loss:    295.479614
Gain:          9.697000000625849


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 11690
Action 1: Buy 0.12324155867099762 of Hound Blueprint (12035, 0.01 m3)
Policy loss:   -191.536545
Value loss:    169.621094
Entropy loss:  -13.6872978
Total loss:    95.5296478
Gain:          33.08696999959648


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 16272
Action 1: Buy 1.1554620265960693 of Heavy Water (16272, 0.4 m3)
Policy loss:   -353.676422
Value loss:    352.517639
Entropy loss:  -13.5349541
Total loss:    215.788071
Gain:          0.1499799992889166


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.03821854665875435 of Scimitar Blue Tiger SKIN (36821, 0.01 m3)
Policy loss:   -307.981323
Value loss:    269.356567
Entropy loss:  -13.4997025
Total loss:    150.280548
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.14784806966781616 of Loki In Rust We Trust SKIN (52428, 0.01 m3)
Policy loss:   -271.163086
Value loss:    209.109085
Entropy loss:  -13.4895315
Total loss:    104.25705
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.24679267406463623 of Minmatar Freedom Fighter Insignia I (15674, 0.1 m3)
Policy loss:   -246.712799
Value loss:    166.754837
Entropy loss:  -13.7686586
Total loss:    71.3466873
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.1406552940607071 of Women's 'Vise' Cybernetic Arm (white and gray ringed right) (34037, 0.1 m3)
Policy loss:   -221.75444
Value loss:    134.695251
Entropy loss:  -13.7257051
Total loss:    48.9295197
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.5255944728851318 of Women's 'Impress' Skirt (silver) (4074, 0.1 m3)
Policy loss:   -192.278549
Value loss:    105.490875
Entropy loss:  -14.0035067
Total loss:    31.1105652
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 29009


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 12563
Transferred 25 of 12563


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 2 of 3673
Action 1: Buy 0.1267314851284027 of Anshar Spirit SKIN (44948, 0.01 m3)
Policy loss:   -184.732666
Value loss:    92.5758133
Entropy loss:  -13.8135195
Total loss:    21.1120205
Gain:          0.22000000067055225


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.06056595593690872 of Federation Navy Sergeant Major Insignia I (15591, 0.1 m3)
Policy loss:   -172.419571
Value loss:    78.1432114
Entropy loss:  -13.9884243
Total loss:    11.4350948
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 3 of 44
Transferred 69 of 44
Action 1: Buy 3.8438425064086914 of Enriched Uranium (44, 1.5 m3)
Policy loss:   69.9404
Value loss:    9433.7832
Entropy loss:  -13.6465273
Total loss:    9460.70801
Gain:          992.3731199987233


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.09361684322357178 of Nereus Serpentis SKIN (42195, 0.01 m3)
Policy loss:   -180.265884
Value loss:    90.3280792
Entropy loss:  -13.689661
Total loss:    20.5907898
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.07560356706380844 of Harpy Blueprint (11382, 0.01 m3)
Policy loss:   -166.036118
Value loss:    75.8651123
Entropy loss:  -13.6936808
Total loss:    11.6251545
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 2 of 9840


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.03337707743048668 of Prophecy Blood Raiders SKIN (30 Days) (35434, 0.01 m3)
Policy loss:   -153.446411
Value loss:    64.6165
Entropy loss:  -13.7718382
Total loss:    5.23975563
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.19042092561721802 of Silver Ouroboros (42230, 0.1 m3)
Policy loss:   -142.087418
Value loss:    55.9557037
Entropy loss:  -13.6727819
Total loss:    0.967947841
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 3824
Action 1: Buy 0.1599520891904831 of Arazu Spirit SKIN (44918, 0.01 m3)
Policy loss:   -127.528084
Value loss:    46.7837715
Entropy loss:  -13.8882017
Total loss:    -2.58080387
Gain:          0.0989999994635582


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.03144529461860657 of Men's 'Mirelle' T-Shirt (Upwell) (40526, 0.1 m3)
Policy loss:   -90.378006
Value loss:    31.8181973
Entropy loss:  -13.9675598
Total loss:    -3.1948123
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 2 of 29005
Action 1: Buy 0.8557557463645935 of Toxic Waste (3729, 1.0 m3)
Policy loss:   -19.1098461
Value loss:    43.1759605
Entropy loss:  -14.2241898
Total loss:    35.6940536
Gain:          21.967799998819828


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 12 of 10246


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 11533
Transferred 22 of 11533


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.03692999482154846 of High Energy Physics (11433, 0.01 m3)
Policy loss:   -118.252014
Value loss:    41.2927551
Entropy loss:  -13.9196482
Total loss:    -4.48843288
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 1.7336647510528564 of Inherent Implants 'Noble' Repair Proficiency RP-901 (27073, 1.0 m3)
Policy loss:   -124.686836
Value loss:    40.921505
Entropy loss:  -14.0163755
Total loss:    -7.34631205
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.14757844805717468 of Basilisk Blueprint (11986, 0.01 m3)
Policy loss:   -72.112236
Value loss:    20.6535873
Entropy loss:  -14.4920292
Total loss:    -7.30650139
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.2072766125202179 of R.A.M.- Electronics (11483, 0.04 m3)
Policy loss:   -100.479622
Value loss:    29.8427
Entropy loss:  -14.2859373
Total loss:    -9.07507
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.040087148547172546 of Republic Fleet Commander Insignia II (15661, 0.1 m3)
Policy loss:   -70.1028595
Value loss:    16.0999393
Entropy loss:  -14.0802794
Total loss:    -11.0809975
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 12563
Transferred 25 of 12563
Transferred 1 of 245


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.0033600330352783203 of Logistics Frigates (40328, 0.01 m3)
Policy loss:   -80.9001694
Value loss:    25.5064526
Entropy loss:  -14.1209269
Total loss:    -5.84607267
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.11699183285236359 of Stabber Tronhadar Ink SKIN (40487, 0.01 m3)
Policy loss:   -104.409256
Value loss:    29.1350632
Entropy loss:  -13.9230118
Total loss:    -11.2983017
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.15794025361537933 of Mastodon Blue Tiger SKIN (36842, 0.01 m3)
Policy loss:   -58.0440254
Value loss:    15.2799
Entropy loss:  -14.3754845
Total loss:    -7.24443
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.2967083752155304 of Nuclear Physics (11451, 0.01 m3)
Policy loss:   -87.5709457
Value loss:    26.1225681
Entropy loss:  -13.9010601
Total loss:    -7.8055315
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 2 of 11540
Transferred 1 of 3713
Action 1: Buy 0.10977594554424286 of Sansha Claws Fairytale Book (37878, 0.01 m3)
Policy loss:   116.604927
Value loss:    1124.43359
Entropy loss:  -14.1673536
Total loss:    1169.38245
Gain:          155.64690000005066


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 244


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 11541


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.08716732263565063 of Gallente Starship Engineering (11450, 0.01 m3)
Policy loss:   -125.997665
Value loss:    42.8610229
Entropy loss:  -13.8705845
Total loss:    -5.91218567
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.03499988466501236 of R.Db - Kaalakiota (11464, 0.04 m3)
Policy loss:   -116.640656
Value loss:    38.1460304
Entropy loss:  -13.6088161
Total loss:    -7.01047611
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.05485159531235695 of Aeon Ironblood SKIN (43522, 0.01 m3)
Policy loss:   -102.408882
Value loss:    30.6546364
Entropy loss:  -14.1370668
Total loss:    -9.00742149
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.17016717791557312 of Cheetah Tronhadar Ink SKIN (40475, 0.01 m3)
Policy loss:   -88.7305069
Value loss:    23.9549522
Entropy loss:  -14.2154369
Total loss:    -10.4233122
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.19383877515792847 of Moa Blueprint (968, 0.01 m3)
Policy loss:   -95.9970932
Value loss:    27.8754215
Entropy loss:  -14.2969017
Total loss:    -9.3106966
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 12828


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 11541


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.2782896161079407 of Angel Silver Tag (12528, 0.1 m3)
Policy loss:   -94.9860916
Value loss:    25.7505741
Entropy loss:  -13.9725218
Total loss:    -11.0427055
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.10883078724145889 of Null M (12785, 0.0125 m3)
Policy loss:   -94.8106384
Value loss:    25.3035069
Entropy loss:  -13.6382523
Total loss:    -11.4196577
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 28999
Action 1: Buy 0.012621350586414337 of Medium Energy Nosferatu I Blueprint (12258, 0.01 m3)
Policy loss:   -24.0384827
Value loss:    3.97309852
Entropy loss:  -13.9973173
Total loss:    -5.41129494
Gain:          7.101299999281764


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 2 of 7247
Action 1: Buy 0.11915546655654907 of Federation Navy Fleet Colonel Insignia II (15673, 0.1 m3)
Policy loss:   1848.54565
Value loss:    29681.2676
Entropy loss:  -13.9164391
Total loss:    30395.3145
Gain:          436.9999899994582


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 41155
Transferred 1 of 12826
Action 1: Buy 0.18325607478618622 of DNA Sample (13288, 0.1 m3)
Policy loss:   3046.51733
Value loss:    25295.8965
Entropy loss:  -13.889595
Total loss:    26472.7539
Gain:          264.68998999893665


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 2 of 16275
Action 1: Buy 2.915029287338257 of Strontium Clathrates (16275, 3.0 m3)
Policy loss:   -884.817444
Value loss:    2136.47607
Entropy loss:  -13.7483158
Total loss:    1794.55029
Gain:          5.699980000033975


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.05540504306554794 of Anode Scoped Triple Neutron Blaster Cannon Blueprint (41088, 0.01 m3)
Policy loss:   -704.882813
Value loss:    1369.6731
Entropy loss:  -13.7434664
Total loss:    1097.26123
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.1897854059934616 of Nocxium (38, 0.01 m3)
Policy loss:   -596.218384
Value loss:    961.10968
Entropy loss:  -13.8229561
Total loss:    730.677307
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.2773190140724182 of Agitated Firestorm Filament (47896, 0.1 m3)
Policy loss:   -515.1745
Value loss:    712.00531
Entropy loss:  -13.862793
Total loss:    512.882141
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.02850949764251709 of Women's 'Quafe' T-shirt YC 113 (4066, 0.1 m3)
Policy loss:   -447.763214
Value loss:    546.049622
Entropy loss:  -13.8330593
Total loss:    372.969604
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 245


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.11537893116474152 of Naglfar In Rust We Trust SKIN (52433, 0.01 m3)
Policy loss:   -402.391052
Value loss:    427.525452
Entropy loss:  -14.028286
Total loss:    271.972595
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.0241561122238636 of Anode Scoped Dual Giga Beam Laser Blueprint (41122, 0.01 m3)
Policy loss:   -352.979492
Value loss:    340.733307
Entropy loss:  -13.7627439
Total loss:    204.271393
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.7189099788665771 of Focused Warp Disruption Script (29003, 1.0 m3)
Policy loss:   -313.763641
Value loss:    271.098938
Entropy loss:  -13.6915731
Total loss:    149.787689
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.16986888647079468 of Vagabond Blueprint (12000, 0.01 m3)
Policy loss:   -282.964081
Value loss:    219.012405
Entropy loss:  -13.7814989
Total loss:    109.599274
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.07569257915019989 of Women's 'Minima' Heels (graphite/white) (4120, 0.1 m3)
Policy loss:   -255.808212
Value loss:    179.001144
Entropy loss:  -13.7750111
Total loss:    80.0791397
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 3777


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 2 of 244
Transferred 1 of 11533
Action 1: Buy 0.24470368027687073 of Jaguar In Rust We Trust SKIN (52423, 0.01 m3)
Policy loss:   29.1355305
Value loss:    189.38504
Entropy loss:  -13.8022451
Total loss:    200.544617
Gain:          44.55237000063062


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.05557302385568619 of Women's 'Sterling' Dress Blouse (platinum) (4065, 0.1 m3)
Policy loss:   -249.406235
Value loss:    171.111343
Entropy loss:  -13.6729622
Total loss:    74.6633072
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.009112440049648285 of Phased Plasma S Blueprint (885, 0.01 m3)
Policy loss:   -230.782059
Value loss:    142.54332
Entropy loss:  -13.9285622
Total loss:    53.2885475
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.21498411893844604 of Women's 'Blades' Headwear (platinum) (34067, 0.1 m3)
Policy loss:   -212.183731
Value loss:    120.545219
Entropy loss:  -13.8694429
Total loss:    38.4759178
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.1150256097316742 of Punisher Blueprint (944, 0.01 m3)
Policy loss:   -192.149338
Value loss:    102.77327
Entropy loss:  -13.649374
Total loss:    28.4453487
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 2 of 11540


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 2 of 3685


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 11540


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.10199044644832611 of Large Pulse Laser Specialization (12215, 0.01 m3)
Policy loss:   -180.544968
Value loss:    88.1060104
Entropy loss:  -13.8163357
Total loss:    18.2600174
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.06281070411205292 of Men's Cap (Blood Raiders) (42726, 0.1 m3)
Policy loss:   -165.905502
Value loss:    76.2963562
Entropy loss:  -13.6707668
Total loss:    12.1070194
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.09677086025476456 of Quad 800mm Repeating Cannon I Blueprint (41092, 0.01 m3)
Policy loss:   -156.223373
Value loss:    66.4674377
Entropy loss:  -13.7749729
Total loss:    6.01785231
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.18822424113750458 of Women's 'Sterling' Dress Blouse (gold) (4157, 0.1 m3)
Policy loss:   -135.547424
Value loss:    51.6438599
Entropy loss:  -14.3063536
Total loss:    -0.821731508
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 3824
Action 1: Buy 0.1959531158208847 of Cerberus Blueprint (11994, 0.01 m3)
Policy loss:   -140.853867
Value loss:    52.9137878
Entropy loss:  -14.0108547
Total loss:    -1.59976792
Gain:          0.0989999994635582


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 28999


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.10953669250011444 of Wing Command (11574, 0.01 m3)
Policy loss:   -128.156265
Value loss:    45.6532249
Entropy loss:  -13.8981113
Total loss:    -3.95410681
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.10263074189424515 of Odin Synthetic Eye (left/dark) (4043, 0.1 m3)
Policy loss:   -125.559006
Value loss:    42.8314896
Entropy loss:  -13.806941
Total loss:    -5.77181101
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 29005
Action 1: Buy 0.05621931329369545 of Small Focused Beam Laser I Blueprint (838, 0.01 m3)
Policy loss:   -33.2764854
Value loss:    11.4184036
Entropy loss:  -14.1432199
Total loss:    -1.53590822
Gain:          10.983899999409914


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.003664843738079071 of Structure Advertisement Nexus Blueprint (36955, 0.01 m3)
Policy loss:   -111.609039
Value loss:    37.2653198
Entropy loss:  -13.938674
Total loss:    -5.94963264
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.17556068301200867 of Women's 'Excursion' Pants (black/gray) (3997, 0.1 m3)
Policy loss:   -120.431412
Value loss:    38.4578362
Entropy loss:  -13.9335775
Total loss:    -8.16541386
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 2 of 11540


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 1.1736775636672974 of Pure Synth Crash Booster (28687, 1.0 m3)
Policy loss:   -113.730301
Value loss:    34.7278633
Entropy loss:  -13.8927794
Total loss:    -9.30627251
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.12369078397750854 of Bestower Purity of the Throne SKIN (42595, 0.01 m3)
Policy loss:   -109.108612
Value loss:    31.6672306
Entropy loss:  -13.9824772
Total loss:    -10.5820436
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 2 of 12563
Action 1: Buy 0.31083858013153076 of Quafe Ultra (12865, 0.1 m3)
Policy loss:   921.345032
Value loss:    2229.99219
Entropy loss:  -13.9753981
Total loss:    2585.83618
Gain:          75.19347999989986


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 9840
Action 1: Buy 0.0117940753698349 of Badger Blueprint (983, 0.01 m3)
Policy loss:   -146.291336
Value loss:    66.7985382
Entropy loss:  -13.7623854
Total loss:    10.1860685
Gain:          14.449680000543594


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.13351643085479736 of Covert Cynosural Field Generator I Blueprint (28647, 0.01 m3)
Policy loss:   -211.563721
Value loss:    118.712013
Entropy loss:  -13.9801912
Total loss:    36.8814659
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 11691


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 7 of 21


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 11855


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 3 of 21


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 2 of 11554


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 11540


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Transferred 1 of 3685


[I 2020-03-10 17:19:04,426] Finished trial#10 resulted in value: -12.889166999980807. Current best value is -12.889166999980807 with parameters: {'policy_weight': 0.38632813953684, 'entropy_weight': 0.00697645668277605, 'sgd_rate': 2.3413004095498875e-06, 'bandwidth': 94, 'd_notes': 8, 'n_layers_start': 4, 'd_heads_start_0': 9, 'n_heads_start_0': 15, 'd_heads_start_1': 13, 'n_heads_start_1': 11, 'd_heads_start_2': 3, 'n_heads_start_2': 5, 'd_heads_start_3': 3, 'n_heads_start_3': 1, 'n_layers_step': 4, 'd_heads_step_0': 10, 'n_heads_step_0': 6, 'd_heads_step_1': 10, 'n_heads_step_1': 2, 'd_heads_step_2': 15, 'n_heads_step_2': 4, 'd_heads_step_3': 6, 'n_heads_step_3': 1}.


Policy weight:  7.06e-04
Entropy weight: 1.77e+00
Learning rate:  3.87e-03
n_layers_start: 3
n_layers_step: 8


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4606.0, style=ProgressStyle(descript…


Bandwidth: 107


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

KeyboardInterrupt: 