In [1]:
import evebox as box
import pandas as pd
import tensorflow as tf
import cProfile
from tqdm.notebook import tqdm, trange

In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')

# Currently, memory growth needs to be the same across GPUs
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

print('Found {} gpus'.format(len(gpus)))

Found 1 gpus


In [3]:
uni = box.Universe.from_esi(cache = 'universe.txt', tqdm = tqdm).range('Jita', min_sec = 0.5).with_market_types()

In [4]:
#orders = box.load_orders(uni, tqdm = tqdm)
orders = pd.read_csv('market/current.csv')

In [4]:
#orders.to_csv('market/current.csv')

In [5]:
# Filter all buy orders which are 50x above the median
median   = orders.groupby('type_id')[['price']].median().rename(columns = {'price' : 'median_price'})

orders = orders[
    (50 * orders.merge(median, on = 'type_id')['median_price'] >= orders['price']) |
    ~orders['is_buy_order']
]

In [6]:
# Filter all orders that can not be handled profitably
max_buy  = orders[ orders['is_buy_order']].groupby('type_id')[['price']].max().rename(columns = {'price' : 'max_buy'})
min_sell = orders[~orders['is_buy_order']].groupby('type_id')[['price']].min().rename(columns = {'price' : 'min_sell'})

orders = orders.merge(max_buy, on = 'type_id').merge(min_sell, on = 'type_id')

orders = orders[
    (
        # Sell orders must be below max_buy
        orders['is_buy_order'] |
        (orders['price'] <= orders['max_buy'])
    ) & 
    (
        # Buy orders must be above min_sell
        ~orders['is_buy_order'] |
        (orders['price'] >= orders['min_sell'])
    )
]

In [8]:
# Reduce universe types to all types appearing in the order list

# Pre-filter types
def check_type(t):
    if t["volume"] > 155000:
        return False
    
    return t['type_id'] in orders['type_id']

# Reduce universe
types = [t for t, v in tqdm(uni.types.items()) if check_type(v)]
print('Reducing from {} to {} types'.format(len(uni.types), len(types)))
uni = uni.with_types(types)

# Reduce orders
orders = orders[(orders['type_id'].isin(uni.types)) & (orders['system_id'].isin(uni.systems))]

HBox(children=(FloatProgress(value=0.0, max=4317.0), HTML(value='')))


Reducing from 4317 to 4317 types


In [9]:
class Model(tf.keras.Model):
    def __init__(self, universe, bandwidth = 128, d_notes = 4, layers_start = [(16, 8)] * 2, layers_step = [(16, 8)] * 2):
        super().__init__()
        
        self.bandwidth = bandwidth
        self.d_notes  = d_notes
        
        kw = {
            'use_bias' : True
        }
        
        self.embedding = box.tf.Embedding(universe, d_notes = self.d_notes, tqdm = tqdm)
        
        self.input_transforms = {
            k : tf.keras.layers.Dense(self.bandwidth)
            for k in ['state', 'orders', 'cargo', 'systems']
        }
        
        def otf(n):
            return tf.keras.layers.Dense(n, **kw)
        
        self.output_transforms = {
            'actions' : otf(3),
            'move_targets' : otf(1),
            'buy_params' : otf(3),
            'sell_params' : otf(3),
            'value' : otf(1)
        }
        
        def stack(layer_info):
            return box.tf.StackedTogether(
                tf.keras.Sequential([
                    box.tf.MultiHeadedAttention(self.bandwidth, d_head, n_heads) for d_head, n_heads in layer_info
                ])
            )
        
        self.stack_start = stack(layers_start)
        self.stack_step = stack(layers_step)
        
        self.rnn_cell = tf.keras.layers.GRUCell(self.bandwidth)
            
    
    def _preprocess_input(self, input):
        # Pre-process input (adds "note" variables to types & systems, joins orders with their types & systems)
        input = self.embedding(input)
        
        # Expand state to have the same shape as the other stuff
        input['state'] = tf.expand_dims(input['state'], axis = -2)
        
        # Expand all items into a [...,bandwidth] shape
        input = {
            k : self.input_transforms[k](v)
            for k, v in input.items()
        }
        
        return input
        
    def get_initial_state(self, input):
        input = self._preprocess_input(input)
        
        input = self.stack_start(input)
        
        return [tf.reshape(input['state'], [-1, self.bandwidth])]
    
    def call(self, input):
        input, rnn_state = input
        
        batch_shape = tf.shape(input['state'])[:-1]
        
        input = self._preprocess_input(input)
        
        # Add RNN state to the mix
        input['rnn_state'] = tf.reshape(rnn_state, tf.concat([batch_shape, [1, self.bandwidth]], axis = 0))
        
        # We don't want orders in here, too costly (but probably not present anyway)
        if 'orders' in input:
            del input['orders']
        
        # Apply attention stack
        input = self.stack_step(input)
        
        # Extract rnn state & apply to cell
        rnn_in = tf.reshape(input['rnn_state'], [-1, self.bandwidth])
        _, rnn_state = self.rnn_cell(rnn_in, rnn_state)
        del input['rnn_state']
        
        output = {
            'actions' : tf.squeeze(
                self.output_transforms['actions'](input['state']),
                axis = -2
            ),
            
            'move_targets' : tf.squeeze(
                self.output_transforms['move_targets'](input['systems']),
                axis = -1
            ),
            
            'buy_params'  : self.output_transforms['buy_params'] (input['cargo']),
            'sell_params' : self.output_transforms['sell_params'](input['cargo']),
            
            'value' : tf.squeeze(
                self.output_transforms['value'](input['state']),
                axis = [-2, -1]
            )
        }
        
        return output, rnn_state

In [14]:
import optuna as tuna
from time import time

gym = box.tf.TradingGym(uni, orders)

jita = [s["system_id"] for s in uni.systems.values() if s["name"] == "Jita"][0]

state = box.MutableState()
state.universe  = uni
state.time_left = 100.0
state.system    = jita
state.wallet    = 1e7
state.collateral_limit = 1e6
state.volume_limit = 1e4

state = box.State(state)

def opt_fun(trial):   
    policy_weight = trial.suggest_loguniform('policy_weight', 1e-6, 1e2)
    entropy_weight = trial.suggest_loguniform('entropy_weight', 1e-6, 1e2)
    learning_rate = trial.suggest_loguniform('sgd_rate', 1e-8, 1)
    
    print('Policy weight:  {:.2e}'.format(policy_weight))
    print('Entropy weight: {:.2e}'.format(entropy_weight))
    print('Learning rate:  {:.2e}'.format(learning_rate))
    
    def make_layer_info(name, nmin, nmax):
        n = trial.suggest_int('n_layers_{}'.format(name), nmin, nmax)
        print('n_layers_{}: {}'.format(name, n))
        return [
            (
                trial.suggest_int('d_heads_{}_{}'.format(name, i), 1, 16),
                trial.suggest_int('n_heads_{}_{}'.format(name, i), 1, 16)
            )
            for i in range(n)
        ]
    
    model = Model(
        uni,
        bandwidth    = trial.suggest_int('bandwidth', 8, 128),
        d_notes      = trial.suggest_int('d_notes', 1, 8),
        layers_start = make_layer_info('start', 1, 4),
        layers_step  = make_layer_info('step' , 1, 8)
    )
    
    print('Bandwidth: {}'.format(model.bandwidth))
    
    unroller = gym.unroll_model(model, tqdm)
    
    # We have to unroll once to make sure the weights exist
    unroller(state, 2)
    
    loss_sideline = tf.Variable(0, dtype = tf.float32)
    
    def loss():
        result = unroller(state, 100)

        policy_loss, value_loss, entropy_loss = gym.losses(result)

        total_loss = value_loss + policy_weight * policy_loss + entropy_weight * entropy_loss
        
        print('Action 1: {}'.format(result[0][1]))
        
        #print('Final state:')
        #print(result[-1][0])
        
        loss_sideline.assign(total_loss)

        tf.print('Policy loss:  ', policy_loss)
        tf.print('Value loss:   ', value_loss)
        tf.print('Entropy loss: ', entropy_loss) 
        tf.print('Total loss:   ', total_loss)
        tf.print('Gain:         ', result[-1][0].value - result[0][0].value)
        
        return total_loss
    
    def performance(n = 20):        
        def single_run():
            result = unroller(state, 100)
            return result[-1][0].value - state.value
        
        return sum([single_run() for i in range(n)]) / n
        #return sum([loss() for i in range(n)]) / n
    
    opt = tf.keras.optimizers.SGD(learning_rate)
    
    print('Trial info')
    print(trial)
    print('Beginning')
    
    last_time = time()
    counter = 0
    
    for i in trange(0, 100, desc = 'Iterations', leave = False):
        opt.minimize(loss, model.trainable_variables)
        
        tf.debugging.assert_all_finite(loss_sideline, 'Non-finite loss encountered')
        
        # Report every minute for pruning
        if time() - last_time > 60:
            trial.report(-performance(n = 5), counter)
            counter += 1
            
            last_time = time()
        
        if trial.should_prune():
            raise tuna.exceptions.TrialPruned()
    
    return -performance()

In [15]:
study = tuna.create_study(
    pruner = tuna.pruners.HyperbandPruner(),
    sampler = tuna.samplers.TPESampler()
)



In [16]:
study.optimize(opt_fun, catch = (Exception,))

Policy weight:  2.23e-04
Entropy weight: 2.34e-02
Learning rate:  8.76e-03
n_layers_start: 3
n_layers_step: 6


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4317.0, style=ProgressStyle(descript…


Bandwidth: 61


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x0000000064715908>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…



HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000069
Policy loss:   -417.697205
Value loss:    993.005615
Entropy loss:  -14.4373398
Total loss:    992.574707
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 2531258.0 of Intermediate Medal
Policy loss:   1.30506726e+16
Value loss:    1.46369079e+29
Entropy loss:  -24.5469093
Total loss:    1.46369079e+29
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30000158
Policy loss:   nan
Value loss:    nan
Entropy loss:  -14.8583546
Total loss:    nan
Gain:          0.0


[W 2020-03-10 18:35:28,455] Setting status of trial#0 as TrialState.FAIL because of the following error: InvalidArgumentError()
Traceback (most recent call last):
  File "d:\programme\python 3.6.6\lib\site-packages\optuna\study.py", line 648, in _run_trial
    result = func(trial)
  File "<ipython-input-14-b0b10766bd93>", line 97, in opt_fun
    tf.debugging.assert_all_finite(loss_sideline, 'Non-finite loss encountered')
  File "d:\programme\python 3.6.6\lib\site-packages\tensorflow_core\python\ops\numerics.py", line 67, in verify_tensor_all_finite_v2
    verify_input = array_ops.check_numerics(x, message=message)
  File "d:\programme\python 3.6.6\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py", line 902, in check_numerics
    _ops.raise_from_not_ok_status(e, name)
  File "d:\programme\python 3.6.6\lib\site-packages\tensorflow_core\python\framework\ops.py", line 6606, in raise_from_not_ok_status
    six.raise_from(core._status_to_exception(e.code, message), None)
  File 

Policy weight:  1.22e-01
Entropy weight: 3.50e+00
Learning rate:  2.49e-01
n_layers_start: 4
n_layers_step: 3


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4317.0, style=ProgressStyle(descript…


Bandwidth: 120


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x00000000502EF1D0>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…



HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 0.005382454954087734 of Alliance Tournament VIII: HYDRA RELOADED (33389, 0.01 m3)
Policy loss:   -155.367264
Value loss:    88.8519
Entropy loss:  -11.8713074
Total loss:    28.3082
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 17825792.0 of Retriever (17478, 150000.0 m3)
Policy loss:   8.67493636e+13
Value loss:    6.093287e+24
Entropy loss:  -25.2907
Total loss:    6.093287e+24
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 77422592.0 of PLEX (44992, 0.0002 m3)
Policy loss:   nan
Value loss:    nan
Entropy loss:  -25.3000832
Total loss:    nan
Gain:          0.0


[W 2020-03-10 18:35:50,119] Setting status of trial#1 as TrialState.FAIL because of the following error: InvalidArgumentError()
Traceback (most recent call last):
  File "d:\programme\python 3.6.6\lib\site-packages\optuna\study.py", line 648, in _run_trial
    result = func(trial)
  File "<ipython-input-14-b0b10766bd93>", line 97, in opt_fun
    tf.debugging.assert_all_finite(loss_sideline, 'Non-finite loss encountered')
  File "d:\programme\python 3.6.6\lib\site-packages\tensorflow_core\python\ops\numerics.py", line 67, in verify_tensor_all_finite_v2
    verify_input = array_ops.check_numerics(x, message=message)
  File "d:\programme\python 3.6.6\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py", line 902, in check_numerics
    _ops.raise_from_not_ok_status(e, name)
  File "d:\programme\python 3.6.6\lib\site-packages\tensorflow_core\python\framework\ops.py", line 6606, in raise_from_not_ok_status
    six.raise_from(core._status_to_exception(e.code, message), None)
  File 

Policy weight:  7.65e-02
Entropy weight: 3.08e-06
Learning rate:  1.93e-03
n_layers_start: 1
n_layers_step: 7


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4317.0, style=ProgressStyle(descript…


Bandwidth: 111


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x000000007E8462B0>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…



HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30001363
Policy loss:   1.90227294
Value loss:    0.0160747077
Entropy loss:  -12.2110252
Total loss:    0.161592916
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.21595865488052368 of Serpentis Brass Tag
Policy loss:   -50561.8242
Value loss:    8123202.5
Entropy loss:  -12.7609386
Total loss:    8119333.5
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[W 2020-03-10 18:36:11,872] Setting status of trial#2 as TrialState.FAIL because of the following error: ValueError('cannot convert float NaN to integer',)
Traceback (most recent call last):
  File "d:\programme\python 3.6.6\lib\site-packages\optuna\study.py", line 648, in _run_trial
    result = func(trial)
  File "<ipython-input-14-b0b10766bd93>", line 95, in opt_fun
    opt.minimize(loss, model.trainable_variables)
  File "d:\programme\python 3.6.6\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 316, in minimize
    loss, var_list=var_list, grad_loss=grad_loss)
  File "d:\programme\python 3.6.6\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 350, in _compute_gradients
    loss_value = loss()
  File "<ipython-input-14-b0b10766bd93>", line 56, in loss
    result = unroller(state, 100)
  File "D:\Daten\Repos\tf_playground\eveplan\evebox\tf\trading_gym.py", line 242, in run
    state = action(state)
  File "D:\Daten\R

Policy weight:  3.57e-01
Entropy weight: 5.37e+01
Learning rate:  5.24e-03
n_layers_start: 2
n_layers_step: 8


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4317.0, style=ProgressStyle(descript…


Bandwidth: 60


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x000000007EC379E8>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…



HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.0011503067798912525 of Alliance Tournament VIII: HYDRA RELOADED
Policy loss:   1643.83154
Value loss:    11484.9639
Entropy loss:  -11.0387087
Total loss:    11479.668
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 17965056.0 of Federation Navy Fleet Commander Insignia
Policy loss:   -3.14280782e+23
Value loss:    inf
Entropy loss:  -25.3002415
Total loss:    inf
Gain:          0.0


[W 2020-03-10 18:36:37,165] Setting status of trial#3 as TrialState.FAIL because of the following error: InvalidArgumentError()
Traceback (most recent call last):
  File "d:\programme\python 3.6.6\lib\site-packages\optuna\study.py", line 648, in _run_trial
    result = func(trial)
  File "<ipython-input-14-b0b10766bd93>", line 97, in opt_fun
    tf.debugging.assert_all_finite(loss_sideline, 'Non-finite loss encountered')
  File "d:\programme\python 3.6.6\lib\site-packages\tensorflow_core\python\ops\numerics.py", line 67, in verify_tensor_all_finite_v2
    verify_input = array_ops.check_numerics(x, message=message)
  File "d:\programme\python 3.6.6\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py", line 902, in check_numerics
    _ops.raise_from_not_ok_status(e, name)
  File "d:\programme\python 3.6.6\lib\site-packages\tensorflow_core\python\framework\ops.py", line 6606, in raise_from_not_ok_status
    six.raise_from(core._status_to_exception(e.code, message), None)
  File 

Policy weight:  9.81e-02
Entropy weight: 3.49e-02
Learning rate:  5.80e-04
n_layers_start: 2
n_layers_step: 6


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4317.0, style=ProgressStyle(descript…


Bandwidth: 65


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x000000009426DDD8>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…



HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 156669.5625 of Retriever
Policy loss:   1533.88525
Value loss:    2946.58179
Entropy loss:  -20.3227577
Total loss:    3096.38159
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Move to system 30002780
Policy loss:   -9.15259228e+10
Value loss:    6.56576887e+19
Entropy loss:  -9.13986492
Total loss:    6.56576887e+19
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[W 2020-03-10 18:36:58,864] Setting status of trial#4 as TrialState.FAIL because of the following error: ValueError('cannot convert float NaN to integer',)
Traceback (most recent call last):
  File "d:\programme\python 3.6.6\lib\site-packages\optuna\study.py", line 648, in _run_trial
    result = func(trial)
  File "<ipython-input-14-b0b10766bd93>", line 95, in opt_fun
    opt.minimize(loss, model.trainable_variables)
  File "d:\programme\python 3.6.6\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 316, in minimize
    loss, var_list=var_list, grad_loss=grad_loss)
  File "d:\programme\python 3.6.6\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 350, in _compute_gradients
    loss_value = loss()
  File "<ipython-input-14-b0b10766bd93>", line 56, in loss
    result = unroller(state, 100)
  File "D:\Daten\Repos\tf_playground\eveplan\evebox\tf\trading_gym.py", line 242, in run
    state = action(state)
  File "D:\Daten\R

Policy weight:  4.74e+00
Entropy weight: 7.57e+00
Learning rate:  8.09e-02
n_layers_start: 3
n_layers_step: 4


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4317.0, style=ProgressStyle(descript…


Bandwidth: 83


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x0000000094266F28>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…



HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Buy 45694.22265625 of Retriever (17478, 150000.0 m3)
Policy loss:   -374.207611
Value loss:    201.393265
Entropy loss:  -18.9675751
Total loss:    -1717.65369
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[W 2020-03-10 18:37:14,572] Setting status of trial#5 as TrialState.FAIL because of the following error: ValueError('cannot convert float NaN to integer',)
Traceback (most recent call last):
  File "d:\programme\python 3.6.6\lib\site-packages\optuna\study.py", line 648, in _run_trial
    result = func(trial)
  File "<ipython-input-14-b0b10766bd93>", line 95, in opt_fun
    opt.minimize(loss, model.trainable_variables)
  File "d:\programme\python 3.6.6\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 316, in minimize
    loss, var_list=var_list, grad_loss=grad_loss)
  File "d:\programme\python 3.6.6\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 350, in _compute_gradients
    loss_value = loss()
  File "<ipython-input-14-b0b10766bd93>", line 56, in loss
    result = unroller(state, 100)
  File "D:\Daten\Repos\tf_playground\eveplan\evebox\tf\trading_gym.py", line 242, in run
    state = action(state)
  File "D:\Daten\R

Policy weight:  7.34e-02
Entropy weight: 1.61e-03
Learning rate:  3.69e-06
n_layers_start: 3
n_layers_step: 3


HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=4317.0, style=ProgressStyle(descript…


Bandwidth: 25


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Trial info
<optuna.trial.Trial object at 0x00000000887C5CF8>
Beginning


HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…



HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.009732084348797798 of Alliance Tournament VIII: HYDRA RELOADED
Policy loss:   61.5337029
Value loss:    12.471756
Entropy loss:  -12.4996777
Total loss:    16.9658508
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.013898919336497784 of Capital Magpulse Thruster Blueprint
Policy loss:   31.4387131
Value loss:    3.4027648
Entropy loss:  -12.2474728
Total loss:    5.68942451
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.011494382284581661 of Naglfar Tronhadar Ink SKIN
Policy loss:   15.8190298
Value loss:    0.862081349
Entropy loss:  -12.3194218
Total loss:    2.00272727
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.01844615861773491 of Quantum Microprocessor Blueprint
Policy loss:   6.1624527
Value loss:    0.135109097
Entropy loss:  -12.0799065
Total loss:    0.567711711
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.015904009342193604 of Small Beam Laser Specialization
Policy loss:   0.188847691
Value loss:    0.00012662269
Entropy loss:  -12.1217966
Total loss:    -0.00557638239
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.008920351043343544 of Minmatar Drone Specialization
Policy loss:   -3.7976687
Value loss:    0.0470992066
Entropy loss:  -12.5614901
Total loss:    -0.251773655
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.017400240525603294 of Scourge Light Missile
Policy loss:   -6.21234512
Value loss:    0.138440773
Entropy loss:  -12.0474186
Total loss:    -0.336749136
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.008974367752671242 of Scythe Tronhadar Ink SKIN
Policy loss:   -7.81207
Value loss:    0.212066233
Entropy loss:  -12.1480141
Total loss:    -0.380645633
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 1.0821446180343628 of Microwave M
Policy loss:   -8.70280457
Value loss:    0.270457
Entropy loss:  -12.0293159
Total loss:    -0.387409836
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.010272103361785412 of Capital Oscillator Capacitor Unit Blueprint
Policy loss:   -9.38305664
Value loss:    0.307571411
Entropy loss:  -12.1370316
Total loss:    -0.400374174
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.49450916051864624 of Eifyr and Co. 'Rogue' Warp Drive Speed WS-610
Policy loss:   -10.0597467
Value loss:    0.335130513
Entropy loss:  -12.4914207
Total loss:    -0.423030406
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.13452403247356415 of Novice Medal
Policy loss:   -10.2712727
Value loss:    0.36055097
Entropy loss:  -12.3144789
Total loss:    -0.412842542
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.06086477264761925 of Imperial Navy General Insignia I
Policy loss:   -10.1902924
Value loss:    0.372585654
Entropy loss:  -12.0460463
Total loss:    -0.394433856
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 7.514000415802002 of Corpus X-Type Armor Explosive Hardener
Policy loss:   -10.4056282
Value loss:    0.373461723
Entropy loss:  -12.2144213
Total loss:    -0.409627
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.02491459622979164 of Scourge Heavy Missile
Policy loss:   -10.4486
Value loss:    0.379544884
Entropy loss:  -12.2485781
Total loss:    -0.406751454
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.023378834128379822 of Drone Navigation
Policy loss:   -10.3721151
Value loss:    0.382279754
Entropy loss:  -12.0453491
Total loss:    -0.398077607
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.02706942707300186 of Rifter Nefantar SKIN
Policy loss:   -10.3665314
Value loss:    0.380933732
Entropy loss:  -12.0792627
Total loss:    -0.399068683
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.12226598709821701 of Cadmium
Policy loss:   -10.4939308
Value loss:    0.380425423
Entropy loss:  -12.2714329
Total loss:    -0.409233361
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.07343754917383194 of Caldari Navy Raid Leader Insignia
Policy loss:   -10.6077757
Value loss:    0.383745551
Entropy loss:  -12.3528175
Total loss:    -0.414396524
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.03927070274949074 of Large Remote Armor Repairer I Blueprint
Policy loss:   -10.2559261
Value loss:    0.387694389
Entropy loss:  -11.9103756
Total loss:    -0.383921266
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 2.1244585514068604 of High-grade Asklepian Epsilon
Policy loss:   -10.3696823
Value loss:    0.379255444
Entropy loss:  -12.1498623
Total loss:    -0.401092023
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.04139240086078644 of Mjolnir Auto-Targeting Light Missile I
Policy loss:   -10.6853685
Value loss:    0.380069
Entropy loss:  -12.5069189
Total loss:    -0.424014062
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.011531672440469265 of Cyclone Thukker Tribe SKIN (30 Days)
Policy loss:   -10.684515
Value loss:    0.388867855
Entropy loss:  -12.2976646
Total loss:    -0.414815
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.5211386680603027 of Compressed Vitric Hedbergite
Policy loss:   -10.6803331
Value loss:    0.391112119
Entropy loss:  -12.2770596
Total loss:    -0.4122307
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.0025909096002578735 of Stork Blue Tiger SKIN
Policy loss:   -10.5239468
Value loss:    0.391584367
Entropy loss:  -12.0647154
Total loss:    -0.399943
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.07704535871744156 of Men's 'Street' Shirt (gray urban camo)
Policy loss:   -10.8131533
Value loss:    0.387489855
Entropy loss:  -12.4929686
Total loss:    -0.425945312
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.19843539595603943 of Ammatar Navy Major Insignia I
Policy loss:   -10.6542139
Value loss:    0.394253939
Entropy loss:  -12.2022924
Total loss:    -0.4070521
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.03642246127128601 of Federation Navy Comet Sapphire Sungrazer SKIN
Policy loss:   -10.4416828
Value loss:    0.391707957
Entropy loss:  -12.0083008
Total loss:    -0.393693298
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.012056276202201843 of Phased Plasma M Blueprint
Policy loss:   -10.5715218
Value loss:    0.385314703
Entropy loss:  -12.2119541
Total loss:    -0.409940422
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Action 1: Sell 0.019747626036405563 of Large Artillery Specialization
Policy loss:   -10.2720804
Value loss:    0.387123913
Entropy loss:  -11.9053221
Total loss:    -0.385668725
Gain:          0.0


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

KeyboardInterrupt: 