In [1]:
import evebox as box
import pandas as pd
import tensorflow as tf
import cProfile
from tqdm.notebook import tqdm, trange

In [2]:
uni = box.Universe.from_esi(cache = 'universe.txt', tqdm = tqdm).range('Jita', min_sec = 0.5).with_market_types()

In [3]:
#orders = box.load_orders(uni, tqdm = tqdm)
orders = pd.read_csv('market/current.csv')

In [4]:
#orders.to_csv('market/current.csv')

In [4]:
orders

Unnamed: 0,order_id,duration,is_buy_order,issued,location_id,min_volume,price,range,system_id,type_id,volume_remain,volume_total
0,911190994,365,False,2020-03-09T20:18:58+00:00,60000004,1,14.87,region,30002780,41,23572,23572
1,911190995,365,False,2020-03-09T20:18:58+00:00,60000007,1,14.03,region,30002779,41,23572,23572
2,911190996,365,False,2020-03-09T20:18:58+00:00,60000010,1,14.87,region,30002776,41,23572,23572
3,911190997,365,False,2020-03-09T20:18:58+00:00,60000013,1,14.03,region,30002779,41,23572,23572
4,911190998,365,False,2020-03-09T20:18:58+00:00,60000016,1,14.80,region,30002778,41,23572,23572
...,...,...,...,...,...,...,...,...,...,...,...,...
891209,5629686039,1,False,2020-03-09T22:21:26+00:00,60012103,1,1086.00,region,30005316,37544,1,1
891210,5629686166,30,False,2020-03-09T22:21:37+00:00,60012010,1,999999.99,region,30004970,656,2,2
891211,5629686170,90,False,2020-03-09T22:21:38+00:00,60014077,1,12500.00,region,30003389,20763,1000,1000
891212,5629686218,3,False,2020-03-09T22:21:42+00:00,60015036,1,31000.00,region,30045305,12195,1,1


In [4]:
gym = box.tf.TradingGym(uni, orders)

In [17]:
jita = [s["system_id"] for s in uni.systems.values() if s["name"] == "Jita"][0]

state = box.MutableState()
state.time_left = 100.0
state.system    = jita

state = box.State(state)

In [22]:
class Model(tf.keras.Model):
    def __init__(self, universe):
        super().__init__()
        
        self.bandwidth = 128
        self.d_notes  = 4
        self.memory = 64
        
        self.embedding = box.tf.Embedding(universe, d_notes = self.d_notes, tqdm = tqdm)
        
        self.input_transforms = {
            k : tf.keras.layers.Dense(self.bandwidth)
            for k in ['state', 'orders', 'cargo', 'systems']
        }
        
        def otf(n):
            return tf.keras.layers.Dense(n)
        
        self.output_transforms = {
            'actions' : otf(3),
            'move_targets' : otf(1),
            'buy_params' : otf(3),
            'sell_params' : otf(3),
            'value' : otf(1)
        }
        
        self.stack_start = box.tf.StackedTogether(
            tf.keras.Sequential([
                box.tf.MultiHeadedAttention(self.bandwidth, 16, 8) for i in range(2)
            ])
        )
        
        self.stack_step = box.tf.StackedTogether(
            tf.keras.Sequential([
                box.tf.MultiHeadedAttention(self.bandwidth, 16, 8) for i in range(2)
            ])
        )
        
        self.rnn_cell = tf.keras.layers.GRUCell(self.bandwidth)
            
    
    def _preprocess_input(self, input):
        # Pre-process input (adds "note" variables to types & systems, joins orders with their types & systems)
        input = self.embedding(input)
        
        # Expand state to have the same shape as the other stuff
        input['state'] = tf.expand_dims(input['state'], axis = -2)
        
        # Expand all items into a [...,bandwidth] shape
        input = {
            k : self.input_transforms[k](v)
            for k, v in input.items()
        }
        
        return input
        
    def get_initial_state(self, input):
        input = self._preprocess_input(input)
        
        input = self.stack_start(input)
        
        return [tf.reshape(input['state'], [-1, self.bandwidth])]
    
    def call(self, input):
        input, rnn_state = input
        
        batch_shape = tf.shape(input['state'])[:-1]
        
        input = self._preprocess_input(input)
        
        # Add RNN state to the mix
        input['rnn_state'] = tf.reshape(rnn_state, tf.concat([batch_shape, [1, self.bandwidth]], axis = 0))
        
        # We don't want orders in here, too costly (but probably not present anyway)
        if 'orders' in input:
            del input['orders']
        
        # Apply attention stack
        input = self.stack_step(input)
        
        # Extract rnn state & apply to cell
        rnn_in = tf.reshape(input['rnn_state'], [-1, self.bandwidth])
        _, rnn_state = self.rnn_cell(rnn_in, rnn_state)
        del input['rnn_state']
        
        output = {
            'actions' : tf.squeeze(
                self.output_transforms['actions'](input['state']),
                axis = -2
            ),
            
            'move_targets' : tf.squeeze(
                self.output_transforms['move_targets'](input['systems']),
                axis = -1
            ),
            
            'buy_params'  : self.output_transforms['buy_params'] (input['cargo']),
            'sell_params' : self.output_transforms['sell_params'](input['cargo']),
            
            'value' : tf.squeeze(
                self.output_transforms['value'](input['state']),
                axis = [-2, -1]
            )
        }
        
        return output, rnn_state
        
        
        

model = Model(uni)

HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=14472.0, style=ProgressStyle(descrip…




In [None]:
unroller = gym.unroll_model(model, tqdm);

opt = tf.keras.optimizers.SGD(1e-5)

unroller(state, 1)

def get_loss():
    result = unroller(state, 100)
    
    policy_loss, value_loss, entropy_loss = gym.losses(result)
    
    total_loss = policy_loss + value_loss + 1e-3 * entropy_loss
    tf.print(policy_loss)
    tf.print(value_loss)
    tf.print(entropy_loss)
    tf.print(total_loss)
    return total_loss

def mean_loss():
    losses = tf.stack(
        [get_loss() for i in tqdm(range(1), desc = 'Batch', leave = False)],
        axis = -1
    )
    
    mean_loss = tf.math.reduce_mean(losses)
    
    return mean_loss

for i in trange(100, desc = 'Iterations'):
    print(opt.minimize(mean_loss, model.trainable_variables))

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Iterations', style=ProgressStyle(description_width='initi…

HBox(children=(FloatProgress(value=0.0, description='Batch', max=2.0, style=ProgressStyle(description_width='i…



HBox(children=(FloatProgress(value=0.0), HTML(value='')))


-649.748474
323.713348
-25.9855518
-326.061096


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


-649.469849
323.713348
-25.9872437
-325.782501


In [13]:
states, actions, logps, values = zip(*result)

actions

(Buy 61411932.0 of 42241,
 Buy 10420804.0 of 11567,
 Buy 41306360.0 of 11567,
 Buy 56765748.0 of 11567,
 Buy 25071562.0 of 42241,
 Buy 71965600.0 of 42241,
 Buy 69669368.0 of 42241,
 Buy 70687472.0 of 11567,
 Buy 44765412.0 of 42241,
 Buy 31007356.0 of 42241)

In [16]:
uni.types[11567]

{'capacity': 11250.0,
 'description': 'Casting his sight on his realm, the Lord witnessed\r\nThe cascade of evil, the torrents of war.\r\nBurning with wrath, He stepped \r\ndown from the Heavens\r\nTo judge the unworthy,\r\nTo redeem the pure.\r\n\r\n<i>-The Scriptures, Revelation Verses 2:12</i>\r\n\r\n',
 'dogma_attributes': [{'attribute_id': 3, 'value': 0.0},
  {'attribute_id': 4, 'value': 2400000000.0},
  {'attribute_id': 9, 'value': 390000.0},
  {'attribute_id': 11, 'value': 1250000.0},
  {'attribute_id': 12, 'value': 8.0},
  {'attribute_id': 13, 'value': 5.0},
  {'attribute_id': 14, 'value': 8.0},
  {'attribute_id': 15, 'value': 0.0},
  {'attribute_id': 18, 'value': 0.0},
  {'attribute_id': 19, 'value': 1.0},
  {'attribute_id': 21, 'value': 0.0},
  {'attribute_id': 37, 'value': 60.0},
  {'attribute_id': 38, 'value': 11250.0},
  {'attribute_id': 552, 'value': 22760.0},
  {'attribute_id': 48, 'value': 815.0},
  {'attribute_id': 49, 'value': 0.0},
  {'attribute_id': 564, 'value': 70

In [8]:
losses = gym.losses(result)

In [9]:
losses

(<tf.Tensor: shape=(), dtype=float32, numpy=2.7522936>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.0017755391>,
 <tf.Tensor: shape=(), dtype=float32, numpy=217.35147>)

In [None]:
with tf.GradientTape() as tape:
    result = unroller(
        state, 150
    )
    #cProfile.run('unroller(state, 100)', 'profile_stats')

In [None]:
import pstats
p = pstats.Stats('profile_stats')
p.sort_stats('cumtime')
p.print_stats()

In [None]:
result[0:1]

In [None]:
tape.gradient(result[0][2], sources=model.trainable_variables)

In [None]:
model.trainable_variables