In [1]:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import multiprocessing as mp

import pickle
import json
import functools
import matplotlib
import itertools
import copy

import esipy as esi

from tqdm.notebook import tqdm, trange

import requests
import time

In [2]:
# Turn off logging of the device each op is placed on
tf.debugging.set_log_device_placement(
    False
)

# Physical GPUs visible to TensorFlow
gpus = tf.config.experimental.list_physical_devices('GPU')

# Currently, memory growth needs to be the same across GPUs
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
    
# Report how many physical and logical GPU devices are available
logical_gpus = tf.config.experimental.list_logical_devices('GPU')
print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")

1 Physical GPUs, 1 Logical GPUs


In [3]:
import time

class Timer:
    """Context manager that measures and prints how long its body took.

    After the ``with`` block exits, ``start``, ``end`` and ``interval``
    (all in seconds) are available on the instance. The interval is also
    printed on exit.

    Uses ``time.perf_counter()``: ``time.clock()`` was removed in Python 3.8.
    """
    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, *args):
        self.end = time.perf_counter()
        self.interval = self.end - self.start

        print(self.interval)

In [4]:
# Set up network access
# ESI HTTP client; the transport adapter enlarges the connection pool and
# configures transport-level retries
esi_client = esi.EsiClient(
    transport_adapter = requests.adapters.HTTPAdapter(
        pool_connections=100,
        pool_maxsize=100,
        max_retries=10,
        pool_block=False
    )
)
esi_app    = esi.EsiApp()

# API description object; operations are built from it via `app.op[...]`
# NOTE(review): presumably this fetches the latest swagger spec on access — confirm
app = esi_app.get_latest_swagger

def try_request(req):
    """Issue `req` through `esi_client` until it succeeds and return the payload.

    Every response whose status is not 200 has its status and headers printed,
    followed by a 5 second pause before the request is sent again. There is no
    upper bound on the number of attempts.
    """
    response = esi_client.request(req)
    
    while response.status != 200:
        # Log the failure, back off, then retry
        print(response.status)
        print(response.header)
        time.sleep(5)
        
        response = esi_client.request(req)
    
    return response.data

Defining a 'User-Agent' header is a good practice, and allows CCP to contact you if required. To do this, simply add the following when creating the client: headers={'User-Agent':'something'}.


In [5]:
# Load universe data
def intd(f):
    """Read a JSON object from the open file `f` and return it as a dict with int keys."""
    parsed = json.load(f)
    return dict((int(key), value) for key, value in parsed.items())

# Each file holds a JSON object keyed by id; `intd` converts the keys to int
with open('universe/systems.txt') as f:
    systems = intd(f);

with open('universe/stargates.txt') as f:
    stargates = intd(f);
    
# Item types, keyed by type id
with open('universe/market_types.txt') as f:
    types = intd(f)
    
with open('universe/constellations.txt') as f:
    constellations = intd(f)

In [6]:
# Construct derived data about the universe
# Undirected graph with one node per system and one edge per stargate connection
systems_graph = nx.Graph();

for system in systems:
    systems_graph.add_node(system);

for stargate in stargates.values():
    systems_graph.add_edge(stargate["system_id"], stargate["destination"]["system_id"])

# Id of the system used as the root of the reachable region
root = 'Jita'
root_id = [v for v in systems if systems[v]["name"] == root][0]

# Limit to high-sec systems
subselect = [k for k in systems if systems[k]["security_status"] >= 0.5]
subgraph = systems_graph.subgraph(subselect)

# Limit to component connected to Jita
subselect = nx.node_connected_component(subgraph, root_id)
subgraph = systems_graph.subgraph(subselect)
print('High-sec has {} systems'.format(len(subgraph)))

# Fixed orderings; positions in these lists are used as tensor indices by the encoders
system_ids = list(subgraph)
type_ids = list(types)

# Landmark systems; distances to these are part of every system's encoding
# NOTE(review): landmarks are selected from all systems, not only `subgraph` —
# a landmark outside the subgraph would fail the distance lookup; confirm all are reachable high-sec
landmark_names = ['Jita', 'Amarr', 'Rens', 'Hek', 'Dodixie', 'Oursulaert', 'Tash-Murkon Prime', 'Agil'];
landmarks = [v["system_id"] for v in systems.values() if v["name"] in landmark_names]

# Compute pair-wise distances
print('Computing system distances')
# distances[a][b] is the number of jumps between a and b inside `subgraph`
distances = dict(tqdm(nx.shortest_path_length(subgraph), total = len(subgraph)))

@functools.lru_cache(maxsize=None)
def system_distance(s1, s2):
    """Return the precomputed shortest-path length between systems `s1` and `s2`.

    Looks the value up in the module-level `distances` table; a system missing
    from that table raises `KeyError`.
    """
    from_s1 = distances[s1]
    return from_s1[s2]

@functools.lru_cache(maxsize=None)
def get_region_id(system_id):
    """Return the region id of `system_id`, resolved through its constellation."""
    constellation_id = systems[system_id]["constellation_id"]
    return constellations[constellation_id]["region_id"]

@functools.lru_cache(maxsize=None)
def get_station(station_id):
    """Fetch the station record for `station_id` from ESI (single attempt, memoized)."""
    build_op = app.op['get_universe_stations_station_id']
    response = esi_client.request(build_op(station_id = station_id))
    return response.data

@functools.lru_cache(maxsize=None)
def in_highsec(station_id):
    """Return True if the station lies in a system of the high-sec `subgraph`.

    Any failure while looking the station up (request error, missing field,
    ...) is treated as "not in high-sec" and yields False. The result is
    memoized, so a False caused by a transient failure is remembered too.
    """
    try:
        return get_station(station_id).system_id in subgraph
    except Exception:
        # Best-effort lookup; `Exception` (not a bare except) so that
        # KeyboardInterrupt / SystemExit still propagate
        return False

print('Loading regions')
# Distinct regions containing at least one system of the high-sec subgraph
# (built from a set, so the list order is arbitrary)
region_ids = list(set(
    get_region_id(system_id) for system_id in subgraph
))

High-sec has 972 systems
Computing system distances


HBox(children=(FloatProgress(value=0.0, max=972.0), HTML(value='')))


Loading regions


In [7]:
# Load market orders from csv file
# Order book indexed by order id
orders = pd.read_csv('market/current.csv', index_col = 'order_id')
# Derive each order's region from its system
orders['region_id'] = orders['system_id'].apply(get_region_id)
# Sort by order id in place
orders.sort_index(inplace = True)

  mask |= (ar1 == a)


In [8]:
# Inspect all orders for type id 34
orders[orders.type_id == 34]

Unnamed: 0_level_0,duration,is_buy_order,issued,location_id,min_volume,price,range,system_id,type_id,volume_remain,volume_total,region_id
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
4865513902,90,True,2020-01-15T02:00:54+00:00,60001864,1,0.07,40,30002659,34,14742738,20000000,10000032
4877460168,90,True,2020-01-31T03:33:37+00:00,60002293,1,0.07,20,30002737,34,18944474,20000000,10000033
4986961514,90,True,2020-02-17T22:41:35+00:00,60003427,1,3.11,1,30001395,34,282358093,500000000,10000016
5024726125,90,True,2020-02-01T10:54:24+00:00,60011848,1,2.02,2,30002731,34,89129125,100000000,10000032
5031138667,90,False,2020-01-13T22:03:30+00:00,60003469,1,6619.00,region,30000142,34,67460,100000,10000002
...,...,...,...,...,...,...,...,...,...,...,...,...
5623248037,90,False,2020-03-01T10:56:14+00:00,60012739,1,5.00,region,30005199,34,120000,120000,10000067
5623250342,90,False,2020-03-01T10:59:17+00:00,60015046,1,7.00,region,30023410,34,82158,82158,10000042
5623250516,90,True,2020-03-01T10:59:28+00:00,60015001,1,0.01,region,30011407,34,1000000,1000000,10000016
5623259316,90,False,2020-03-01T11:29:17+00:00,60003760,1,8.01,region,30000142,34,49318128,56786488,10000002


In [9]:
## Structure of the state space
class StateProp:
    """Descriptor of one state property.

    default     - value used when the property is not provided
    convert_in  - convert[0]; applied when a value is stored in a `State`
    convert_out - convert[1]; applied when a value is copied into a `MutableState`

    Both converters default to the identity function.
    """
    def __init__(self, default, convert = (lambda x: x, lambda x: x)):
        self.default = default
        self.convert_in, self.convert_out = convert[0], convert[1]

def to_fixed(x):
    """Convert `x` into an immutable `State`; `None` is passed through unchanged."""
    return None if x is None else State(x)

def to_mutable(x):
    """Convert `x` into a `MutableState`; `None` is passed through unchanged."""
    if x is None:
        # Previously `return none`, which raised NameError
        return None
    return MutableState(x)

# Converter pair for dict-valued props: stored as a tuple of (key, value) pairs
# inside a State, and expanded back into a dict when made mutable
dict_prop = (
    lambda x: tuple(x.items()),
    lambda x: dict((k, v) for (k, v) in x)
)

# Schema of the state: property name -> StateProp (default value and optional converters).
# State and MutableState both iterate this dict to know which attributes exist.
state_props = {
    # Position of the ship
    'system' : StateProp(0),
    'station' : StateProp(None),
    
    # Configuration of the ship
    'volume_limit' : StateProp(0),
    'collateral_limit' : StateProp(0),
    
    # Resource balance
    'time_left' : StateProp(0),
    'wallet' : StateProp(0),
    # type id -> (amount, value)
    'cargo' : StateProp({}, dict_prop),
    
    # Market cursors
    'market_group' : StateProp(None),
    'market_item' : StateProp(None),
    'last_modified_item' : StateProp(None),
    # order id -> remaining volume after our transactions
    'updated_orders' : StateProp({}, dict_prop)
}
    
# Implementation of the state space
class State:
    """Immutable, hashable snapshot of the trading state.

    The properties listed in `state_props` are read from `inp` (falling back to
    each property's default), passed through the property's `convert_in` and
    stored under `_prop_<name>`. They are readable as plain attributes but
    cannot be reassigned. Dict-valued properties (`cargo`, `updated_orders`)
    are therefore held as tuples of (key, value) pairs.
    """
    def __init__(self, inp):
        for name, prop in state_props.items():
            inval = getattr(inp, name, prop.default)
            inval = prop.convert_in(inval)
            # Bypass our own __setattr__, which rejects state property names
            object.__setattr__(self, '_prop_' + name, inval)
            
        self._encode_cache = None
    
    def __getattr__(self, name):
        # Only reached when normal lookup fails, i.e. for the virtual property names
        if name in state_props:
            return getattr(self, '_prop_' + name)
        else:
            raise AttributeError('Unknown attribute ' + name)
    
    def __setattr__(self, name, val):
        if name in state_props:
            raise AttributeError('Attribute {} is immutable'.format(name))
        
        object.__setattr__(self, name, val)
    
    def __hash__(self):
        # XOR of the hashes of all state properties
        h = 0
        
        for name in state_props:
            h = h ^ hash(getattr(self, name))
        
        return h
    
    def __eq__(self, other):
        # Comparing against an unrelated object must not raise AttributeError
        if not isinstance(other, State):
            return NotImplemented
        
        for name in state_props:
            if getattr(self, name) != getattr(other, name):
                return False
        
        return True
    
    def __repr__(self):
        """Multi-line human readable summary, including the validation result."""
        (vleft, cleft) = self.limits_left()
        vmax = self.volume_limit
        cmax = self.collateral_limit
        v = vmax - vleft
        c = cmax - cleft
        
        # Copy the selection so adding a column never writes into (or warns about) `orders`
        orders_by_id = orders.loc[[k for k,v in self.updated_orders]].copy()
        orders_by_id['vol_mod'] = [v for k,v in self.updated_orders]
        
        def cargo_entry_info(entry):
            item_id, (amount, value) = entry
            
            item_name = types[item_id]["name"]
            
            return "{:>10} x {:<20} ({:>10.2f} ISK)".format(amount, item_name, value)
        
        def indent(x, n):
            return x.replace("\n", "\n" + " " * n)
        
        cargo_string = "\n".join([cargo_entry_info(e) for e in self.cargo])
        
        # Show the validation failure message instead of raising from __repr__
        try:
            self.validate()
            validated = 'Valid'
        except Exception as e:
            validated = str(e)
            
        return """
State:
    Validation: {valid}
    
    System:     {system} ({sysid})
    Station ID: {station}
    
    Resources:
        Time:       {t:>10.2f}
        Wallet:     {wallet:>10.2f}
        Collateral: {cleft:>10.2f} / {cmax:>10.2f} ({c:>10.2f} used)
        Cargo:      {vleft:>10.2f} / {vmax:>10.2f} ({v:>10.2f} used)
        
    Cargo contents:
        {cargo}
        
    Modified orders:
        {orders}
        """.format(
            valid = validated,
            system = systems[self.system]["name"] if self.system is not None and self.system in systems else "Unknown",
            sysid = self.system,
            station = self.station,
            
            t = self.time_left,
            wallet = self.wallet,
            c = c, cmax = cmax, cleft = cleft,
            v = v, vmax = vmax, vleft = vleft,
            
            cargo = indent(cargo_string, 8),
            orders = indent(orders_by_id.to_string(), 8)
        )
    
    def limits_left(self):
        """Return (remaining volume, remaining collateral) given the current cargo."""
        total_vol = 0
        total_col = 0
        
        for item, (count, value) in self.cargo:
            total_vol += types[item]["volume"] * count
            total_col += value
        
        return (self.volume_limit - total_vol, self.collateral_limit - total_col)
    
    def validate(self):
        """Raise AssertionError (with a short reason) if the state violates a constraint."""
        assert self.system is not None, 'No system specified'
        assert self.system in system_ids, 'Not in a known system'
        
        (v, c) = self.limits_left()
        assert v >= 0, 'Volume exceeded'
        assert c >= 0, 'Collateral exceeded'
        
        assert self.time_left >= 0, 'Out of time'
    
    @property
    def value(self):
        """Value of the state: the wallet balance."""
        return self.wallet
        

# Helper class for incremental state modification
class MutableState:
    """Plain attribute bag mirroring `state_props`, used to edit a state step by step.

    Without `other`, every property starts as a shallow copy of its default.
    With `other`, every property is read from it and passed through the
    property's `convert_out` (e.g. pair tuples become dicts again).
    """
    def __init__(self, other = None):
        for name, prop in state_props.items():
            if other is None:
                initial = copy.copy(prop.default)
            else:
                initial = prop.convert_out(getattr(other, name))
            
            setattr(self, name, initial)

In [195]:
# Inspect all orders for type id 34
orders[(orders.type_id == 34)]

Unnamed: 0_level_0,duration,is_buy_order,issued,location_id,min_volume,order_id.1,price,range,system_id,type_id,volume_remain,volume_total,region_id
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
5550894015,90,True,2020-02-04T18:26:04+00:00,60007981,1,5550894015,4.5,station,30005231,34,90130416,100000000,10000067
5566396132,90,True,2019-12-17T01:30:02+00:00,60004711,1,5566396132,0.04,40,30003458,34,10000000,10000000,10000042
5566562160,90,True,2020-02-17T20:43:27+00:00,60001117,1,5566562160,4.16,3,30002534,34,9298909,10000000,10000030
5582159532,90,True,2020-01-07T01:17:34+00:00,60002215,1,5582159532,4.0,station,30005037,34,95189,100000,10000065
5592994789,90,True,2020-02-04T15:43:23+00:00,60013867,1,5592994789,3.59,station,30000019,34,991126093,1000000000,10000001
5593074676,90,True,2020-01-21T19:00:08+00:00,60008917,1,5593074676,5.5,1,30002272,34,91231367,100000000,10000043
5594644241,90,True,2020-01-24T02:16:56+00:00,60000067,1,5594644241,0.05,region,30003081,34,9709759,10000000,10000038
5602009090,90,True,2020-02-24T16:48:12+00:00,60010639,1,5602009090,4.7,1,30003832,34,10881724,19350000,10000048
5605814977,90,True,2020-02-08T14:22:30+00:00,1021153079633,1,5605814977,4.12,region,30005308,34,49791174,50000000,10000068
5608045325,90,True,2020-02-11T04:33:57+00:00,60004909,1,5608045325,4.5,station,30002392,34,500000,500000,10000028


In [9]:
# Tests involving state manipulation

# Build a state by hand through the mutable helper
s = MutableState()
s.system = root_id
# order id -> remaining volume
s.updated_orders = {
    911191013 : 200
}
# type id -> (amount, value)
s.cargo = {
    34 : (5, 200),
    36 : (10000, 800)
}

s.volume_limit = 10000
s.collateral_limit = 10000
# Negative on purpose: validation should report 'Out of time'
s.time_left = -1

# Freeze into the immutable representation
s = State(s)

print(s)


State:
    Validation: Out of time
    
    System:     Jita
    Station ID: None
    
    Resources:
        Time:            -1.00
        Wallet:           0.00
        Collateral:    9000.00 /   10000.00 (   1000.00 used)
        Cargo:         9899.95 /   10000.00 (    100.05 used)
        
    Cargo contents:
                 5 x Tritanium            (    200.00 ISK)
             10000 x Mexallon             (    800.00 ISK)
        
    Modified orders:
                   duration  is_buy_order                     issued  location_id  min_volume  order_id.1  price   range  system_id  type_id  volume_remain  volume_total  region_id  vol_mod
        order_id                                                                                                                                                                             
        911191013       365         False  2020-02-28T12:33:48+00:00     60000061           1   911191013   14.0  region   30003084       41          2357

In [10]:
# --- Action calculus ---

# Wrapper that allows an action to just modify a mutable state representation
# and performs validity checking of resulting state
class ActionImpl:
    """Callable wrapper turning an in-place state mutator into a State -> State action.

    f    - function receiving a `MutableState` and modifying it in place
    desc - human readable description, returned by `str()`
    repr - text returned by `repr()`; defaults to `desc`

    When no text was supplied, `str()` / `repr()` fall back to a label derived
    from the wrapped function's name instead of returning None (which would
    raise TypeError).
    """
    def __init__(self, f, desc = None, repr = None):
        self.wrapped = f
        self.desc = desc
        self.repr = desc if repr is None else repr
    
    def __call__(self, state):
        """Apply the action to `state` and return the validated successor State.

        Raises whatever the wrapped function raises, or AssertionError from
        `State.validate` if the resulting state is invalid.
        """
        s = MutableState(state)
        # Reset the per-step marker before the action runs
        s.last_modified_item = None
        self.wrapped(s)
        s = State(s)
        
        s.validate()
        
        return s
    
    def _fallback_label(self):
        # Label used when neither desc nor repr text is available
        return 'Action {}'.format(getattr(self.wrapped, '__name__', type(self.wrapped).__name__))
    
    def __repr__(self):
        return self.repr if self.repr is not None else self._fallback_label()
    
    def __str__(self):
        return self.desc if self.desc is not None else self._fallback_label()
        
def action(**kwargs):
    """Decorator factory: `@action(desc = ...)` wraps the function in an `ActionImpl`.

    All keyword arguments are forwarded to the `ActionImpl` constructor.
    """
    return lambda f: ActionImpl(f, **kwargs)

In [56]:
# --- Market broker implementation and market actions ---

def market_transaction(state, type_id, amount, order_type):
    """Buy or sell up to `amount` units of `type_id` in the current system, in place.

    state      - MutableState; `wallet`, `cargo` and `updated_orders` are modified
    type_id    - item type to trade
    amount     - requested number of units; truncated to int, negative values
                 are treated as 0
    order_type - 'buy' (we buy from sell orders, cheapest first) or
                 'sell' (we sell to buy orders, highest price first)

    Orders are matched by system and type only. Buying is limited by the
    wallet, the remaining cargo volume and the remaining collateral; selling is
    limited by the amount held in cargo. Volumes consumed from an order are
    recorded in `state.updated_orders`.
    """
    assert order_type in ['buy', 'sell']
    
    # A negative request would otherwise run the transaction in reverse
    # (a "buy" of -n would remove cargo and credit the wallet)
    amount = max(int(amount), 0)
    
    # For the duration of this function, we need this element to exist
    if type_id not in state.cargo:
        state.cargo[type_id] = (0, 0)
    
    # Get all possible orders in appropriate order
    candidate_orders = orders[
        #(orders.location_id == state.station) &
        (orders.system_id == state.system) &
        (orders.type_id == type_id) &
        (orders.is_buy_order == (order_type == 'sell'))
    ].sort_values('price', ascending = order_type == 'buy').reset_index()
    
    # Check that we only sell as many items as we have
    if order_type == 'sell':
        amount = min(amount, state.cargo[type_id][0])
    
    for order in candidate_orders.itertuples():
        # Check if we even need anything more
        if amount == 0:
            break
        
        # Check if we can get something from this order
        remaining = order.volume_remain
        
        if order.order_id in state.updated_orders:
            remaining = state.updated_orders[order.order_id]
                
        transfer = min(amount, remaining)
        
        # Don't buy more than we can afford or store on our ship
        if order_type == 'buy':
            transfer = min(
                transfer,
                int(state.wallet / order.price)
            )
    
            vol_left, coll_left = State(state).limits_left()
            can_store = int(min(
                vol_left / types[type_id]["volume"],
                coll_left / order.price
            ))
            
            transfer = min(
                transfer,
                can_store
            )
        
        # Nothing to take from this order; a negative value can appear when a
        # limit is already exceeded and must never be executed as a transfer
        if transfer <= 0:
            continue
        
        # Conduct transaction
        delta = transfer if order_type == 'buy' else -transfer
        state.wallet -= delta * order.price
        
        (cnum, cval) = state.cargo[type_id]
        state.cargo[type_id] = (
            cnum + delta,
            cval + delta * order.price
        )
        
        # Record that the order was partially filled
        state.updated_orders[order.order_id] = remaining - transfer
        
        # Continue buying if necessary
        amount -= transfer
    
    # Remove empty cargo categories
    if state.cargo[type_id][0] == 0:
        del state.cargo[type_id]
    
def buy(type_id, amount):
    """Create the action that buys up to `amount` units of `type_id` in the current system."""
    description = 'Buy {} of {}'.format(amount, types[type_id]["name"])
    
    def purchase(s):
        market_transaction(s, type_id, amount, 'buy')
    
    return action(desc = description)(purchase)

def sell(type_id, amount):
    """Create the action that sells up to `amount` units of `type_id` in the current system."""
    description = 'Sell {} of {}'.format(amount, types[type_id]["name"])
    
    def dispose(s):
        market_transaction(s, type_id, amount, 'sell')
    
    return action(desc = description)(dispose)

In [12]:
# --- Movement actions ---

# Time deducted from `time_left` per jump (and for a move within one system)
warp_cost = 1.0

@functools.lru_cache(maxsize=None)
def warp_to_station(station_id):
    """Create (and memoize) the action that moves the ship to `station_id`.

    The station record is fetched through `get_station`, and the distances
    from the station's system to every system of `subgraph` are computed once
    when the action is created. Applying the action costs `warp_cost` inside
    the same system, otherwise `warp_cost` per jump.
    """
    station = get_station(station_id)
    
    # Jump counts from the station's system to every reachable system
    distances = nx.shortest_path_length(subgraph, station.system_id)
    
    @action(desc = 'Move to {} in {}'.format(station.name, systems[station.system_id]["name"]))
    def do_warp_to_station(s):
        assert s.station != station_id, 'Cannot warp from to same station'
        
        if s.system == station.system_id:
            t = warp_cost
        else:
            t = warp_cost * distances[s.system]
        
        s.station = station_id
        s.system = station.system_id
        
        s.time_left -= t
    
    # Previously returned the undefined name `do_warp_to` (NameError)
    return do_warp_to_station

@functools.lru_cache(maxsize=None)
def warp_to_system(system_id):
    """Create (and memoize) the action that moves the ship to `system_id`.

    Applying the action leaves any station, sets the system and deducts
    `warp_cost` per jump, using the precomputed `system_distance` table.

    Raises nx.NodeNotFound if `system_id` is not part of `subgraph`.
    """
    # The action only uses `system_distance`, so the per-target BFS that used
    # to run here was discarded work; keep just its validation of the target
    if system_id not in subgraph:
        raise nx.NodeNotFound('Source {} is not in G'.format(system_id))
    
    @action(desc = 'Move to system {}'.format(systems[system_id]["name"]))
    def do_warp_to_system(s):
        t = warp_cost * system_distance(system_id, s.system)
        
        s.system = system_id
        s.station = None
        
        s.time_left -= t
    
    return do_warp_to_system

In [13]:
# --- Encoding of table data into TF tensors ---

@functools.lru_cache(maxsize=None)
def encode_system(system_id, state = None):
    """Encode a system as [index in system_ids, security status, distance to each landmark].

    `state` is accepted for signature symmetry with the other encoders and is
    not used. The returned list is memoized, so callers must not mutate it.
    """
    assert system_id in system_ids
    
    position = system_ids.index(system_id)
    info = systems[system_id]
    
    features = [position, info["security_status"]]
    features = features + [system_distance(l, system_id) for l in landmarks]
    
    return features

@functools.lru_cache(maxsize=None)
def encode_type(type_id, state = None):
    """Encode an item type as [index in type_ids, volume].

    `state` is accepted for signature symmetry with the other encoders and is
    not used. The returned list is memoized, so callers must not mutate it.
    """
    assert type_id in type_ids
    
    position = type_ids.index(type_id)
    info = types[type_id]
    
    return [position, info["volume"]]

# Encoding of a single order
def encode_order(order, state = None):
    tdata = encode_type(order.type_id, state)
    sdata = encode_system(order.system_id, state)
    
    if tdata is None or sdata is None:
        return None
    
    remaining = order.volume_remain
    
    if state is not None:
        updates = {k:v for k,v in state.updated_orders}
        
        if order.order_id in updates:
            remaining = updates[order.order_id]
    
    return [tdata[0], sdata[0]] + tdata[1:] + sdata[1:] + [
        order.location_id,
        remaining,
        1 if order.is_buy_order else 0,
        0 if order.is_buy_order else 1
    ]

# Encodes an order dataframe into a ragged tensor and an object that can be used to apply deltas to that tensor
def encode_orders(orders):
    """Encode an order dataframe into a ragged tensor grouped by region and type.

    orders - dataframe indexed by order id with at least the columns
             `system_id`, `type_id` and `region_id` plus those read by `encode_order`

    Returns (result, delta_base):
        result     - ragged tensor built from per-(region, type) row lengths,
                     holding one `encode_order` row per order
        delta_base - the encoded orders indexed by order id, in the same row
                     order as the flat values, for use by `encode_delta`
    """
    # Identify all orders that have valid system and type ids
    candidates = orders[
        (orders.system_id.isin(system_ids)) &
        (orders.type_id.isin(type_ids))
    ].reset_index().set_index(['region_id', 'type_id']).sort_index()

    # Compute the no. of counts per group
    counts = candidates.order_id.groupby(['region_id', 'type_id']).count()
    
    # Fill empty counts with 0
    # NOTE(review): the flat data below follows the sorted (region_id, type_id) index,
    # while these row lengths follow the order of `region_ids` x `types` — the two only
    # line up if both sequences are already sorted; confirm
    new_idx = pd.MultiIndex.from_product(
        [region_ids, types],
        names = counts.index.names
    )
    counts = counts.reindex(new_idx, fill_value = 0)
    
    # Encode the identified orders into a tensor
    print('Encoding orders & transferring to GPU')
    flat_data = tf.constant([
        encode_order(o)
        for o in tqdm(list(
            candidates.reset_index().itertuples()
        ))
    ])
    
    # Create a ragged tensor from the uniform tensor
    # (orders per group -> groups per region -> all regions, then drop the outer singleton)
    result = tf.RaggedTensor.from_uniform_row_length(
        tf.RaggedTensor.from_uniform_row_length(
            tf.RaggedTensor.from_row_lengths(
                flat_data,
                counts.to_numpy()
            ),
            len(type_ids)
        ),
        len(region_ids)
    )
    result = result[0]
    
    # We need the candidates, indexed by order id but sorted as above to know where the orders are located
    delta_base = candidates.reset_index().set_index('order_id')
    
    return result, delta_base

# Encoding of an order delta that can be applied to the base order tensor retrieved from the dataframe
def encode_delta(delta_base, state):
    """Build a function that applies the state's order updates to the encoded order tensor.

    delta_base - dataframe returned by `encode_orders`, indexed by order id in
                 the same row order as the flat values of the encoded tensor
    state      - state whose `updated_orders` (pairs of order id, remaining
                 volume) should be reflected

    Returns a function mapping the base ragged tensor to the updated one; the
    identity if the state has no updated orders. Raises KeyError if an updated
    order is not present in `delta_base`.
    """
    if not state.updated_orders:
        return lambda x: x
    
    # Compute the indices of orders requiring updates
    indices = [
        delta_base.index.get_loc(oid) for oid, _ in state.updated_orders
    ]
    
    # Encode the updated orders. The state must be passed along: without it
    # `encode_order` emits the original volumes and the delta changes nothing.
    # (encode_type / encode_system are memoized on their arguments, so each
    # distinct state adds entries to their caches.)
    changed_orders = list(delta_base.iloc[indices].reset_index().itertuples())
    updates = [
        encode_order(o, state) for o in changed_orders
    ]
    
    # Create a function that applies the update to an encoded tensor
    indices = tf.constant([[i] for i in indices], dtype = tf.int32)
    updates = tf.constant(updates, dtype = tf.float32)
    
    def apply(x):
        # Overwrite the affected rows of the flat values, keeping the ragged structure
        return tf.ragged.map_flat_values(
            tf.tensor_scatter_nd_update,
            x, indices, updates
        )
    
    return apply

def encode_cargo(data):
    """Encode cargo pairs (type id, (amount, value)) as a float32 tensor of shape [len(data), 4].

    Each row is the `encode_type` features followed by amount and value.
    """
    rows = [
        encode_type(t) + [a, val]
        for t, (a, val) in data
    ]
    
    flat = tf.constant(rows, dtype = tf.float32)
    
    # The explicit reshape keeps the trailing dimension even when there is no cargo
    return tf.reshape(flat, [len(data), 4])

In [151]:
# Test for encoder
# Encode the full order book once
encoded, delta_base = encode_orders(orders)

# State with a single modified order (order id -> remaining volume)
s = MutableState()
s.updated_orders = {5581552764: 5}
s = State(s)

# Build the delta repeatedly; the progress bar shows the throughput
for i in trange(0, 1000):
    encode_delta(delta_base, s)

Encoding orders & transferring to GPU


HBox(children=(FloatProgress(value=0.0, max=123510.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




In [49]:
def sample_from(state, parameters):
    """Sample one action for `state` from a joint distribution described by `parameters`.

    parameters - dict with the entries 'actions', 'systems', 'types' and
                 'cargo' holding the distribution parameters
                 (NOTE(review): exact tensor shapes are not visible here — confirm)

    Returns (action, log_p) where `action` is the sampled action object and
    `log_p` the log-probability of the sample. Falls through and returns None
    if replaying the sample does not finish the model generator.
    """
    action_params = parameters['actions']
    move_params   = parameters['systems']
    buy_params    = parameters['types']
    sell_params   = parameters['cargo']
    
    # We want to ignore the 'sell' action if there is nothing to sell
    if len(state.cargo) == 0:
        action_params = action_params[...,0:1]
        
    Joint = tfp.distributions.JointDistributionCoroutine
    
    def model():
        action_id = yield Joint.Root(tfp.distributions.Categorical(logits = action_params[...,0]))
        
        if action_id == 0:
            target = yield tfp.distributions.Categorical(logits = move_params[...,0])
            target = system_ids[target.numpy()]
            
            return warp_to_system(target)
        
        if action_id == 1:
            params = buy_params
        elif action_id == 2:
            params = sell_params
            
        idx = yield tfp.distributions.Categorical(logits = params[...,0])
        amount = yield tfp.distributions.Normal(loc = params[...,1], scale = params[...,2])[0]
        
        if action_id == 1:
            # Convert the sampled index before indexing the list; the previous
            # `type_ids[idx].numpy()` called .numpy() on a plain int
            return buy(type_ids[idx.numpy()], amount.numpy())
        elif action_id == 2:
            return sell([type_id for type_id, _ in state.cargo][idx.numpy()], amount.numpy())
        
        assert False
    
    dist = Joint(model)
    
    sample = dist.sample()
    log_p = dist.log_prob(sample)
        
    # Interpret sample: replay the sampled values through the generator; the
    # action it returns arrives as the value of StopIteration
    generator = model()
    try:
        next(generator)
        for s in sample:
            generator.send(s)
    except StopIteration as s:
        return s.value, log_p
    
@tf.function
def samples_and_distinfo(parameters, p_bootstrap = 0.0):
    """Sample every component of an action and return the joint log-probability and entropy.

    parameters  - dict with the entries 'actions', 'systems', 'types' and 'cargo'
                  (NOTE(review): assumes leading batch dimensions followed by
                  [n_entries, n_params] — confirm against the model output)
    p_bootstrap - not used in this function

    Returns ((action_id, warp_target, buy_item, buy_amount, sell_item, sell_amount),
             (log_p, e_tot)). All components are always sampled; `log_p` only
    includes the terms belonging to the sampled action id, while `e_tot` sums
    the entropies of all distributions that were built.
    """
    # Selling is only offered when the cargo parameter tensor has at least one entry
    try_sell = tf.shape(parameters['cargo'])[-2] > 0
    n_actions = 3 if try_sell else 2
    
    batch_dims = len(parameters['actions'].shape) - 2
    
    # Which action to take: 0 = warp, 1 = buy, 2 = sell
    action_id_dist = tfp.distributions.Categorical(logits = parameters['actions'][...,0,0:n_actions])
    action_id   = action_id_dist.sample()
    
    warp_target_dist = tfp.distributions.Categorical(logits = parameters['systems'][...,0])
    warp_target = warp_target_dist.sample()
    
    buy_item_dist    = tfp.distributions.Categorical(logits = parameters['types'][...,0])
    buy_item    = buy_item_dist.sample()
    
    # Amount parameters of the sampled item; abs() keeps loc and scale non-negative
    buydist_data = tf.abs(tf.gather(parameters['types'], buy_item, batch_dims = batch_dims))
    buy_amount_dist  = tfp.distributions.TruncatedNormal(loc = buydist_data[...,1], scale = buydist_data[...,2], low = 0, high = 1e8)
    buy_amount  = buy_amount_dist.sample()
    
    e_tot = action_id_dist.entropy() + warp_target_dist.entropy() + buy_item_dist.entropy() + buy_amount_dist.entropy()
    
    log_p = action_id_dist.log_prob(action_id)
    tf.debugging.assert_all_finite(
        log_p, 'Invalid action logp'
    )

    
    # Only the branch matching the sampled action contributes to log_p
    log_p += tf.where(
        action_id == 0,
        warp_target_dist.log_prob(warp_target),
        0
    )
    tf.debugging.assert_all_finite(
        log_p, 'Invalid warp logp'
    )
    
    log_p += tf.where(
        action_id == 1,
        buy_item_dist.log_prob(buy_item) + buy_amount_dist.log_prob(buy_amount),
        0
    )
    tf.debugging.assert_all_finite(
        log_p, 'Invalid buy logp'
    )
    
    if try_sell:
        sell_item_dist   = tfp.distributions.Categorical(logits = parameters['cargo'][...,0])
        sell_item = sell_item_dist.sample()
        
        selldist_data    = tf.abs(tf.gather(parameters['cargo'], sell_item, batch_dims = batch_dims))
        sell_amount_dist = tfp.distributions.TruncatedNormal(loc = selldist_data[...,1], scale = selldist_data[...,2], low = 0, high = 1e8)
        sell_amount = sell_amount_dist.sample()
        
        log_p += tf.where(
            action_id == 2,
            sell_item_dist.log_prob(sell_item) + sell_amount_dist.log_prob(sell_amount),
            0
        )
        
        e_tot += sell_item_dist.entropy() + sell_amount_dist.entropy()
        
        tf.debugging.assert_all_finite(
            log_p, 'Invalid sell logp'
        )
    else:
        # Placeholders so both branches define the same outputs
        sell_item = tf.constant(-1, dtype = tf.int32)
        sell_amount = tf.constant(0, dtype = tf.float32)
    
    return (action_id, warp_target, buy_item, buy_amount, sell_item, sell_amount), (log_p, e_tot)

def sample_to_action(state, sample):
    """Turn a sample produced by `samples_and_distinfo` into an action object.

    state  - State the action will be applied to; its cargo resolves the
             index of a sell sample to a type id
    sample - tuple (action_id, warp_target, buy_item, buy_amount, sell_item,
             sell_amount) of scalar tensors

    Returns the warp (action id 0), buy (1) or sell (2) action.
    Raises ValueError for any other action id.
    """
    (action_id, warp_target, buy_item, buy_amount, sell_item, sell_amount) = sample
    
    chosen = action_id.numpy()
    
    if chosen == 0:
        return warp_to_system(system_ids[warp_target.numpy()])
    
    if chosen == 1:
        return buy(type_ids[buy_item.numpy()], buy_amount.numpy())
    
    if chosen == 2:
        # Cargo is a tuple of (type id, (amount, value)) pairs
        t, _ = state.cargo[sell_item.numpy()]
        return sell(t, sell_amount.numpy())
    
    # Previously this fell through to an unbound local variable
    raise ValueError('Unknown action id {}'.format(chosen))
    
    

#def sample_chain(states, length, model, history):
#    log_p_tot = tf.constant(np.zeros([len(starts)], dtype = tf.float32))
#    
#    for i in range(0, length):
#        # Encode states
#        state_data = encode_states(states)
#        
#        control, history = model(state_data, history)
#        
#        actions, log_p = pick_actions(starts, *control)
#        
#        states = [action(state) for action, state in zip(actions, states)]
#        log_p_tot += log_p
#    
#    return states, log_p_tot

In [50]:
# --- Global embedding of data ---

# Helper layer that holds the encoded universe and can map order tensors to their full representation
class Universe(tf.keras.layers.Layer):
    """Holds the encoded universe and expands index-based tensors into full feature rows.

    For every system and type there is a constant part (from `encode_system` /
    `encode_type`) and a trainable "notes" part initialised to zero.

    d_system_notes - number of trainable features per system
    d_type_notes   - number of trainable features per type
    """
    def __init__(self, d_system_notes = 8, d_type_notes = 8):
        super(Universe, self).__init__()
        
        with tf.name_scope('Universe/'):
            self.system_data_const = tf.constant([encode_system(system) for system in tqdm(system_ids, desc = 'Encoding systems')], dtype = tf.float32)
            self.system_data_var   = tf.Variable(np.zeros([len(system_ids), d_system_notes]), dtype = tf.float32)

            self.type_data_const = tf.constant([encode_type(type_id) for type_id in tqdm(types, desc = 'Encoding types')], dtype = tf.float32)
            self.type_data_var   = tf.Variable(np.zeros([len(types), d_type_notes]), dtype = tf.float32)

            self.type_ids = tf.constant(list(types), dtype = tf.int32)
            # NOTE(review): built from all `systems`, while the system data rows above
            # follow the high-sec `system_ids` list — confirm which one is intended
            self.system_ids = tf.constant(list(systems), dtype = tf.int32)

    def system_data(self):
        """Return the per-system rows: constant features followed by trainable notes."""
        with tf.name_scope('Universe/SystemData/'):
            return tf.concat([self.system_data_const, self.system_data_var], axis = -1)

    def type_data(self):
        """Return the per-type rows: constant features followed by trainable notes."""
        with tf.name_scope('Universe/TypeData/'):
            return tf.concat([self.type_data_const, self.type_data_var], axis = -1)

    def embed_orders(self, order_tensor):
        """Replace the two leading index columns of encoded orders by full system and type rows.

        `encode_order` writes the type index to column 0 and the system index to
        column 1; the output is [system row, type row, remaining order columns].
        """
        with tf.name_scope('Universe/OrderEmbedding'):
            # Column order must match encode_order (type first, then system);
            # the two were previously read the other way round
            typ_idx = tf.cast(order_tensor[...,0], tf.int32)
            sys_idx = tf.cast(order_tensor[...,1], tf.int32)

            return tf.concat(
                [
                    tf.gather(self.system_data(), sys_idx),
                    tf.gather(self.type_data(),   typ_idx),
                    order_tensor[...,2:]
                ],
                axis = -1
            )
    
    def embed_cargo(self, cargo_tensor):
        """Replace the leading type index column of encoded cargo by the full type row."""
        with tf.name_scope('Universe/CargoEmbedding'):
            typ_idx = tf.cast(cargo_tensor[...,0], tf.int32)

            result = tf.concat(
                [
                    tf.gather(self.type_data(), typ_idx),
                    cargo_tensor[...,1:]
                ],
                axis = -1
            )

            return result
        
    

In [51]:
# Attention layers

def full_attention(keys, values, queries, mask_kv = None, mask_q = None):
    """
        Scaled dot-product attention:
            softmax(queries . keys^T / sqrt(dim_k)) . values

        keys    - batch_shape + [n_sequence1, dim_k] tensor holding query keys
        values  - batch_shape + [n_sequence1, dim_v] tensor holding query results
        queries - batch_shape + [n_sequence2, dim_k] tensor holding queries
        mask_kv - optional batch_shape + [n_sequence1] mask for the keys / values;
                  positions where it is False (0) get a logit of -1e9
        mask_q  - optional batch_shape + [n_sequence2] mask for the queries;
                  rows where it is False (0) have every logit set to -1e9 and
                  therefore attend uniformly to all positions

        Returns batch_shape + [n_sequence2, dim_v] tensor holding result
    """
    with tf.name_scope('scaled_dot_product_attention'):
        # Compute the [..., n_sequence2, n_sequence1] matrix of weights
        weights = tf.linalg.matmul(queries, keys, transpose_b = True)

        # Normalize against the contracted (key) dimension
        dk = tf.cast(tf.shape(keys)[-1], tf.float32)
        weights = weights / tf.math.sqrt(dk)

        if mask_kv is not None or mask_q is not None:
            # A missing mask means "everything valid"; [True] broadcasts against the other mask.
            # Casting lets callers pass 0 / 1 masks as well as boolean ones.
            mask_kv = tf.cast(mask_kv, tf.bool) if mask_kv is not None else [True]
            mask_q  = tf.cast(mask_q,  tf.bool) if mask_q  is not None else [True]

            # Outer "and" of both masks -> [..., n_sequence2, n_sequence1]
            mask = tf.math.logical_and(
                tf.expand_dims(mask_kv, axis = -2),
                tf.expand_dims(mask_q , axis = -1)
            )

            weights = tf.where(mask, weights, -1e9)

        # Compute softmax over the sequence1 dimension
        weights = tf.nn.softmax(weights, axis = -1)

        # Apply weights to values. The earlier version returned `values` untouched,
        # which discarded the weights and gave the wrong sequence axis whenever
        # n_sequence1 != n_sequence2.
        result = tf.linalg.matmul(weights, values)

        return result


def reduced_attention(keys, values, queries, mask_kv = None, mask_q = None):
    """
        Attention variant that contracts keys with values over the sequence axis
        first and applies the resulting [dim_k, dim_v] matrix to every query:
            queries . softmax(keys^T . values / sqrt(n), over dim_k)
        where n is the number of (unmasked) key / value positions.

        keys    - batch_shape + [n_sequence1, dim_k] tensor holding query keys
        values  - batch_shape + [n_sequence1, dim_v] tensor holding query results
        queries - batch_shape + [n_sequence2, dim_k] tensor holding queries
        mask_kv - optional batch_shape + [n_sequence1] mask; key / value rows where
                  it is False (0) are zeroed and not counted in n
        mask_q  - optional batch_shape + [n_sequence2] mask; query rows where it is
                  False (0) are zeroed, so their result rows are zero

        Returns batch_shape + [n_sequence2, dim_v] tensor holding result
    """
    with tf.name_scope('reduced_scaled_dot_product_attention'):
        if mask_kv is not None:
            mask_kv = tf.cast(mask_kv, tf.bool)

            # The mask is per position; add a trailing axis so that it broadcasts
            # over the feature axis of keys / values
            keep_kv = tf.expand_dims(mask_kv, axis = -1)
            keys   = tf.where(keep_kv, keys, 0)
            values = tf.where(keep_kv, values, 0)

        if mask_q is not None:
            keep_q = tf.expand_dims(tf.cast(mask_q, tf.bool), axis = -1)
            queries = tf.where(keep_q, queries, 0)

        # Compute the [..., dim_k, dim_v] matrix of weights. The earlier version
        # used an identity matrix here, which ignored the values completely.
        weights = tf.linalg.matmul(keys, values, transpose_a = True)

        # Normalize against the number of contributing sequence positions
        if mask_kv is not None:
            dn = tf.reduce_sum(tf.cast(mask_kv, tf.float32), axis = -1)

            # We need to expand from a batch_shape to a batch_shape + [1, 1] array
            # to maintain correct broadcasting
            dn = tf.expand_dims(dn, axis = -1)
            dn = tf.expand_dims(dn, axis = -1)
        else:
            dn = tf.cast(tf.shape(keys)[-2], tf.float32)

        # An empty or fully masked sequence would otherwise give 0 / sqrt(0) = NaN
        dn = tf.maximum(dn, 1.0)

        weights = weights / tf.math.sqrt(dn)

        # Compute softmax over the dim_k dimension
        weights = tf.nn.softmax(weights, axis = -2)

        # Apply weights to the queries
        result = tf.linalg.matmul(queries, weights)

        return result

class MultiHeadedAttention(tf.keras.layers.Layer):
    """
        Multi-head attention layer.

        Keys, values and queries are each projected by a Dense layer to
        num_heads * d_model features, split into heads, passed to the attention
        function `f`, merged again and projected to d_out features by a ReLU
        Dense layer. With skip = True the unprojected query input is added to
        the output.

        The layer input is either a single tensor (used as keys, values and
        queries alike) or a (keys, values, queries) tuple.
    """
    def __init__(self, d_out, d_model, num_heads, f = reduced_attention, skip = True):
        super(MultiHeadedAttention, self).__init__()

        self.d_model = d_model
        self.num_heads = num_heads

        # Settings shared by all four Dense layers
        dense_args = dict(use_bias = True, kernel_initializer = 'glorot_normal')
        width = num_heads * d_model

        self.wk = tf.keras.layers.Dense(width, name = 'Wk', **dense_args)
        self.wv = tf.keras.layers.Dense(width, name = 'Wv', **dense_args)
        self.wq = tf.keras.layers.Dense(width, name = 'Wq', **dense_args)

        self.wout = tf.keras.layers.Dense(d_out, name = 'Wout', activation = 'relu', **dense_args)

        self.f = f
        self.skip = skip

    @staticmethod
    def _swap_after_batch(x, n_batch_axes):
        # Keep the leading batch axes and exchange the first two of the remaining three
        perm = tf.concat([
            tf.range(n_batch_axes),
            [n_batch_axes + 1, n_batch_axes + 0, n_batch_axes + 2]
        ], axis = 0)

        return tf.transpose(x, perm)

    def _split_heads(self, x):
        # batch + [seq, num_heads * d_model] -> batch + [num_heads, seq, d_model]
        with tf.name_scope('split_heads/'):
            shape = tf.shape(x)
            batch_shape = shape[:-2]
            seq_len = shape[-2]

            per_head_shape = tf.concat([
                batch_shape,
                [seq_len, self.num_heads, self.d_model]
            ], axis = 0)

            per_head = tf.reshape(x, per_head_shape)

            return self._swap_after_batch(per_head, tf.size(batch_shape))

    def _merge_heads(self, x):
        # batch + [num_heads, seq, d_model] -> batch + [seq, num_heads * d_model]
        with tf.name_scope('merge_heads/'):
            shape = tf.shape(x)
            batch_shape = shape[:-3]
            seq_len = shape[-2]

            seq_first = self._swap_after_batch(x, tf.size(batch_shape))

            merged_shape = tf.concat([
                batch_shape,
                [seq_len, self.num_heads * self.d_model]
            ], axis = 0)

            return tf.reshape(seq_first, merged_shape)

    def call(self, kvq, mask = None):
        if isinstance(kvq, tuple):
            k, v, q = kvq

            if mask is not None:
                kv_mask, q_mask = mask[0], mask[2]
        else:
            k = v = q = kvq
            kv_mask = q_mask = mask

        # NOTE: kv_mask / q_mask are unpacked above but are not handed to self.f below

        # Remember the raw query input for the skip connection
        residual = q if self.skip else None

        k, v, q = self.wk(k), self.wv(v), self.wq(q)
        k, v, q = [self._split_heads(part) for part in (k, v, q)]

        attended = self._merge_heads(self.f(k, v, q))
        output = self.wout(attended)

        if self.skip:
            output = output + residual

        return output

    def compute_mask(self, kvq, mask = None):
        # For tuple inputs the output follows the query sequence, so its mask is returned
        if mask is not None and isinstance(kvq, tuple):
            return mask[2]

        return mask

In [52]:
# --- Wraps another callable as a layer that accepts packed tensors ---

class StackedTogether(tf.keras.layers.Layer):
    """
        Applies `wrapped` to a whole nested structure of tensors in one call.

        All tensors of the input structure are concatenated along `axis`, the
        result is passed through `wrapped`, and the output is cut back into
        pieces of the original sizes and re-packed into the input structure.

        wrapped   - callable applied to the concatenated tensor
        axis      - axis along which the inputs are concatenated and split again
        mask_axis - axis along which the masks are concatenated; required when
                    the layer is called with a mask
    """
    def __init__(self, wrapped, axis = -2, mask_axis = None):
        super(StackedTogether, self).__init__()

        self._supports_ragged_inputs = True

        self.wrapped = wrapped
        self.axis = axis
        self.mask_axis = mask_axis

    def call(self, inputs, mask = None, *args, **kwargs):
        unpacked = tf.nest.flatten(inputs)

        # Remember how long every piece is so the output can be cut up again
        sizes = [tf.shape(x)[self.axis] for x in unpacked]

        if mask is not None:
            assert self.mask_axis is not None, 'No axis for mask stacking specified'
            mask = tf.concat(
                tf.nest.flatten(mask),
                axis = self.mask_axis
            )
            kwargs['mask'] = mask

        stacked = tf.concat(unpacked, axis = self.axis)
        processed = self.wrapped(stacked, *args, **kwargs)

        # Split along the axis that was used for stacking. tf.split defaults to
        # axis 0, which only coincides with `self.axis` for rank-2 inputs.
        unstacked = tf.split(processed, sizes, axis = self.axis)

        packed = tf.nest.pack_sequence_as(inputs, unstacked)

        return packed

In [53]:
# ------------------ Model class -----------------------------------------

class Model(tf.keras.Model):
    """
        Attention network over systems, types, market orders, cargo and an
        'actions' token.

        n_layers       - number of attention layers in the first stack, which sees
                         every input group including the orders
        n_layers_2     - number of attention layers in the second stack, which
                         runs after the orders have been dropped
        bandwidth      - feature width of every embedded row
        n_heads        - number of attention heads per layer
        head_dimension - feature width of each attention head

        Called with a dict holding 'orders' and 'cargo' tensors; returns a dict
        with the Dense "exit" outputs for 'actions', 'types', 'systems' and
        'cargo'.
    """
    def __init__(self, n_layers = 2, n_layers_2 = 8, bandwidth = 32, n_heads = 16, head_dimension = 8):
        super(Model, self).__init__(name = 'Model')

        with tf.name_scope('Model/'):
            # Variable inputs feeding into the network
            self.universe    = Universe()
            self.action_feed = tf.Variable(np.zeros([1,bandwidth]), dtype = tf.float32, name = 'action_feed')

            kwargs = {
                'use_bias' : True,
                'kernel_initializer' : 'glorot_normal',
                'bias_initializer' : 'glorot_normal'
            }

            # Input embeddings
            self.embeddings = {
                k : tf.keras.layers.Dense(bandwidth, name = 'embed_' + k, **kwargs)
                for k in ['systems', 'types', 'orders', 'cargo']
            }

            # Keyword arguments keep heads and head width apart: the earlier
            # positional call passed n_heads as d_model and head_dimension as
            # num_heads, i.e. swapped.
            substack1 = tf.keras.Sequential([
                MultiHeadedAttention(d_out = bandwidth, d_model = head_dimension, num_heads = n_heads)
                for i in range(0, n_layers)
            ])

            substack2 = tf.keras.Sequential([
                MultiHeadedAttention(d_out = bandwidth, d_model = head_dimension, num_heads = n_heads)
                for i in range(0, n_layers_2)
            ])

            # An empty stack is represented by None and skipped in call()
            self.substack1 = StackedTogether(substack1, axis = -2, mask_axis = -1) if n_layers > 0 else None
            self.substack2 = StackedTogether(substack2, axis = -2, mask_axis = -1) if n_layers_2 > 0 else None

            # Output heads; groups without an entry here are not returned by call()
            self.exits = {
                'actions' : tf.keras.layers.Dense(3, name = 'exit_actions', **kwargs),
                'types' : tf.keras.layers.Dense(3, name = 'exit_types', **kwargs),
                'systems' : tf.keras.layers.Dense(1, name = 'exit_systems', **kwargs),
                'cargo' : tf.keras.layers.Dense(3, name = 'exit_cargo', **kwargs)
            }

    # TODO: Masking
    def call(self, input):
        with tf.name_scope('Model/'):
            # Raw feature rows for every input group
            data = {
                'systems' : self.universe.system_data(),
                'types' : self.universe.type_data(),
                'orders' : self.universe.embed_orders(input['orders']),
                'cargo' : self.universe.embed_cargo(input['cargo'])
            }

            # Bring every group to the common feature width
            data = {
                k : self.embeddings[k](v)
                for k,v in data.items()
            }

            data['actions'] = self.action_feed

            if self.substack1 is not None:
                data = self.substack1(data)

            # The orders only take part in the first stack
            del data['orders']

            if self.substack2 is not None:
                data = self.substack2(data)

            data = {
                k : self.exits[k](v)
                for k,v in data.items()
                if k in self.exits
            }

            return data

In [57]:
# Create starting state for chain
test_state = MutableState()

# Start in Jita with 1M ISK
test_state.cargo = {}
test_state.volume_limit = 8900
test_state.collateral_limit = 10000000
test_state.time_left = 100
test_state.wallet = 1000000
test_state.system = root_id # in jita

# Wrap the filled-in mutable state into a State object, which is what the chain works on
test_state = State(test_state)

# Encode global data
print('Encoding market orders')
enc, d_base = encode_orders(orders)

# Initialize model
m = Model(n_layers = 2, n_layers_2 = 4)
# Run the model once on the starting state
# NOTE(review): presumably this call is what creates the model weights that
# `m.trainable_weights` is read from just below; confirm
result = m({'orders' : enc.flat_values, 'cargo' : encode_cargo(test_state.cargo)})

# Initialize our chain state
# Running sums over the steps of one chain, reset by begin() and updated by step()
accumulators = {
    'log_p'  : tf.Variable(0, dtype = tf.float32),
    'entropy' : tf.Variable(0, dtype = tf.float32)
}

# For every accumulator: one zero-initialised variable per trainable model weight,
# holding the summed gradient of that accumulator with respect to the weight
accum_grads = {
    k : [tf.Variable(tf.zeros(v.shape, dtype = tf.float32), name = 'grad_for_' + v.name) for v in m.trainable_weights]
    for k in accumulators
}

# Receives the final wallet value after each chain in the training loop
reward = tf.Variable(0, dtype = tf.float32)

# Outer (chain) and inner (step within chain) counters, also used as summary step
i_step = tf.Variable(0, dtype = tf.int64)
j_step = tf.Variable(0, dtype = tf.int64)

# Factor applied to the accumulated entropy in the objective of end()
entropy_weight = 1

# Decaying learning-rate schedule; its only use, in rate(), is currently commented out
schedule  = tf.keras.optimizers.schedules.InverseTimeDecay(1e-3, 10, 1e-1)

def rate():
    """Learning-rate callable handed to the optimizer.

    Always yields the constant 1e-4; the `schedule(i_step)` variant is kept
    below as a disabled alternative.
    """
    # return schedule(i_step)
    return 1e-4

# Adam optimizer; the learning rate is supplied as a callable (see rate above)
optimizer = tf.keras.optimizers.Adam(learning_rate = rate)

@tf.function
def begin():
    """Reset the chain state: every accumulator and every accumulated gradient goes back to zero."""
    for name, total in accumulators.items():
        total.assign(0)

        for grad_sum in accum_grads[name]:
            grad_sum.assign(tf.zeros(grad_sum.shape, dtype = tf.float32))

# Run our sampling chain
@tf.function
def step(data):
    """
        Run the model once on `data`, draw a sample and update the chain state.

        The log-probability and entropy returned by samples_and_distinfo are
        added to `accumulators`, and their gradients with respect to the model
        weights are added to `accum_grads`.

        Returns the sample produced by samples_and_distinfo.
    """
    weights = m.trainable_weights

    with tf.GradientTape(persistent = True) as tape:
        tape.watch(weights)

        result = m(data)

        # (per-output histogram summaries for j_step == 0 are currently disabled)

        for output in result.values():
            tf.debugging.assert_all_finite(output, 'Invalid model result')

        s, (log_p, entropy) = samples_and_distinfo(result)

        accum_deltas = {
            'log_p' : log_p,
            'entropy' : entropy
        }

    # First update the running sums ...
    for name, total in accumulators.items():
        total.assign_add(accum_deltas[name])

    # ... then the running gradient sums; weights without a gradient are left untouched
    for name in accumulators:
        local_grads = tape.gradient(accum_deltas[name], weights)

        for grad_sum, local_grad in zip(accum_grads[name], local_grads):
            if local_grad is None:
                continue

            grad_sum.assign_add(local_grad)

    return s

@tf.function
def end():
    """
        Finish a chain: evaluate the objective on the accumulators, turn its
        gradient into a gradient for the model weights via the per-step
        gradients collected in `accum_grads`, log summaries to `sw` and apply
        one optimizer update.

        Returns the objective value.
    """
    @tf.function
    def objective():
        # Only the accumulated entropy enters the objective; `reward` and the
        # accumulated log-probability are not used here
        return accumulators['entropy'] * entropy_weight
    
    with tf.GradientTape(persistent = True) as g:
        val = objective()
    
    # Direct gradient of the objective w.r.t. the model weights; weights without
    # a gradient get an all-zero tensor of their own shape
    grads = g.gradient(val, m.trainable_weights)
    grads = [
        g if g is not None else tf.zeros(w.shape, dtype = tf.float32)
        for g, w in zip(grads, m.trainable_weights)
    ]
    
    # Gradient of the objective w.r.t. each accumulator
    agrads = g.gradient(val, accumulators.values())
    
    # The tape is no longer needed; note that the name `g` is reused below for
    # the per-accumulator gradient
    del g
    
    # Chain rule: add d(objective)/d(accumulator) * d(accumulator)/d(weight)
    # for every accumulator that the objective depends on
    for k, g in zip(accumulators, agrads):
        if g is None:
            continue
            
        grads = [
            base_grad + g * a_grad if a_grad is not None else base_grad
            for base_grad, a_grad in zip(grads, accum_grads[k])
        ]
    
    # Write the objective and the accumulator values to the summary writer `sw`
    with sw.as_default():
        tf.summary.histogram('Objective', val, step = i_step)
    
        #for v, g in zip(m.trainable_variables, grads):
        #    tf.summary.histogram('grad_' + v.name, g, step = i_step)
        #    tf.summary.histogram( v.name, v, step = i_step)
            
            #tf.print(v.name)
            #tf.print(
            #    tf.math.reduce_mean(g)
            #)
            #tf.print(
            #    tf.math.reduce_variance(g)
            #)
        
        for k in accumulators:
            tf.summary.histogram('accum_' + k, accumulators[k], step = i_step)
    
    # One optimizer update with the assembled gradients
    optimizer.apply_gradients(
        zip(grads, m.trainable_variables)
    )
    
    return val

import ipywidgets as widgets
from IPython.display import display

# Text widget that shows the actions taken in the most recent chain
text = widgets.Textarea(height = 300, width = 500)
text.value = 'Hi\nthere'
display(text)

# Every execution of this cell logs to a fresh directory. `counter` only exists
# if the cell ran before in this kernel, so the first run lands in the
# NameError branch. Other exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
try:
    counter = counter + 1
except NameError:
    counter = 0

logdir = 'logs/{:03}'.format(counter)

sw = tf.summary.create_file_writer(logdir)

with sw.as_default():
    # Each outer iteration runs one chain of 10 sampled steps from test_state,
    # followed by one optimizer update in end()
    for i in trange(0, 200):
        i_step.assign(i)
        s = test_state
        begin()

        strdata = ''

        for j in range(0, 10):
            j_step.assign(j)

            # Encode the current state relative to the base order encoding
            delta = encode_delta(d_base, s)
            c = encode_cargo(s.cargo)

            sample = step({'orders' : delta(enc).flat_values, 'cargo' : c})

            act = sample_to_action(s, sample)

            # An action that cannot be applied leaves the state unchanged;
            # the error is recorded in the log text instead of aborting the chain
            try:
                strdata += str(act) + '\n'
                s = act(s)
            except Exception as e:
                strdata += '  ' + str(e) + '\n'

        text.value = strdata

        reward.assign(s.wallet)
        end()

Encoding market orders
Encoding orders & transferring to GPU


HBox(children=(FloatProgress(value=0.0, max=898280.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Encoding systems', max=972.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Encoding types', max=14466.0, style=ProgressStyle(descrip…




Textarea(value='Hi\nthere')

HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


State:
    Validation: Valid
    
    System:     Jita (30000142)
    Station ID: None
    
    Resources:
        Time:           100.00
        Wallet:      999379.39
        Collateral: 9999379.39 / 10000000.00 (    620.61 used)
        Cargo:         8896.64 /    8900.00 (      3.36 used)
        
    Cargo contents:
                 1 x Pyerite              (      7.69 ISK)
                 1 x Hedbergite           (    547.96 ISK)
                 1 x Plagioclase          (     64.96 ISK)
        
    Modified orders:
                    duration  is_buy_order                     issued  location_id  min_volume   price   range  system_id  type_id  volume_remain  volume_total  region_id   vol_mod
        order_id                                                                                                                                                                    
        5623259315        90         False  2020-03-01T11:29:17+00:00     60003760           1    7.69  reg

KeyboardInterrupt: 

In [22]:
# Remove the global name `m` (the model instance created in the training cell)
del m

In [132]:
# Apply every callable in `encoded_2` to `encoded`, with a progress bar
# NOTE(review): `encoded` and `encoded_2` are not defined in the visible part of
# the notebook; presumably they come from earlier cells, confirm before re-running
encoded_3 = [delta(encoded) for delta in tqdm(encoded_2)]

HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




In [22]:
# Stop a TensorFlow summary trace (counterpart of tf.summary.trace_on)
tf.summary.trace_off()

In [1]:
# Load the TensorBoard notebook extension, which provides the %tensorboard magic
%load_ext tensorboard

In [5]:
# Windows shell: quietly delete the files in %TMP%\.tensorboard-info
# NOTE(review): presumably this clears records of stale TensorBoard instances; confirm
!del /q %TMP%\.tensorboard-info\*
# Start TensorBoard on the `logs` directory written by the training cell
%tensorboard --logdir=logs

Launching TensorBoard...

KeyboardInterrupt: 

In [13]:
# Attempt to stop process 5096 from the shell
# NOTE: per the recorded output, `kill` was not found as a command on this machine
!kill 5096

Der Befehl "kill" ist entweder falsch geschrieben oder
konnte nicht gefunden werden.


In [51]:
# Show TensorBoard for the `logs` directory (the recorded output shows a running instance being reused)
%tensorboard --logdir=logs

Reusing TensorBoard on port 6006 (pid 5600), started 0:31:02 ago. (Use '!kill 5600' to kill it.)

In [66]:
# Fancy-indexing demo: a 2x2 integer index array on the first axis picks whole
# rows, so the result has shape (2, 2, 2)
x = np.arange(6).reshape(3, 2)
x[np.array([[0, 0], [1, 1]])]

array([[[0, 1],
        [0, 1]],

       [[2, 3],
        [2, 3]]])