In [1]:
import gym
from gym import spaces
import or_gym
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy
from or_gym.utils.env_config import *
%matplotlib inline

In [2]:
env = or_gym.make('PortfolioOpt-v0')

In [3]:
s = env.reset()
s

array([100.        ,  10.48945811,  16.89324694,  12.33857148,
         0.        ,   0.        ,   0.        ])

In [None]:
s = env.reset()

State consists of cash plus three assets with their own prices and the quantities held by the agent. 
- 0-3 contain asset prices
- 4-7 contain the quantities held
- 8 contains the total portfolio value

Actions indicate buying or selling given assets with indexes 0-2 corresponding to the non-cash asset classes.

In [8]:
env.sample_action()

array([-8.59545988, -3.83430658, 21.53107067])

In [21]:
env.step([2.08, 4.3, 9.42])

(array([ 1.        ,  0.71928476,  1.43438168,  1.01073949, 12.24823989,
        20.8       , 43.        , 94.2       , 69.91658968]),
 -35.859541932404724,
 True,
 {'Status': 'Termination Condition: End of Horizon'})

In [25]:
x = spaces.Discrete(10)

In [27]:
[x.sample() for i in range(10)]

[5, 4, 7, 4, 8, 0, 9, 5, 3, 5]

In [32]:
env = gym.make('CartPole-v0')
[env.action_space.sample() for i in range(10)]

[1, 1, 0, 0, 1, 1, 0, 1, 0, 0]

In [86]:
a = np.arange(3)
b = np.arange(3)

np.dot(a, b)

5

In [87]:
[i*j for i, j in zip(a, b)]

[0, 1, 4]

In [116]:
class POEnv(gym.Env):
    
    def __init__(self, *args, **kwargs):
        self.num_assets = 1
        self.initial_cash = 100
        self.cash = copy.copy(self.initial_cash)
        self.buy_cost = np.array([0.045, 0.025, 0.035])
        self.sell_cost = np.array([0.04, 0.02, 0.03])
        self.step_limit = 10
        self.asset_price_means = np.ones(self.num_assets)
        self.asset_price_var = np.ones(self.num_assets) * 0.25
        assign_env_config(self, kwargs)
        
        # Cash on hand, asset prices, num of shares, portfolio value
        self.obs_length = 1 + 2 * self.num_assets
#         self.observation_space = spaces.Dict({
#             "action_mask": spaces.Box(0, 1, shape=(self.num_assets,)),
#             "avail_actions": spaces.Box(0, 1, shape=(self.num_assets,)),
#             "state": spaces.Box(-1000, 1000, shape=(self.obs_length,))
#         })
        self.observation_space = spaces.Box(0, 1000, shape=(self.obs_length,))
        self.action_space = spaces.Box(-1000, 1000, shape=(self.num_assets,))
        
        self.reset()
        
    def reset(self):
        self.step_count = 0
        self.asset_prices = self._generate_asset_prices()        
        self.holdings = np.zeros(self.num_assets)
        self.cash = copy.copy(self.initial_cash)
        self.state = np.hstack([
            self.initial_cash,
            self.asset_prices[:, self.step_count],
            self.holdings])
        return self.state
    
    def _generate_asset_prices(self):
        asset_prices = np.vstack([
            np.random.normal(mu, sig, self.step_limit) 
            for mu, sig in zip(self.asset_price_means, self.asset_price_var)])
        # Zero out negative asset prices and all following prices - implies
        # equity is bankrupt and worthless.
        zero_vals = np.vstack(np.where(asset_prices<0))
        cols = np.unique(zero_vals[0])
        for c in cols:
            first_zero = zero_vals[1][np.where(zero_vals[0]==c)[0].min()]
            asset_prices[c,first_zero:] = 0
        return asset_prices
    
    def step(self, action):
        assert self.action_space.contains(action)
        # Round actions to integer values
        action = np.round(action)
        asset_prices = self.asset_prices[:, self.step_count].copy()
        for idx, a in enumerate(action):
            if a == 0:
                continue
            # Sell a shares of asset
            elif a < 0:
                a = np.abs(a)
                if a > self.holdings[idx]:
                    a = self.holdings[idx]
                self.holdings[idx] -= a
                self.cash += asset_prices[idx] * a * (1 - self.sell_cost[idx])
            # Buy a shares of asset
            elif a > 0:
                purchase_cost = asset_prices[idx] * a * (1 + self.buy_cost[idx])
                if self.cash < purchase_cost:
                    a = np.max(np.floor(self.cash / (
                        asset_prices[idx] * (1 + self.buy_cost[idx]))), 0)
#                     print('Action: {:.0f}\nCost: {:.2f}\nCash: {:.2f}'.format(a, purchase_cost, self.cash))
                self.holdings[idx] += a
                self.cash -= asset_prices[idx] * a * (1 + self.buy_cost[idx])
                
        # Return total portfolio value as reward
        reward = np.dot(asset_prices, self.holdings) + self.cash
        self.step_count += 1
        if self.step_count >= self.step_limit:
            done = True
        else:
            self._update_state()
            done = False
            
        return self.state, reward, done, {}
    
    def _update_state(self):
        self.state = np.hstack([
            self.cash,
            self.asset_prices[:, self.step_count],
            self.holdings
        ])

In [7]:
env = gym.make('PortfolioOpt-v0')
for i in range(1):
    s = env.reset()
    done = False
    rewards = []
    actions = []
    while done == False:
        action = env.action_space.sample()
        actions.append(action)
        s_1, r, done, _ = env.step(action)
        valid_state = env.observation_space.contains(s_1)
        if valid_state == False:
            msg = 'Observation Space does not match:'
            msg += '\nobservation_space:\nShape:\t{}\n\t{}'.format(s.shape, s)
            msg += '\nAction:\t{}'.format(action)
            raise ValueError(msg)
        s = s_1.copy()
        rewards.append(r)
#         rewards += r
        if done:
            print(rewards)

[96.07059218957423, 90.35981092295586, 84.12623044347052, 81.4467052949233, 80.01829886946142, 78.91350659096938, 76.99061562830107, 74.19296473718171, 75.97125525068196, 75.72959507129573]


In [118]:
actions

[array([291.60596], dtype=float32),
 array([821.52466], dtype=float32),
 array([-610.1736], dtype=float32),
 array([776.8539], dtype=float32),
 array([-70.79235], dtype=float32),
 array([332.34055], dtype=float32),
 array([-170.35948], dtype=float32),
 array([-152.44739], dtype=float32),
 array([-536.04205], dtype=float32)]

In [119]:
x = env.asset_prices.copy()

In [120]:
s = env.reset()
env.asset_prices = x.copy()
states = [s]
for a in actions:
    s, r, d, _ = env.step(a)
    states.append(s)
    if not env.observation_space.contains(s):
        raise ValueError(a)

ValueError: [-536.04205]

In [121]:
states

[array([100.        ,   0.91313828,   0.        ]),
 array([ 96.29834743,   1.09089705, 292.        ]),
 array([  0.53940423,   1.33581907, 376.        ]),
 array([4.82716656e+02, 3.61390904e-01, 0.00000000e+00]),
 array([1.89279890e+02, 5.94038938e-01, 7.77000000e+02]),
 array([229.76958442,   1.17167071, 706.        ]),
 array([8.07552358e-01, 1.21309325e+00, 8.93000000e+02]),
 array([198.78437125,   1.43757126, 723.        ]),
 array([408.55476988,   1.16069024, 571.        ]),
 array([1005.79954115,    1.17736517,   35.        ])]

In [133]:
actions = np.vstack(actions)
actions

array([[ 291.60596],
       [ 821.52466],
       [-610.1736 ],
       [ 776.8539 ],
       [ -70.79235],
       [ 332.34055],
       [-170.35948],
       [-152.44739],
       [-536.04205]], dtype=float32)

In [147]:
cash = 100
holdings = np.zeros(10)
print('{}\tCash = ${:.2f}'.format(0, cash))
for i, a in enumerate(actions):
    if i > 0:
        holdings[i] += holdings[i-1]
    a = np.round(a.take(0))
    price = env.asset_prices[:, i].take(0)
    if a > 0:
        purchase = a * price
        if cash < purchase:
            # Use all cash to complete purchase
            a = np.floor(cash / price)
            purchase = a * price
        holdings[i] += a
        cash -= purchase
        print('Purchase {} shares for {:.2f}/share => {:.2f}'.format(
            a, price, purchase))
    elif a < 0:
        a = np.abs(a)
        if a > holdings[i]:
            a = holdings[i]
        holdings[i] -= a
        cash += price * a
        print('Sell {} shares for {:.2f}/share => {:.2f}'.format(
            a, price, purchase))
        
    print('{}\tCash = ${:.2f}\tPortfolio = ${:.2f}'.format(i+1, cash, holdings[i]*price + cash))

0	Cash = $100.00
Purchase 292.0 shares for 0.01/share => 3.54
1	Cash = $96.46	Portfolio = $100.00
Purchase 88.0 shares for 1.09/share => 96.00
2	Cash = $0.46	Portfolio = $415.00
Sell 380.0 shares for 1.34/share => 96.00
3	Cash = $508.07	Portfolio = $508.07
Purchase 777.0 shares for 0.36/share => 280.80
4	Cash = $227.27	Portfolio = $508.07
Sell 71.0 shares for 0.59/share => 280.80
5	Cash = $269.45	Portfolio = $688.84
Purchase 229.0 shares for 1.17/share => 268.31
6	Cash = $1.13	Portfolio = $1096.65
Sell 170.0 shares for 1.21/share => 268.31
7	Cash = $207.36	Portfolio = $1135.38
Sell 152.0 shares for 1.44/share => 268.31
8	Cash = $425.87	Portfolio = $1307.10
Sell 536.0 shares for 1.16/share => 268.31
9	Cash = $1048.00	Portfolio = $1137.37


In [149]:
292 * 1.09 + 96.46

414.74

In [150]:
asset_prices_means = np.array([
    [0.729104  , 0.70066482, 1.33728305],
    [0.71028955, 1.15127388, 0.65365377],
    [0.83731888, 0.78674174, 1.14186928],
    [0.83644462, 0.97910886, 0.94767697],
    [0.69826764, 1.14386794, 0.94392694],
    [0.69017948, 0.86546669, 0.82813273],
    [0.61135848, 0.72119583, 0.70126934],
    [0.58991467, 0.86416669, 1.18881049],
    [1.48227405, 1.41814408, 0.96752138],
    [0.5027847 , 0.5380547 , 0.62442277],
    [0.56073499, 1.27841103, 1.18236989]])

In [None]:
def _generate_asset_prices(self):
    asset_prices = np.array([np.random.normal(mu, sig) 
        for mu, sig in zip(price_means.flatten(), price_var.flatten())]
                           ).reshape(price_means.shape)
    # Zero out negative asset prices and all following prices - implies
    # equity is bankrupt and worthless.
    zero_vals = np.vstack(np.where(asset_prices<0))
    cols = np.unique(zero_vals[0])
    for c in cols:
        first_zero = zero_vals[1][np.where(zero_vals[0]==c)[0].min()]
        asset_prices[c,first_zero:] = 0
    return asset_prices

In [153]:
prices = np.array([np.random.normal(np.random.normal(1, 0.25), 0.25) for i in range(30)]).reshape(3, -1)

In [156]:
price_means = np.random.normal(1, 0.25, (env.num_assets, env.step_limit))
price_var = np.ones(price_means.shape) * 0.25

In [157]:
prices = np.array([np.random.normal(mu, sig) for mu, sig in 
                   zip(price_means.flatten(), price_var.flatten())]).reshape(price_means.shape)

In [163]:
np.random.randint(10, 50, 3) * np.ones((3, 10))

ValueError: operands could not be broadcast together with shapes (3,) (3,10) 