### Uses Paper Trade - test money
##### https://alpaca.markets/docs/api-documentation/api-v2/

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import time, datetime as dt
import pandas as pd, numpy as np

import alpha, database as db
import alpaca_trade as alpaca
from populate import download_data
from populate import *
from portfolios import *
from history import *
from rl_algos import TD3

DataStore = db.DataStore()

import gym
from gym import spaces

In [6]:
from paper_config import *
import requests, json

def get_price(sym):
    """get latest price for a symbol"""
    #v2/stocks/{symbol}/trades/latest
    r = requests.get(f'{ALPACA_LATEST_VALUE_URL}/{sym}/trades/latest', headers=HEADERS)
    print(r)
    return json.loads(r.content)

get_price('SPY')

<Response [404]>


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [2]:
class Portfolio:
    def __init__(self, use_alpaca=True, cash=10000, positions=POPULAR):
        assert isinstance(positions, list)
        self.use_alpaca = use_alpaca
        if use_alpaca:
            self.cash_remaining = int(alpaca.get_account()['cash'])
            self.positions = alpaca.get_positions() # check on how returned from alpaca...?????????
        else:
            self.cash_remaining = cash
            self.positions = positions
            self.position_amount = dict()
            for pos in positions:
                self.position_amount[pos] = 0
        print(f'Portfolio loaded - available cash: {self.cash_remaining}')
        
    def position(self, sym):
        # check logic - need amount as well
        if not self.use_alpaca: return sym in self.positions
        if sym in self.positions: # not sure format positions returned...
            return alpaca.get_position(sym)
        else:
            return "no position"
        
    def buy(self, sym, amount, current_price):
        # or check current_price in call? ToDo check alpaca API on getting current price...
        if self.use_alpaca:
            alpaca.create_order(symbol=sym, qty=amount) # need other params...
        else:
            cost = current_price * amount
            
            ToDo : update cash_remaining? positions? before...
            
            if cost > self.cash_remaining:
                amount = self.cash_remaining//current_price # go all in
                cost = current_price * amount
            if not self.position(sym):
                self.positions.append(sym)
                self.position_amount[sym] = amount
            else:
                self.position_amount[sym] += amount
            self.cash_remaining -= cost
        

class StockTraderEnvironment(gym.Env):
    def __init__(self,
                portfolio,
                history, 
                short=False):
        """dfIndicators is a DataFrame where each column is a different indicator; short=True would allow shorting position"""
        
        self.portfolio = portfolio
        self.indicators = history.indicators
        self.prices = history.prices
        self.symbol = history.symbol
        num_indicators = self.indicators.shape[1]
        assert num_indicators > 0, "supply 1 or more indicators"

        self.action_space = spaces.Discrete(3)

        # set space for alpha indicators at +- infinity...?
        low_array = np.full((num_indicators), -np.inf)
        high_array = np.full((num_indicators), np.inf)
        self.observation_space = spaces.Box(low=low_array, high=high_array, dtype=np.float64)
        
        self.nS, self.nA = self.observation_space.shape[0], self.action_space.n
        
        self.previous_price = 0  # didn't exist before first day, so set previous_price to 0
        first_day = self.indicators.index[0] # starting at first day indicators exist
        self.prices = self.prices.loc[first_day:] # rewriting prices to fit indicator list
        
        self.state = np.array(self.indicators.iloc[0]) # first day is inititial state
        self.days = iter(self.prices.index.values)
        
        # Iterate through days, checking action/reward, etc. in step...
        self.trades = pd.DataFrame(0, index = self.prices.index, columns = self.prices.columns)
        self.trades_dupl = self.trades.copy(deep = True)
        
        # position is how much long (positive), short (negative) or holding (zero)
        self.position = 0  # how parse?

    def reset(self):
        self.previous_price = 0
        self.days = iter(self.prices.index.values)
        self.state = np.array(self.indicators.iloc[0])
        self.trades = pd.DataFrame(0, index = self.prices.index, columns = self.prices.columns)
        self.position = 0
    
    def step(self, action):
        #https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py
        err_msg = "%r (%s) invalid" % (action, type(action))
        assert self.action_space.contains(action), err_msg
        
        #Calculate reward here... first day = 0
        # 0 is buy, 1 is sell, 2 is hold
        try:
            new_day = next(trader.days)
            current_price = self.prices.loc[new_day]
            self.state = self.indicators.loc[new_day]

            if action == 0 and current_price > self.previous_price:
                reward = 2
            elif action == 0 and current_price < self.previous_price:
                reward = -2
            elif action == 1 and current_price < self.previous_price:
                reward = 2
            elif action == 1 and current_price > self.previous_price:
                reward = -2
            elif action == 2 and (current_price > self.previous_price or current_price < self.previous_price):
                reward = -2
            elif action == 2 and current_price == self.previous_price:
                reward = 2
            else:
                reward = 0
                
            if action == 0 and self.position == 0:
                # buying into a position, use nn probability for how much...?
                
            elif action == 0 and self.position > 0:
                # don't overcommit to one symbol?
                pass
            elif action == 0 and self.position < 0:
                # more signal to buy... keep same as last?
                pass
            elif action == 1 and self.position == 0:
                # sell, with no position
                pass
            elif action == 1 and self.position > 0:
                # sell, with some bought - sell all? or fraction? can't sell more than have if self.short=False
                if not self.short:
                    pass
                pass
            elif action == 1 and self.position < 0:
                if not self.short:
                    print(f'ERROR - short {self.symbol} with shorting disabled!')

            elif action == 2:
                # hold long or short (or no) position
                pass

            
            self.position = portfolio.position(self.symbol)
            self.previous_price = current_price
            done = False
        except StopIteration:
            if self.trades.equals(self.trades_dupl):
                done = True
            else:
                done = False
                self.trades_dupl = self.trades.copy(deep = True)

        return self.state, reward, done, {}
    
    def render(self):
        #ToDo - show progression via graph?
        pass

In [30]:
class Experiment:
    def __init__(self, DataStore, td3_kwargs, portfolio, sym, dates=None, indicators='all', shorting_allowed=False):
        print('Setting up experiment, loading history...')
        self.available_cash = portfolio.cash_remaining
        
        #date format for range: dates=[dt.datetime(2000,1,1), dt.datetime(2020,12,31)]
        self.history = History(DataStore, sym, dates, indicators=indicators)
        self.indicators = self.history.indicators
        self.prices = self.history.prices
        
        self.num_days = self.prices.shape[0]
        training = int(.75* self.num_days)
        validation = int(.3*training)
        test = self.num_days-training
        
        self.price_train = self.prices.iloc[:(training-validation)]
        self.indicator_train = self.indicators.iloc[:(training-validation)]
        
        self.price_validation = self.prices.iloc[(training-validation):training]
        self.indicator_validation = self.indicators.iloc[(training-validation):training] 
        
        self.price_test = self.prices.iloc[training:]
        self.indicator_test = self.indicators.iloc[training:]
        
        self.portfolio = portfolio
        self.symbol = sym
        
        self.env = StockTraderEnvironment(self.portfolio, 
                                          self.history, 
                                          short=shorting_allowed)
        
        self.batch_size = 64 # not parameterized...
        self.buffer = ReplayBuffer(self.env.nS, self.env.nA, max_buffer=int(1e6), batch_size=self.batch_size)
        
        self.max_action = 2 # 3 actions: [0,1,2], so 2 is max
        
        kwargs = {
            "state_dim": self.env.nS,
            "action_dim": self.env.nA,
            "max_action": self.max_action,
            "discount": td3_kwargs['discount'],
            "tau": td3_kwargs['tau'],
            "policy_noise": td3_kwargs['policy_noise'],            
            "noise_clip": td3_kwargs['noise_clip'],
            "policy_freq": td3_kwargs['policy_freq']
        }
        
        self.policy = TD3(**kwargs)
        self.expl_noise = td3_kwargs['expl_noise']
        
        
    def run(self, num_episodes, max_steps=int(1e6)):
        
        random_warmup = 25e3
        total_days_run = 0
        
        for idx in range(num_episodes):
            
            state, done = self.env.reset(), False
            episode_reward = 0
            steps = 0
            
            for days_passed in range(self.num_days):
                
                steps += 1
                
                if total_days_run < random_warmup:
                    action = env.action_space.sample()
                else:
                    #ToDo - what is this doing? https://github.com/sfujim/TD3/blob/master/main.py
                    action = (self.policy.select_action(np.array(state)) + np.random.normal(0, self.max_action * self.expl_noise, size=self.env.nA)).clip(-self.max_action, self.max_action)
            
                next_state, reward, done, _ = self.env.step(action)
                experience = [state, action, next_state, reward, done]
                self.buffer.update(experience)
                
                #ToDo: collect per episode, per iteration reward, total reward, etc.; portfolio value (or final portfolio value?)
                
                if done:
                    print(f'Episode finished after {days_passed+1} timesteps')
                    break
                
                state = next_state
                if days_passed >= random_warmup:
                    policy.train(self.buffer, self.batch_size)
                
                if days_passed == self.num_days - 1:
                    if num_episodes - idx - 1 == 0:
                        print('Finished all episodes, did not converge')
                    else:
                        print(f'Finished all days without converging, starting from day 1 for {num_episodes - idx - 1} more episodes.')
                        
                total_days_run += 1
                    
        self.env.close() # not defined
    
fake_portfolio = Portfolio(use_alpaca=False, positions=['JPM'])

kwargs = {
    "discount": 0.99,
    "tau": 0.005,
    "policy_noise": 0.2,            
    "noise_clip": 0.5,
    "policy_freq": 2,
    "expl_noise": 0.1
}

exp1 = Experiment(DataStore, kwargs, portfolio=fake_portfolio, sym='AAPL',indicators=['ADX', 'CCI', 'EMA'])

Portfolio loaded - available cash: 10000
Setting up experiment, loading history...


In [35]:
history = History(DataStore, 'JPM', indicators='all')


In [36]:
history.prices

Unnamed: 0_level_0,adjusted close
Date,Unnamed: 1_level_1
2000-01-19,25.456137
2000-01-20,25.432099
2000-01-21,25.047492
2000-01-24,25.068096
2000-01-25,25.624402
...,...
2021-07-02,156.030000
2021-07-06,153.410000
2021-07-07,153.590000
2021-07-08,150.940000


In [18]:
env = StockTraderEnvironment(fake_portfolio, history, short=False)