### Uses Alpaca paper trading (test money only)
##### https://alpaca.markets/docs/api-documentation/api-v2/

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import time, datetime as dt
import pandas as pd

import alpha, database as db
import alpaca_trade as alpaca
from populate import *
from portfolios import *
from history import *

# Single database handle for this notebook; it is handed to Experiment further
# down. NOTE(review): the variable reuses the class name `DataStore` from the
# `database` module - any later bare `DataStore` refers to this instance.
DataStore = db.DataStore()

In [34]:
import gym
import numpy as np
from gym import spaces
import pandas as pd

# class DDQN:
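#     # https://arxiv.org/pdf/1802.09477.pdf
#     # NOTE: arXiv:1802.09477 is the TD3 paper (Fujimoto et al.); if Double DQN
#     # was intended, that paper is arXiv:1509.06461.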
#     def __init__(self,
#                 replay_buffer,
#                 )

class Portfolio:
    """Cash balance and held positions, read from Alpaca or supplied locally.

    With ``use_alpaca=True`` the cash and positions come from the ``alpaca``
    wrapper module; otherwise the ``cash`` and ``positions`` arguments are
    used as given, which allows offline experiments.
    """

    def __init__(self, use_alpaca=True, cash=10000, positions=POPULAR):
        """Load the portfolio and print the available cash.

        Args:
            use_alpaca: when True, query the Alpaca account instead of using
                ``cash`` / ``positions``.
            cash: starting cash for the offline (non-Alpaca) portfolio.
            positions: container of held symbols for the offline portfolio.
        """
        self.use_alpaca = use_alpaca
        if use_alpaca:
            # float() rather than int(): int() raises ValueError on a
            # fractional numeric string such as "9876.54", and silently drops
            # the cents when handed a float.
            self.cash_remaining = float(alpaca.get_account()['cash'])
            self.positions = alpaca.get_positions()  # check on how returned from alpaca...
        else:
            self.cash_remaining = cash
            self.positions = positions
        print(f'Portfolio loaded - available cash: {self.cash_remaining}')

    def position(self, sym):
        """Report the holding for ``sym``.

        Returns:
            Offline portfolio: ``True``/``False`` for whether ``sym`` is in
            ``self.positions``.
            Alpaca portfolio: whatever ``alpaca.get_position(sym)`` returns,
            or the string ``"no position"`` when ``sym`` is not held.

        NOTE(review): the return type differs between the two modes; callers
        that compare the result numerically should confirm the Alpaca format.
        """
        if not self.use_alpaca:
            return sym in self.positions
        if sym in self.positions:  # not sure format positions returned...
            return alpaca.get_position(sym)
        return "no position"
        

class StockTraderEnvironment(gym.Env):
    """Gym environment that walks one symbol's history one day per step.

    Actions (``spaces.Discrete(3)``): 0 = buy, 1 = sell, 2 = hold.
    Observations: the indicator values of the current day, unbounded floats.
    """

    def __init__(self,
                 portfolio,
                 history,
                 short=False):
        """Set up spaces and episode state from ``history``.

        Args:
            portfolio: object exposing ``position(symbol)``.
            history: object exposing ``dfIndicators`` (a DataFrame where each
                column is a different indicator), ``dfPrices`` and ``symbol``.
            short: True would allow shorting a position.
        """
        self.portfolio = portfolio
        self.short = short  # step() reads this; it was never stored before
        self.indicators = history.dfIndicators
        self.prices = history.dfPrices
        self.symbol = history.symbol
        num_indicators = self.indicators.shape[1]
        assert num_indicators > 0, "supply 1 or more indicators"

        self.action_space = spaces.Discrete(3)

        # set space for alpha indicators at +- infinity...?
        low_array = np.full((num_indicators), -np.inf)
        high_array = np.full((num_indicators), np.inf)
        self.observation_space = spaces.Box(low=low_array, high=high_array, dtype=np.float64)

        # Prices start on the first day for which indicators exist.
        first_day = self.indicators.index[0]
        self.prices = self.prices.loc[first_day:]

        # reset() creates previous_price, days, state, trades and position.
        self.reset()
        # Snapshot compared against self.trades once the history is exhausted.
        self.trades_dupl = self.trades.copy(deep=True)

    def reset(self):
        """Rewind to the first day and return the initial observation."""
        self.previous_price = 0  # no price before the first day
        self.days = iter(self.prices.index.values)
        self.state = np.array(self.indicators.iloc[0])  # first day is initial state
        self.trades = pd.DataFrame(0, index=self.prices.index, columns=self.prices.columns)
        # position is how much long (positive), short (negative) or holding (zero)
        self.position = 0  # how parse?
        return self.state

    def step(self, action):
        """Advance one day and score ``action`` against the price move.

        Returns:
            ``(state, reward, done, info)``. Once the days are exhausted the
            state is left unchanged, the reward is 0, and ``done`` is True
            when ``self.trades`` equals the last snapshot ``self.trades_dupl``.
        """
        # https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py
        err_msg = "%r (%s) invalid" % (action, type(action))
        assert self.action_space.contains(action), err_msg

        try:
            new_day = next(self.days)
        except StopIteration:
            # No new price to score, so the reward is explicitly 0 (it used
            # to be left unbound here).
            if self.trades.equals(self.trades_dupl):
                done = True
            else:
                done = False
                self.trades_dupl = self.trades.copy(deep=True)
            return self.state, 0, done, {}

        current_price = self._scalar_price(self.prices.loc[new_day])
        # ndarray, matching what __init__/reset store and the Box space.
        self.state = np.array(self.indicators.loc[new_day])

        reward = self._reward(action, current_price, self.previous_price)
        self._apply_action(action)

        # NOTE(review): Portfolio.position may return a bool or a string
        # rather than a signed quantity - confirm before relying on the
        # numeric comparisons in _apply_action.
        self.position = self.portfolio.position(self.symbol)
        self.previous_price = current_price
        return self.state, reward, False, {}

    @staticmethod
    def _scalar_price(row):
        """Reduce one day's price row to a single float.

        ``DataFrame.loc[day]`` yields a Series, whose truth value is ambiguous
        in the reward comparisons. Assumes the first value of the row is the
        price to trade on - TODO confirm against History.dfPrices.
        """
        return np.asarray(row, dtype=np.float64).ravel()[0]

    @staticmethod
    def _reward(action, current_price, previous_price):
        """Score an action: +2 if it matched the price move, -2 if not, else 0."""
        rose = current_price > previous_price
        fell = current_price < previous_price
        if action == 0:  # buy pays off when the price went up
            return 2 if rose else (-2 if fell else 0)
        if action == 1:  # sell pays off when the price went down
            return 2 if fell else (-2 if rose else 0)
        # hold pays off only when the price did not move
        if rose or fell:
            return -2
        return 2 if current_price == previous_price else 0

    def _apply_action(self, action):
        """Placeholder for trade execution; today it only reports an illegal short."""
        if action == 0:
            # TODO position == 0: buying into a position, use nn probability for how much...?
            # TODO position > 0: don't overcommit to one symbol?
            # TODO position < 0: more signal to buy... keep same as last?
            return
        if action == 2:
            # hold long or short (or no) position
            return
        # action == 1 (sell)
        # TODO position == 0: sell, with no position
        # TODO position > 0: sell all? or fraction? can't sell more than have if self.short=False
        if self.position < 0 and not self.short:
            print(f'ERROR - short {self.symbol} with shorting disabled!')

    def render(self):
        """Not implemented yet."""
        # ToDo - show progression via graph?
        pass

In [33]:
class Experiment:
    """Load one symbol's history, split it chronologically, and run episodes."""

    def __init__(self, DataStore, portfolio, sym, dates=None, indicators='all'):
        """Load history for ``sym`` and cut train / validation / test slices.

        The first 75% of rows form the training pool, whose last 30% is held
        out for validation; the remaining 25% of rows is the test set.

        Args:
            DataStore: data store handle forwarded to ``History``.
            portfolio: portfolio object; its ``cash_remaining`` is recorded.
            sym: ticker symbol to load.
            dates: forwarded to ``History`` (default ``None``).
            indicators: forwarded to ``History`` (default ``'all'``).
        """
        print('Setting up experiment, loading history...')
        self.available_cash = portfolio.cash_remaining

        self.history = History(DataStore, sym, dates, indicators=indicators)
        # NOTE(review): StockTraderEnvironment reads history.dfIndicators and
        # history.dfPrices, while this class reads .indicators and .prices -
        # confirm History exposes both pairs.
        self.indicators = self.history.indicators
        self.prices = self.history.prices

        num_rows = self.prices.shape[0]
        test_start = int(.75 * num_rows)
        validation_rows = int(.3 * test_start)
        train_end = test_start - validation_rows

        self.price_train = self.prices.iloc[:train_end]
        self.indicator_train = self.indicators.iloc[:train_end]

        self.price_validation = self.prices.iloc[train_end:test_start]
        self.indicator_validation = self.indicators.iloc[train_end:test_start]

        self.price_test = self.prices.iloc[test_start:]
        self.indicator_test = self.indicators.iloc[test_start:]

        self.portfolio = portfolio
        self.symbol = sym

    def run(self, num_episodes, max_steps=100):
        """Run ``num_episodes`` episodes with randomly sampled actions.

        Args:
            num_episodes: number of times the environment is reset and played.
            max_steps: cap on steps per episode (previously hard-coded to 100).
        """
        self.env = StockTraderEnvironment(self.portfolio,
                                          self.history,
                                          short=False)  # hard-coded to prevent shorting

        for _ in range(num_episodes):
            state = self.env.reset()
            for t in range(max_steps):  # fix to be some other stopping criterion
                action = self.env.action_space.sample()  # RL algo returns action
                state, reward, done, _info = self.env.step(action)
                if done:
                    print("Episode finished after {} timesteps".format(t+1))
                    break

        # StockTraderEnvironment does not override close(); this relies on the
        # one inherited from gym.Env.
        self.env.close()
    
# Offline portfolio (no Alpaca calls), so the experiment runs without the API.
fake_portfolio = Portfolio(use_alpaca=False)

# AAPL experiment restricted to three indicators; the date window is left at
# the History default.
exp1 = Experiment(
    DataStore,
    fake_portfolio,
    sym='AAPL',
    # dates=[dt.datetime(2000,1,1), dt.datetime(2020,12,31)],
    indicators=['ADX', 'CCI', 'EMA'],
)

Portfolio loaded - available cash: 10000
Setting up experiment, loading history...
