### Uses Paper Trade - test money
##### https://alpaca.markets/docs/api-documentation/api-v2/

In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import time, datetime as dt
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import sys

import alpha
import alpaca
import database as db
from populate import download_data
from rl_algos import TD3, DDPG, ReplayBuffer, Actor, Critic
from portfolios import Portfolio
from history import *

DataStore = db.DataStore()

# to move to respective files...
import gym
from gym import spaces
import torch

In [3]:
class StockTraderEnvironment(gym.Env):
    def __init__(self,
                portfolio,
                history, 
                short=False):
        
        self.portfolio = portfolio
        self.indicators = history.indicators
        self.indicators.drop(columns='adjusted close', inplace=True)
        self.prices = history.prices
        self.symbol = history.symbol
        self.short = short
        
        # <<train/val/test split>>
        num_days = self.prices.shape[0]
        training = int(.75* num_days)
        validation = int(.3*training)
        test = num_days-training
        
        self.price_train = self.prices.iloc[:(training-validation)]
        self.indicator_train = self.indicators.iloc[:(training-validation)]
        
        self.training_days = self.price_train.shape[0]
        print(f'{self.training_days} training days...', end='')
        
        self.price_validation = self.prices.iloc[(training-validation):training]
        self.indicator_validation = self.indicators.iloc[(training-validation):training] 
        print(f'{self.price_validation.shape[0]} validation days...', end='')
        
        self.price_test = self.prices.iloc[training:]
        self.indicator_test = self.indicators.iloc[training:]
        # <<train/val/test split>>
        
        
        num_indicators = self.indicators.shape[1]
        assert num_indicators > 0, "supply 1 or more indicators"

        self.action_space = spaces.Discrete(3)

        # set space for alpha indicators at +- infinity...?
        low_array = np.full((num_indicators), -np.inf)
        high_array = np.full((num_indicators), np.inf)
        self.observation_space = spaces.Box(low=low_array, high=high_array, dtype=np.float64)
        
        self.nS, self.nA = self.observation_space.shape[0], self.action_space.n
        
        self.previous_price = 0  # didn't exist before first day, so set previous_price to 0
        first_day = self.indicator_train.index[0] # starting at first day indicators exist
        self.prices = self.price_train.loc[first_day:] # rewriting prices to fit indicator list
        
        self.state = np.array(self.indicator_train.iloc[0]) # first day is inititial state
        self.days = iter(self.price_train.index.values)
        
        # Iterate through days, checking action/reward, etc. in step...
        self.trades = pd.DataFrame(0, index = self.price_train.index, columns = self.price_train.columns)
        self.trades_dupl = self.trades.copy(deep = True)
        
        # position is how much long (positive), short (negative) or holding (zero)
        self.portfolio.positions.append(self.symbol)
        self.portfolio.position_amount[self.symbol] = 0  # how parse?
        
        self.end_data = False # marks end of dataset

        
    def reset(self):
        self.previous_price = 0
        self.days = iter(self.price_train.index.values)
        self.state = np.array(self.indicator_train.iloc[0])
        self.trades = pd.DataFrame(0, index = self.price_train.index, columns = self.price_train.columns)
        self.portfolio.position_amount[self.symbol] = 0
        self.portfolio.cash_remaining = self.portfolio.start_value
        self.end_data = False
        

    def eval_reset(self):
        self.eval_days = iter(self.price_validation.index.values)
        self.eval_state = np.array(self.indicator_validation.iloc[0])
        self.eval_end_data = False
        self.eval_previous_price = 0
        
    
    def eval_step(self, action):
        err_msg = "%r (%s) invalid" % (action, type(action))
        assert self.action_space.contains(action), action
        
        if self.eval_end_data:
            self.eval_end_data = False
        
        reward = 0
        done = False
        info = {}
        try:
            new_day = next(self.eval_days)
            current_price = self.price_validation.loc[new_day, 'adjusted close']
            self.state = self.indicator_validation.loc[new_day]

            if action == 0 and current_price > self.eval_previous_price:
                reward = 2
            elif action == 0 and current_price < self.eval_previous_price:
                reward = -2
            elif action == 1 and current_price < self.eval_previous_price:
                reward = 2
            elif action == 1 and current_price > self.eval_previous_price:
                reward = -2
            elif action == 2 and (current_price > self.eval_previous_price or current_price < self.eval_previous_price):
                reward = -2 # or -1, don't puniash as much when hold and goes up/down?
            elif action == 2 and current_price == self.eval_previous_price:
                reward = 2
            else:
                reward = 0
            
            info = {'current_day': new_day}
            
            self.eval_previous_price = current_price
            done = False
        except StopIteration:
            self.eval_end_data = True

        return self.state, reward, done, info
    
    
    def make_trade(self, action, current_price):
        position = self.portfolio.position_amount[self.symbol]
        if not self.short:
            assert position >= 0, "Error in logic - shorted position with shorting disabled"
        buysell_amount = 0
        if action == 0 and position == 0:
            buysell_amount = 100
            self.portfolio.buy(self.symbol, buysell_amount, current_price)
        elif action == 0 and position > 0:
            buysell_amount = 50
            self.portfolio.buy(self.symbol, buysell_amount, current_price)
        elif action == 1 and position < 0:
            if not self.short:
                pass # for clarity
            else:
                buysell_amount = -50
                self.portfolio.sell(self.symbol, -buysell_amount, current_price)
        elif action == 1 and position == 0:
            if not self.short:
                pass
            else:
                buysell_amount = -100
                self.portfolio.sell(self.symbol, -buysell_amount, current_price)
        elif action == 1 and position > 0:
            if not self.short:
                buysell_amount = -position # sell off all of position if not shorting
                self.portfolio.sell(self.symbol, -buysell_amount, current_price)
            else:
                buysell_amount = -position - 50 # sell off all of position if shorting and short additioanl 50
                self.portfolio.sell(self.symbol, -buysell_amount, current_price)
        elif action == 2:
            pass # no action
        return buysell_amount
    
    
    def step(self, action):
        #https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py
        err_msg = "%r (%s) invalid" % (action, type(action))
        assert self.action_space.contains(action), action
        
        if self.end_data:
            self.end_data = False
        
        #Calculate reward here... first day = 0
        # 0 is buy, 1 is sell, 2 is hold
        reward = 0
        done = False
        info = {}
        try:
            new_day = next(self.days)

            current_price = self.price_train.loc[new_day, 'adjusted close']
            self.state = self.indicator_train.loc[new_day]

            right_choice = 'none'
            if action == 0 and current_price > self.previous_price:
                reward = 1
                right_choice = 'buy'
            elif action == 0 and current_price < self.previous_price:
                reward = -1
                right_choice = 'sell'
            elif action == 0 and current_price == self.previous_price:
                reward = -1
                right_choice = 'hold'
            elif action == 1 and current_price < self.previous_price:
                reward = 1
                right_choice = 'sell'
            elif action == 1 and current_price > self.previous_price:
                reward = -1
                right_choice = 'buy'
            elif action == 1 and current_price == self.previous_price:
                reward = -1
                right_choice = 'hold'
            elif action == 2 and current_price != self.previous_price:
                reward = -1 # or -1, don't puniash as much when hold and goes up/down?
                right_choice = 'buy or sell'
            elif action == 2 and current_price == self.previous_price:
                reward = 1
                right_choice = 'hold'
            else:
                print(current_price, self.previous_price, action)
                reward = 0
 
            
            buysell_amount = self.make_trade(action, current_price)
            self.trades.loc[new_day] = buysell_amount

            info = {'current_day': new_day, 
                    'current_price': current_price, 
                    'buysell_amount': buysell_amount, 
                    'right_choice': right_choice}            
            
            self.previous_price = current_price
            done = False
        except StopIteration:
            self.end_data = True
            done = True
            if self.trades.equals(self.trades_dupl):
                done = True
                print('Converged')
            else:
                self.trades_dupl = self.trades.copy(deep = True)
                
            info = {'current_day': None, 
                    'current_price': None, 
                    'buysell_amount': None}

        return self.state, reward, done, info
    
    
    def render(self):
        #ToDo - show progression via graph?
        pass

In [4]:
class Experiment:
    def __init__(self, DataStore, td3_kwargs, portfolio, sym, dates=None, indicators='all', shorting_allowed=False):
        print('Setting up experiment, loading history... ', end='')
        self.available_cash = portfolio.cash_remaining
        
        #date format for range: dates=[dt.datetime(2000,1,1), dt.datetime(2020,12,31)]
        self.history = History(DataStore, sym, dates, indicators=indicators)
        self.indicators = self.history.indicators
        self.prices = self.history.prices
        
        self.portfolio = portfolio
        self.symbol = sym
        
        self.env = StockTraderEnvironment(self.portfolio, 
                                          self.history, 
                                          short=shorting_allowed)
        
        self.batch_size = 64 # not parameterized...
        self.buffer = ReplayBuffer(self.env.nS, self.env.nA, max_buffer=50000, batch_size=self.batch_size)
        
        self.max_action = 2.0  # 3 actions: [0,1,2], so 2 is max
        print('Ready!')
        
        kwargs = {
            "state_dim": self.env.nS,
            "action_dim": self.env.nA,
            "max_action": self.max_action,
            "discount": td3_kwargs['discount'],
            "tau": td3_kwargs['tau'],
            "policy_noise": td3_kwargs['policy_noise']*self.max_action,            
            "noise_clip": td3_kwargs['noise_clip']*self.max_action,
            "policy_freq": td3_kwargs['policy_freq']
        }
        
        self.policy = TD3(**kwargs)
        #self.policy = DDPG(**kwargs)
        #self.policy = DuelingDDQN(**kwargs)
        self.expl_noise = td3_kwargs['expl_noise']
        self.expl_min = 0
        self.delta = (self.expl_noise - self.expl_min)/50000
        
        
    def eval_policy(self, eval_episodes=10):
        avg_reward = 0.
        for _ in range(eval_episodes):
            self.env.eval_reset()
            state = self.env.eval_state
            done = False
            while not self.env.eval_end_data:
                state = state.astype(float)
                if type(state) is np.ndarray:
                    s = torch.from_numpy(state) # not optimal - fix original data (and for tensor so not casting to float...)
                else:
                    s = torch.from_numpy(state.to_numpy())
                action = self.policy.select_action(s)
                action = np.argmax(action)
                state, rwrd, _, _ = self.env.eval_step(action)
                avg_reward += rwrd

        avg_reward /= eval_episodes

        print("---------------------------------------")
        print(f"Evaluation over {eval_episodes} episodes: {avg_reward:.3f}")
        print("---------------------------------------")
        return avg_reward

        
    def run(self, num_episodes, max_steps=int(1e6), warmup=25e3):
        
        random_warmup = warmup
        total_days_run = 0
        self.total_reward = 0
        training_started = False
        self.episode_reward = []
        self.eval_avg_reward = []
        self.action_match = []
        
        wrong = 0
        
        #self.eval_policy()

        for idx in range(num_episodes):
            
            self.env.reset()
            state = self.env.state
            done = False
            episode_reward = 0
            steps = 0
            year = None
            
            for days_passed in range(self.env.training_days+1):
                
                train = True

                if total_days_run < random_warmup:
                    action = self.env.action_space.sample()
                else:
                    #ToDo - what is this doing? https://github.com/sfujim/TD3/blob/master/main.py
                    state = state.astype(float)
                    if type(state) is np.ndarray:
                        s = torch.from_numpy(state) # not optimal - fix original data (and for tensor so not casting to float...)
                    else:
                        s = torch.from_numpy(state.to_numpy())
                    #self.expl_noise = max(self.expl_noise-self.delta, self.expl_min)
                    action = (self.policy.select_action(s) + np.random.normal(0, self.max_action * self.expl_noise, size=self.env.nA)).clip(-self.max_action, self.max_action)
                    x = action
                    action = np.argmax(action)
                    train = False
                        
                next_state, reward, done, info = self.env.step(action)
                
                if reward < 0: wrong += 1
                    
#                 if not train:          
#                     if reward < 0:
#                         print('wrong choice', x, action, info['right_choice'])
#                     else:
#                         try:
#                             print('right choice', x, action, info['right_choice'])
#                         except:
#                             pass
                
                experience = [state, action, next_state, reward, done]
                self.buffer.update(experience)
                
                self.total_reward += reward
                episode_reward += reward
                
                state = next_state
                if total_days_run >= random_warmup:
                    self.policy.train(self.buffer, self.batch_size)
                    if not training_started:
                        print('\n----Training has begun---\n')
                        training_started = True
                
                if done:
                    print(f'{num_episodes - idx - 1} more episodes. {total_days_run} total days run. ', end='')
                    
                total_days_run += 1
                
            print(f'Episode reward {episode_reward}; wrong = {wrong}')
            wrong = 0
            self.episode_reward.append(episode_reward)
            
        print(f'total reward {self.total_reward}')
        
        self.eval_policy()
                    
        #self.env.close() # not defined
    
fake_portfolio = Portfolio(use_alpaca=False)

kwargs = {
    "discount": 0.9, #0.99
    "tau": 0.01, # 0.005,  # soft update
    "policy_noise": 0.2, #0.2,            
    "noise_clip": 0.5,
    "policy_freq": 2, #2
    "expl_noise": 0.1
}

In [5]:
#DataStore.list_indicator_symbols()
#exp1.env.price_train.head()

In [6]:
dates = [dt.datetime(2008,1,1), dt.datetime(2010,12,31)]
sym = 'JPM'
indicators = ['SMA','OBV', 'AD', 'BBANDS', 'MFI', 'SAR', 'T3', 'MOM', 'MIDPRICE', 'WMA']
# exp1 = Experiment(DataStore, 
#                   kwargs, 
#                   dates=dates, 
#                   portfolio=fake_portfolio, 
#                   sym='JPM', 
#                   indicators=indicators,
#                   shorting_allowed=True)
# exp1.run(100, warmup=25e3)

In [7]:
def create_orders(df, symbol):
    orders = df.copy(deep = True)
    orders.columns = ['Shares']
    orders.insert(0, 'Symbol', ''.join(symbol))
    orders.insert(1, 'Order', 0)

    orders.ix[orders['Shares'] > 0, 'Order'] = "BUY"
    orders.ix[orders['Shares'] < 0, 'Order'] = "SELL" 

    # make all positive since Sell/Buy is used
    orders['Shares'] = orders['Shares'].abs().astype(int)

    return orders

# not factoring in commission or impact yet...
def compute_portvals(experiment):                                                                                      

    start_val = experiment.portfolio.start_value
    prices = experiment.env.price_train
    orders = experiment.env.trades.copy(deep=True)
    orders.rename(columns={'adjusted close': 'Shares'}, inplace=True)
    orders = orders.sort_index()
    symbol = experiment.symbol
    
    start_date = orders.index[0]
    end_date = orders.index[-1]
    dates = pd.date_range(start_date, end_date)
    
    prices.ffill(axis=0, inplace=True)
    prices.bfill(axis=0, inplace=True)
    prices['Cash'] = 1.0

    trades = prices.copy(deep = True)
    trades.loc[:,:] = 0.0
    trades.rename(columns={'adjusted close': symbol}, inplace=True)

    for date, row in orders.iterrows():
        if row[0] < 0:
            trades.loc[date,symbol] = trades.loc[date,symbol] - row['Shares']
            trades.loc[date,'Cash'] = trades.loc[date,'Cash'] + 1 # track commission multiplier
        else:
            trades.loc[date,symbol] = trades.loc[date,symbol] + row['Shares']
            trades.loc[date,'Cash'] = trades.loc[date,'Cash'] + 1

    print(trades)
    sys.exit()

    trades['Cash'] = (prices.iloc[:,:-1].mul(trades.iloc[:,:-1]).sum(axis=1)*-1).sub(commiss['Cash'],fill_value=0).sub(df_impact['Cash'],fill_value=0)

    holdings = trades.copy(deep = True)
    
    # each row is sum of all previous rows (excluding Cash)
    holdings.iloc[:,:-1] = holdings.rolling(len(holdings), min_periods=1).sum()
    holdings['Cash'][0] = holdings['Cash'][0] + start_val
    holdings.iloc[:,-1] = holdings.iloc[:,-1].rolling(len(holdings), min_periods=1).sum()

    values = holdings.copy(deep = True)
    values.loc[:,:] = 0
    values = prices*holdings

    portvals = values.copy(deep = True)
    portvals = values.sum(axis=1)
    portvals = portvals.to_frame()
    return portvals

In [8]:
#compute_portvals(exp1)

In [120]:
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import accuracy_score

In [9]:
dates = [dt.datetime(2008,1,1), dt.datetime(2010,12,31)]
sym = 'JPM'
indicators = ['SMA','OBV', 'AD', 'BBANDS', 'MFI', 'SAR', 'T3', 'MOM', 'MIDPRICE', 'WMA']
history = History(DataStore, sym, dates, indicators=indicators)
indicators = history.indicators
prices = history.prices

In [10]:
prices.head()

Unnamed: 0_level_0,adjusted close
Date,Unnamed: 1_level_1
2008-01-02,30.060001
2008-01-03,29.85328
2008-01-04,29.176093
2008-01-07,29.468352
2008-01-08,28.299313


In [11]:
#indicators = indicators.iloc[:,1:]
indicators.rename(columns={'adjusted close': 'close'}, inplace=True)

In [12]:
indicators.rename(columns={'close': 'Close'}, inplace=True)
indicators.head()

Unnamed: 0,Close,SMA,OBV,Chaikin A/D,Real Upper Band,Real Middle Band,Real Lower Band,MFI,SAR,T3,MOM,MIDPRICE,WMA
2008-01-02,30.0600005186,31.1368,-142498600.0,162633200.0,31.7512,31.1368,30.5223,44.7185,33.5787,31.5512,-0.458,30.9037,31.063
2008-01-03,29.8532800976,31.0191,-159649300.0,150107400.0,31.6972,31.0191,30.341,46.8154,33.4331,31.4181,-1.1767,30.8181,30.9294
2008-01-04,29.1760925119,30.8695,-185523100.0,137502200.0,31.9971,30.8695,29.742,34.6668,33.2498,31.2575,-1.4955,30.4154,30.6566
2008-01-07,29.4683524173,30.6233,-159879000.0,154477900.0,32.2614,30.6233,28.9851,35.1914,32.9925,31.0516,-2.4627,30.2479,30.28
2008-01-08,28.2993127955,30.4915,-193525000.0,129554900.0,32.2283,30.4915,28.7546,20.4535,32.687,30.8224,-1.3183,29.9841,30.0907


In [123]:
#indicators['NextDay'] = prices.index.shift(-1, freq='d')

y = indicators.iloc[:, 0]
X = indicators.iloc[:, 1:]

scaler = StandardScaler()
X = scaler.fit_transform(X)

#y = y[:-1]
#X = X[1:, :]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

y_train = y_train.to_numpy().astype(float)
y_test = y_test.to_numpy().astype(float)

In [140]:
clf = tree.DecisionTreeRegressor()
clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.9346977916617255

In [141]:
y_pred = clf.predict(X_test[50].reshape(1,-1), y_test[50])
accuracy_score(y_, y_pred)

NameError: name 'y_' is not defined

In [144]:
from sklearn import svm
regr = svm.SVR()
regr = regr.fit(X_train, y_train)
regr.score(X_test, y_test)

0.9255267396985659