In [55]:
import math
import numpy as np
import matplotlib.pyplot as plt
from scipy import interpolate
import pandas as pd
from random import randrange

In [56]:
class simple_buy_sell_spy():
    """All-in / all-out trading environment built on SPY adjusted close prices.

    The feature table has one row per trading day. Column 0
    (``index_raw_price``) is the adjusted close; every following column
    ``ewm_<span>`` is the ratio of the price to its exponentially weighted
    mean over ``<span>`` days.

    Usage: call :meth:`reset` to start an episode, then :meth:`step`
    repeatedly with an action of ``1`` (buy), ``-1`` (sell) or ``0`` (hold).
    """

    def __init__ (self,mv_feature_list = (5,10,15),data_path = "SPY.csv"):
        """Load the price history and build the feature table.

        Parameters
        ----------
        mv_feature_list : sequence of int
            Spans of the exponentially weighted means used as features.
            Defaults to ``(5, 10, 15)``.
        data_path : str
            CSV file with at least an ``Adj Close`` column.

        Raises
        ------
        ValueError
            If ``mv_feature_list`` is empty.
        """
        period_list = list(mv_feature_list)
        if not period_list:
            raise ValueError("mv_feature_list must contain at least one span")

        #first, load in the data
        index_data = pd.read_csv(data_path)

        #build feature matrix
        index_feature_dataframe = pd.DataFrame()
        index_feature_dataframe['index_raw_price'] = index_data['Adj Close']
        for period in period_list:
            ewm = index_feature_dataframe['index_raw_price'].ewm(span = period).mean()
            index_feature_dataframe['ewm_'+str(period)] = \
                index_feature_dataframe['index_raw_price']/ewm

        #drop the warm-up rows, where the longest average has little history
        index_feature_dataframe = index_feature_dataframe.iloc[max(period_list):,:]
        self.index_feature_dataframe = index_feature_dataframe.reset_index(drop=True)

        #episode state; populated by reset()
        self.current_index = None
        self.current_portfolio_value = None
        self.have_position = None
        self.stock_quantity = None
        self.cash = None
        self.buy_and_hold_stock_quantity = None

    def _current_price(self):
        """Return the raw price (column 0) at the current time index."""
        return self.index_feature_dataframe.iloc[self.current_index, 0]

    def _observation(self):
        """Return the feature columns at the current index as an (n, 1) array."""
        features = self.index_feature_dataframe.iloc[self.current_index, 1:]
        return features.to_numpy().reshape((-1,1))

    def reset(self):
        """Start a new episode at a random row and return the first observation.

        The start row is drawn so that at least 500 rows follow it;
        ``randrange`` raises ``ValueError`` when the table has 500 rows or
        fewer. The account starts with 1e5 in cash and no shares.

        Returns
        -------
        numpy.ndarray
            Column vector of the feature ratios at the start row.
        """
        #pick a random starting point on the self.index_feature_dataframe
        self.current_index = randrange(0,self.index_feature_dataframe.shape[0]-500)
        current_stock_price = self._current_price()

        #initialize other variables
        self.cash = 1e5
        self.stock_quantity = 0
        self.current_portfolio_value = self.cash + self.stock_quantity*current_stock_price
        self.have_position = False
        #benchmark: shares held if all the cash were invested right now
        self.buy_and_hold_stock_quantity = self.cash/current_stock_price

        return self._observation()

    def step(self,action):
        """Apply ``action`` at the current price and advance one row.

        Parameters
        ----------
        action : int
            ``1`` buys as many whole shares as the cash allows (only when
            flat), ``-1`` sells the whole position (only when holding).
            Any other value leaves the account untouched.

        Returns
        -------
        tuple
            ``(observation, reward)``. ``reward`` is the portfolio value
            expressed in shares at the current price, divided by the
            buy-and-hold share count fixed in :meth:`reset`. It is computed
            before the index is advanced; ``observation`` is taken after.

        Raises
        ------
        Exception
            If :meth:`reset` has not been called yet.
        """
        if (self.current_portfolio_value is None
                or self.current_index is None
                or self.have_position is None):
            raise Exception("Please call reset first")

        current_stock_price = self._current_price()

        if action == 1:
            #buy
            if not self.have_position:
                self.stock_quantity = np.floor(self.cash/current_stock_price)
                self.have_position = True
                self.cash -= self.stock_quantity*current_stock_price

        elif action == -1:
            #sell
            if self.have_position:
                self.cash += self.stock_quantity*current_stock_price
                self.stock_quantity = 0
                self.have_position = False

        #any other action is a hold

        #compute reward
        self.current_portfolio_value = self.cash + self.stock_quantity*current_stock_price
        reward = (self.current_portfolio_value/current_stock_price)/self.buy_and_hold_stock_quantity

        #move one time step
        self.current_index += 1

        return self._observation(),reward
    
    
    

In [64]:
import math
import numpy as np
import matplotlib.pyplot as plt
from scipy import interpolate
import pandas as pd
from random import randrange

In [81]:
class simple_continuous_buy_sell_spy():
    """Trading environment on SPY prices with a continuous allocation target.

    The feature table has one row per trading day. Column 0
    (``index_raw_price``) is the adjusted close; every following column
    ``ewm_<span>`` is the ratio of the price to its exponentially weighted
    mean over ``<span>`` days.

    The action passed to :meth:`step` is a target for the ratio
    ``value held in stock / cash`` (note: a ratio to cash, not a fraction of
    the whole portfolio). The environment buys fixed-size lots, or sells
    previously bought lots, until that ratio crosses the target.
    """

    def __init__ (self,mv_feature_list = (5,10,15),data_path = "SPY.csv"):
        """Load the price history and build the feature table.

        Parameters
        ----------
        mv_feature_list : sequence of int
            Spans of the exponentially weighted means used as features.
            Defaults to ``(5, 10, 15)``.
        data_path : str
            CSV file with at least an ``Adj Close`` column.

        Raises
        ------
        ValueError
            If ``mv_feature_list`` is empty.
        """
        period_list = list(mv_feature_list)
        if not period_list:
            raise ValueError("mv_feature_list must contain at least one span")

        #first, load in the data
        index_data = pd.read_csv(data_path)

        #build feature matrix
        index_feature_dataframe = pd.DataFrame()
        index_feature_dataframe['index_raw_price'] = index_data['Adj Close']
        for period in period_list:
            ewm = index_feature_dataframe['index_raw_price'].ewm(span = period).mean()
            index_feature_dataframe['ewm_'+str(period)] = \
                index_feature_dataframe['index_raw_price']/ewm

        #drop the warm-up rows, where the longest average has little history
        index_feature_dataframe = index_feature_dataframe.iloc[max(period_list):,:]
        self.index_feature_dataframe = index_feature_dataframe.reset_index(drop=True)

        #episode state; populated by reset()
        self.current_time_index = None
        self.current_portfolio_value = None
        self.positions = None
        self.cash = None
        self.min_buy_value = None
        self.buy_and_hold_stock_quantity = None
        self.expert_reward = None

    def _current_price(self):
        """Return the raw price (column 0) at the current time index."""
        return self.index_feature_dataframe.iloc[self.current_time_index, 0]

    def _feature_column(self):
        """Return the feature columns at the current index as an (n, 1) array."""
        features = self.index_feature_dataframe.iloc[self.current_time_index, 1:]
        return features.to_numpy().reshape((-1,1))

    def _value_in_stock(self,stock_price):
        """Return the value of all open positions marked at ``stock_price``."""
        return sum(position['quantity']*stock_price for position in self.positions)

    def _stock_to_cash_ratio(self,stock_price):
        """Return ``value in stock / cash``, guarding against zero cash.

        With no cash left the ratio is reported as ``inf`` when shares are
        held and ``0.0`` otherwise.
        """
        value_in_stock = self._value_in_stock(stock_price)
        if self.cash == 0:
            return float('inf') if value_in_stock > 0 else 0.0
        return value_in_stock/self.cash

    def reset(self):
        """Start a new episode at a random row and return the first observation.

        The start row is drawn so that at least 500 rows follow it;
        ``randrange`` raises ``ValueError`` when the table has 500 rows or
        fewer. The account starts with 1e5 in cash, no positions and a lot
        size of 5e3.

        Returns
        -------
        numpy.ndarray
            Column vector of the feature ratios at the start row with a
            trailing ``0`` for the stock-to-cash ratio.
        """
        #pick a random starting point on the self.index_feature_dataframe
        self.current_time_index = randrange(0,self.index_feature_dataframe.shape[0]-500)
        current_stock_price = self._current_price()

        #initialize other variables
        self.cash = 1e5
        self.positions = []
        self.min_buy_value = 5e3
        #benchmark: shares held if all the cash were invested right now
        self.buy_and_hold_stock_quantity = self.cash/current_stock_price
        self.expert_reward = 1
        self.current_portfolio_value = self.cash + self._value_in_stock(current_stock_price)

        return np.concatenate((self._feature_column(),[[0]]),axis = 0)

    def step(self,action,return_price = False):
        """Trade towards the target ratio ``action`` and advance one row.

        If the current stock-to-cash ratio is below ``action``, lots worth
        about ``min_buy_value`` are bought until the ratio is no longer
        below the target, the cash cannot pay for another lot, or the lot
        size rounds down to zero shares. Otherwise lots are sold, cheapest
        purchase price first, while the ratio is above the target; a lot is
        only sold when the current price is above its purchase price, and a
        lot that is not sold stays in the book.

        Parameters
        ----------
        action : float
            Target value of ``value in stock / cash``.
        return_price : bool
            Selects the alternative return shape described below.

        Returns
        -------
        tuple
            ``(price, observation, executed)`` when ``return_price`` is
            true, where ``price`` is the price after advancing and
            ``executed`` tells whether any lot was bought or sold.
            Otherwise ``(observation, reward)``; ``reward`` is non-zero only
            on steps that sold something, and is the portfolio value in
            shares relative to buy-and-hold minus a benchmark that grows by
            0.1% per step.

        Raises
        ------
        Exception
            If :meth:`reset` has not been called yet.
        """
        if self.current_portfolio_value is None:
            raise Exception("Please call reset first")

        execute_action = False
        execute_sell = False

        current_stock_price = self._current_price()
        current_percent_value_in_stock = self._stock_to_cash_ratio(current_stock_price)

        if current_percent_value_in_stock<action:
            #buy
            lot_quantity = np.floor(self.min_buy_value/current_stock_price)
            lot_cost = lot_quantity*current_stock_price
            while current_percent_value_in_stock<action:
                #an empty or unaffordable lot can never move the ratio towards
                #the target, so stop instead of looping with negative cash
                if lot_quantity <= 0 or lot_cost > self.cash:
                    break
                self.positions.append({'quantity':lot_quantity,
                                       'price':current_stock_price})
                self.cash -= lot_cost
                execute_action = True
                current_percent_value_in_stock = self._stock_to_cash_ratio(current_stock_price)
            #keep the book ordered by purchase price, cheapest first
            self.positions = sorted(self.positions,key = lambda k : k['price'])
        else:
            #sell
            while self.positions and current_percent_value_in_stock>action:
                cheapest_position = self.positions[0]
                if current_stock_price <= cheapest_position['price']:
                    #cannot sell because price is too low; the book is sorted,
                    #so no other lot is profitable either
                    break
                self.positions.pop(0)
                self.cash += cheapest_position['quantity']*current_stock_price
                execute_action = True
                execute_sell = True
                current_percent_value_in_stock = self._stock_to_cash_ratio(current_stock_price)

        #move one time step and mark the book at the new price
        self.expert_reward = self.expert_reward*1.001
        self.current_time_index += 1
        current_stock_price = self._current_price()
        observation = self._feature_column()

        self.current_portfolio_value = self.cash + self._value_in_stock(current_stock_price)
        current_percent_value_in_stock = self._stock_to_cash_ratio(current_stock_price)

        reward = (self.current_portfolio_value/current_stock_price)/self.buy_and_hold_stock_quantity
        reward = reward-self.expert_reward

        observation = np.concatenate((observation,[[current_percent_value_in_stock]]),axis = 0)

        if return_price:
            return current_stock_price,observation,execute_action

        if execute_sell:
            return observation, reward

        return observation,0

In [82]:
# Build the continuous-action environment; the constructor reads SPY.csv
# from the working directory.
env = simple_continuous_buy_sell_spy()

In [83]:
# Start an episode at a random row; the displayed value is the initial
# observation (feature ratios followed by a trailing 0).
env.reset()

array([[1.01466003],
       [1.01901214],
       [1.01827378],
       [0.        ]])

In [84]:
import random

# Smoke test: feed ten uniformly random targets in [0, 1] to the environment
# and print what each step returns.
for _ in range(10):
    action = random.uniform(0.0, 1.0)
    print('action is',action)

    obs,reward = env.step(action)
    print('obs is',obs)
    print('reward is',reward)

    print(' ')

action is 0.956411493047587
obs is [[1.01039868]
 [1.01633361]
 [1.01684417]
 [1.0001171 ]]
reward is 0
 
action is 0.4642621151476647
obs is [[1.00354479]
 [1.0091709 ]
 [1.01025724]
 [0.42645573]]
reward is 0.0010266533920231247
 
action is 0.7079880761842721
obs is [[0.99465301]
 [0.99800463]
 [0.99880642]
 [0.80369553]]
reward is 0
 
action is 0.7959101162436197
obs is [[1.00292934]
 [1.00636047]
 [1.00751217]
 [0.66225771]]
reward is -0.00045688286070832973
 
action is 0.5327375783957081
obs is [[1.02277894]
 [1.0309594 ]
 [1.03421024]
 [0.43958029]]
reward is -0.02297663730864763
 
action is 0.4741038698839679
obs is [[1.0182258 ]
 [1.02910881]
 [1.0340246 ]
 [0.55225991]]
reward is 0
 
action is 0.19871265810550998
obs is [[1.01129221]
 [1.02271593]
 [1.02859568]
 [0.17846963]]
reward is -0.02698169085666635
 
action is 0.17464287997540417
obs is [[1.01945115]
 [1.0333716 ]
 [1.04095195]
 [0.11366853]]
reward is -0.04354979987631691
 
action is 0.5340385526795458
obs is [[1.0287