# Using RL to predict Stock Prices

### Basic Data
Using only the basic Open, High (max), Low (min) and Close prices to predict the prices.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the AAPL price history from a CSV file in the working directory
# and preview the first rows.
df = pd.read_csv("AAPL.csv")
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,1980-12-12,0.100178,0.100614,0.100178,0.100178,469033600,0.0,0.0
1,1980-12-15,0.095388,0.095388,0.094952,0.094952,175884800,0.0,0.0
2,1980-12-16,0.088418,0.088418,0.087983,0.087983,105728000,0.0,0.0
3,1980-12-17,0.09016,0.090596,0.09016,0.09016,86441600,0.0,0.0
4,1980-12-18,0.092774,0.09321,0.092774,0.092774,73449600,0.0,0.0


In [3]:
# Discard every column except the four price columns used below.
df = df.drop(columns=['Date', 'Volume', 'Dividends', 'Stock Splits'])

In [4]:
# Confirm that only Open, High, Low and Close remain after the drop.
df.head()

Unnamed: 0,Open,High,Low,Close
0,0.100178,0.100614,0.100178,0.100178
1,0.095388,0.095388,0.094952,0.094952
2,0.088418,0.088418,0.087983,0.087983
3,0.09016,0.090596,0.09016,0.09016
4,0.092774,0.09321,0.092774,0.092774


In [5]:
# Check column dtypes and non-null counts before any filtering.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10483 entries, 0 to 10482
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    10483 non-null  float64
 1   High    10483 non-null  float64
 2   Low     10483 non-null  float64
 3   Close   10483 non-null  float64
dtypes: float64(4)
memory usage: 327.7 KB


In [6]:
# Flag "flat" bars: rows in which every remaining column (High, Low, Close)
# is within 1e-8 of the first column (Open).
# Done with vectorized, purely positional selection (`iloc`) instead of a
# row-wise `apply`: the integer-key lookup `x[0]` on a label-indexed row is
# deprecated in pandas, and the vectorized form yields the same boolean flag.
df['remove'] = (
    df.iloc[:, 1:]
    .sub(df.iloc[:, 0], axis=0)
    .abs()
    .lt(1e-8)
    .all(axis=1)
)

  df['remove'] = df.apply(lambda x: all([abs(i - x[0]) < 1e-8 for i in x[1:]]), axis = 1)


In [7]:
# Preview the frame with the new boolean `remove` column.
df.head()

Unnamed: 0,Open,High,Low,Close,remove
0,0.100178,0.100614,0.100178,0.100178,False
1,0.095388,0.095388,0.094952,0.094952,False
2,0.088418,0.088418,0.087983,0.087983,False
3,0.09016,0.090596,0.09016,0.09016,False
4,0.092774,0.09321,0.092774,0.092774,False


In [8]:
# Keep only the rows that were not flagged, discard the helper column,
# and renumber the index from zero so positions and labels line up again.
df = (
    df.query("remove == False")
    .drop(columns=['remove'])
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,Open,High,Low,Close
0,0.100178,0.100614,0.100178,0.100178
1,0.095388,0.095388,0.094952,0.094952
2,0.088418,0.088418,0.087983,0.087983
3,0.09016,0.090596,0.09016,0.09016
4,0.092774,0.09321,0.092774,0.092774


In [9]:
# Show the filtered frame (head and tail) to see how many rows survived.
df

Unnamed: 0,Open,High,Low,Close
0,0.100178,0.100614,0.100178,0.100178
1,0.095388,0.095388,0.094952,0.094952
2,0.088418,0.088418,0.087983,0.087983
3,0.090160,0.090596,0.090160,0.090160
4,0.092774,0.093210,0.092774,0.092774
...,...,...,...,...
10450,141.350006,144.119995,141.080002,142.919998
10451,143.289993,146.550003,143.279999,146.350006
10452,145.259995,147.550003,145.000000,147.039993
10453,145.669998,146.639999,143.779999,144.869995


In [10]:
# Normalizing the dataset:
# express High, Low and Close as fractional offsets from the same bar's Open.
# Open itself stays absolute so the real close can be rebuilt later.
# NOTE: this overwrites the columns in place, so run the cell only once.
for price_col in ('High', 'Low', 'Close'):
    df[price_col] = (df[price_col] - df['Open']) / df['Open']

In [11]:
# Inspect the result: High/Low/Close are now fractions relative to Open.
df

Unnamed: 0,Open,High,Low,Close
0,0.100178,0.004348,0.000000,0.000000
1,0.095388,0.000000,-0.004566,-0.004566
2,0.088418,0.000000,-0.004926,-0.004926
3,0.090160,0.004831,0.000000,0.000000
4,0.092774,0.004694,0.000000,0.000000
...,...,...,...,...
10450,141.350006,0.019597,-0.001910,0.011107
10451,143.289993,0.022751,-0.000070,0.021355
10452,145.259995,0.015765,-0.001790,0.012254
10453,145.669998,0.006659,-0.012975,-0.005492


In [20]:
class AAPL_env():
    """Minimal single-position trading environment over price bars.

    ``data`` must provide an absolute ``Open`` column plus ``High``, ``Low``
    and ``Close`` columns expressed as fractions relative to ``Open``; the
    absolute close of a bar is rebuilt as ``Open * (1 + Close)``.

    Parameters
    ----------
    data : pandas.DataFrame
        Price bars; rows are walked in their stored order.
    obs_bars : int
        Number of most recent bars contained in every observation.
    test : bool
        ``False`` (training): the episode starts at a random bar and ends as
        soon as a position is closed. ``True``: the episode starts at the
        first bar that has a full observation window and runs to the end of
        the data.
    commission_perc : float
        Amount subtracted from the reward on every buy and every close. The
        trade reward is expressed in percent, so this is in percent as well.

    Raises
    ------
    ValueError
        In training mode, when ``data`` has no more than ``10 * obs_bars``
        rows, so that no random start position is available.
    """

    def __init__(self, data, obs_bars = 10, test = False, commission_perc=0.1):
        self.data = data
        self.obs_bars = obs_bars
        self.have_position = False
        self.open_price = 0.0
        self.test = test
        self.commission_perc = commission_perc
        if not test:
            # Random start that leaves at least 9 * obs_bars bars of data
            # after the initial position.
            n_starts = len(self.data) - self.obs_bars * 10
            if n_starts <= 0:
                raise ValueError(
                    "data has %d rows; training mode needs more than %d "
                    "(10 * obs_bars)" % (len(self.data), self.obs_bars * 10)
                )
            self.curr_step = np.random.choice(n_starts) + self.obs_bars
        else:
            self.curr_step = self.obs_bars

        # DataFrame holding the `obs_bars` rows that end just before curr_step.
        self.state = self._window()

    def _window(self):
        """Return the ``obs_bars`` rows that end just before ``curr_step``."""
        # Positional slicing: independent of whatever index labels `data` has.
        return self.data.iloc[self.curr_step - self.obs_bars : self.curr_step]

    def _last_close(self):
        """Return the absolute close price of the newest bar in the window."""
        # `.iloc[-1]` instead of a label lookup, so the environment also works
        # on frames whose index is not the default 0..N-1 range.
        relative_close = self.state["Close"].iloc[-1]
        open_price = self.state["Open"].iloc[-1]
        return open_price * (1 + relative_close)

    def _encode(self, close):
        """Build the ``(5, obs_bars)`` float32 observation for the current window."""
        obs = np.zeros((5, self.obs_bars), dtype=np.float32)
        obs[0] = self.state["High"].to_numpy()
        obs[1] = self.state["Low"].to_numpy()
        obs[2] = self.state["Close"].to_numpy()
        obs[3] = float(self.have_position)
        if self.have_position:
            # NOTE(review): `close` is the close of the bar the action was
            # taken on, i.e. one bar older than the newest bar in the window.
            # Kept as in the original; confirm whether the lag is intended.
            obs[4] = (close - self.open_price) / self.open_price
        return obs

    def step(self, action):
        """Apply ``action`` at the newest bar's close, then advance one bar.

        Parameters
        ----------
        action : str
            ``"buy"`` opens a position if none is held, ``"close"`` closes
            the held position; any other value (or an inapplicable action)
            does nothing.

        Returns
        -------
        state : numpy.ndarray
            ``(5, obs_bars)`` float32 array. Rows 0-2 are the relative High,
            Low and Close of the window, row 3 is the position flag and
            row 4 the relative profit of the open position (0 when flat).
        reward : float
            ``-commission_perc`` on a buy; on a close, the percentage price
            change since the buy minus ``commission_perc``; otherwise 0.
        done : bool
            ``True`` when a position was closed in training mode or when the
            end of the data has been reached.
        """
        reward = 0.0
        done = False
        close = self._last_close()

        if action == "buy" and not self.have_position:
            self.have_position = True
            self.open_price = close
            reward -= self.commission_perc
        elif action == "close" and self.have_position:
            reward -= self.commission_perc
            if not self.test:
                # Training episodes cover exactly one round trip.
                done = True

            reward += 100.0 * (close - self.open_price) / self.open_price

            self.have_position = False
            self.open_price = 0.0

        self.curr_step += 1
        self.state = self._window()

        # `>=` rather than `==`, so an overshoot can never miss termination.
        if self.curr_step >= len(self.data) - 1:
            done = True

        return self._encode(close), reward, done

In [21]:
# Map each discrete action index to the action name the environment expects.
actions = dict(enumerate(("do_nothing", "buy", "close")))

In [22]:
# Training-mode environment (random start bar) observing the last 50 bars.
AAPL = AAPL_env(df, obs_bars=50, test=False)

In [23]:
# Smoke test: take one no-op step (action index 0) and capture the result.
state, reward, done = AAPL.step(actions[0])