In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pickle as pk

In [2]:
df=pd.read_csv('all_stocks_5yr.csv')
df.head()

Unnamed: 0,date,open,high,low,close,volume,Name
0,2013-02-08,15.07,15.12,14.63,14.75,8407500,AAL
1,2013-02-11,14.89,15.01,14.26,14.46,8882000,AAL
2,2013-02-12,14.45,14.51,14.1,14.27,8126000,AAL
3,2013-02-13,14.3,14.94,14.25,14.66,10259500,AAL
4,2013-02-14,14.94,14.96,13.16,13.99,31879900,AAL


In [3]:
df.shape

(619040, 7)

In [5]:
df.isnull().sum()

date       0
open      11
high       8
low        8
close      0
volume     0
Name       0
dtype: int64

In [6]:
df.dropna(inplace=True)

In [7]:
df.isnull().sum()

date      0
open      0
high      0
low       0
close     0
volume    0
Name      0
dtype: int64

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 619029 entries, 0 to 619039
Data columns (total 7 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   date    619029 non-null  object 
 1   open    619029 non-null  float64
 2   high    619029 non-null  float64
 3   low     619029 non-null  float64
 4   close   619029 non-null  float64
 5   volume  619029 non-null  int64  
 6   Name    619029 non-null  object 
dtypes: float64(4), int64(1), object(2)
memory usage: 37.8+ MB


In [9]:
#Creating Environment Matrix 2x2x3
env_rows=2
env_cols=2
n_action=3

q_table=np.zeros((env_rows,env_cols,n_action))
np.random.seed()
pk.dump(q_table,open("pickl.pkl",'wb'))

In [11]:
pk.load(open("pickl.pkl",'rb'))

array([[[0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.]]])

### Feature Engineering

In [12]:
#Defining Data Preprocessing Function

def data_prep(data,name):
    df=pd.DataFrame(data[data['Name']==name])
    #df.dropna(inplace=True)
    df.drop(['high','low','volume','Name'],axis=1,inplace=True)
    df.reset_index(drop=True,inplace=True)
    # Calculating 5 day and 1 day Moving Average for DF
    df['5day_MA']=df['close'].rolling(5).mean()
    df['1day_MA']=df['close'].rolling(1).mean()
    df['5day_MA'][:4]=0
    #Splitting into train and Test data
    train_df=df[:int(len(df)*0.8)]
    test_df=df[int(len(df)*0.8):].reset_index(drop=True)
    return train_df,test_df

In [13]:
# Get the state for datapoint by Moving Average
def get_state(long_ma,short_ma,t):
    if short_ma<long_ma:
        if t==1:
            return (0,1) #Cash
        else :
            return (0,0) #Stock
    
    elif short_ma>long_ma:
        if t==1:
            return (1,1) #Cash
        else :
            return (1,0) #Stock|

In [14]:
#Checking if the user can trade or not
def trade_t(num_of_stocks,port_value,current_price):
    if num_of_stocks>=0:
        if port_value>current_price:
            return 1
        else :return 0
    else:
        if port_value>current_price:
            return 1
        else :return 0

In [15]:
#Get next action by Epsilon greedy
# Epsilon greedy = Epsilon-Greedy is a simple method to balance exploration and exploitation by choosing between exploration and exploitation randomly. 
def next_act(state,qtable,epsilon,action=3):
    if np.random.rand() < epsilon:
        action=np.random.randint(action)
    else:
        action=np.argmax(qtable[state])
        
        
    return action

In [16]:
# Immidiate reward Generator based on cummulative wealth 
def get_reward(state,action,current_close,past_close,buy_history):
    if state==(0,0) or state==(1,0): #Stock position
        if action==0:
            return -1000
        elif action==1:
            return (current_close-buy_history)
        elif action==2:
            return (current_close-past_close)
    
    elif state==(0,1) or state==(1,1): #Cash Position
        if action==0:
            return 0
        elif action==1:
            return -1000
        elif action==2:
            return (current_close-past_close)


### Reading and preprocessing the Dataset

In [17]:
stocks=pd.read_csv('all_stocks_5yr.csv')
stocks_train,stocks_test=data_prep(stocks,'AAPL')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['5day_MA'][:4]=0


In [19]:
stocks_train.head()

Unnamed: 0,date,open,close,5day_MA,1day_MA
0,2013-02-08,67.7142,67.8542,0.0,67.8542
1,2013-02-11,68.0714,68.5614,0.0,68.5614
2,2013-02-12,68.5014,66.8428,0.0,66.8428
3,2013-02-13,66.7442,66.7156,0.0,66.7156
4,2013-02-14,66.3599,66.6556,67.32592,66.6556


In [20]:
stocks_test.head()

Unnamed: 0,date,open,close,5day_MA,1day_MA
0,2017-02-08,131.35,132.04,130.294,132.04
1,2017-02-09,131.65,132.42,131.072,132.42
2,2017-02-10,132.46,132.12,131.68,132.12
3,2017-02-13,133.08,133.29,132.28,133.29
4,2017-02-14,133.47,135.02,132.978,135.02


### Training the Dataset


In [21]:
episodes=100
port_value=1000
num_stocks=0
epsilon=1 #Epsilon Greedy
alpha=0.05 #Learning Rate
gamma=0.15 #Discount Factor
buy_history=0
net_worth=[1000] #Portfolio Value
np.random.seed()

In [22]:
episodes=100
port_value=1000
num_stocks=0
epsilon=1 #Epsilon Greedy
alpha=0.05 #Learning Rate
gamma=0.15 #Discount Factor
buy_history=0
net_worth=[1000] #Portfolio Value
np.random.seed()
for i in range(episodes): #Iteration for each episode
    port_value=1000
    num_stocks=0
    buy_history=0
    net_worth=[1000]
    

    for dt in range(len(stocks_train)): #Iteration through each dataset
        long_ma=stocks_train.iloc[dt]['5day_MA']
        short_ma=stocks_train.iloc[dt]['1day_MA']
        close_price=stocks_train.iloc[dt]['close']
        next_close=0
        
        if dt>0:
            past_close=stocks_train.iloc[dt-1]['close']
        else:
            past_close=close_price
        t=trade_t(num_stocks,net_worth[-1],close_price)
        state=get_state(long_ma,short_ma,t)
        action=next_act(state,q_table,epsilon)

        if action==0:#Buy
            
             num_stocks+=1
             buy_history=close_price
             to_append=net_worth[-1]-close_price
             net_worth.append(np.round(to_append,1))
             r=0
            
            
        
        elif action==1:#Sell
            # if num_stocks>0:
                num_stocks-=1               
                to_append=net_worth[-1]+close_price
                net_worth.append(np.round(to_append,1))
                # buy_history.pop(0)
        
        elif action==2:#hold
            to_append=net_worth[-1]+close_price
            net_worth.append(np.round(to_append,1))
            
        
                
         

        r=get_reward(state,action,close_price,past_close,buy_history) #Getting Reward
        
        try:
            next_state=get_state(stocks_train.iloc[dt+1]['5day_MA'],stocks_train.iloc[dt+1]['1day_MA'],t)
            
        except:
            break
        #Updating Q_table by Bellmen's Equation
        q_table[state][action]=(1.-alpha)*q_table[state][action]+alpha*(r+gamma*np.max(q_table[next_state]))
    
    if (epsilon-0.01)>0.15:
        epsilon-=0.01

print("Training Complete")

Training Complete


In [23]:
pk.dump(q_table,open('pickl.pkl1','wb'))

In [24]:
num_stocks

390