In [1]:
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas_datareader as data_reader
from tqdm import tqdm_notebook, tqdm
from collections import deque

  from pandas.util.testing import assert_frame_equal


In [19]:
# Checking if tf is working with GPU
all([tf.test.is_gpu_available(), tf.test.is_built_with_cuda()])

True

In [20]:
# Checking if GPU-0 ie Nvidia GTX 1060 AMP is set as default
tf.test.gpu_device_name()

'/device:GPU:0'

In [3]:
# assert(tf.__version__, '2.0.0')
class AI_Trader():

    # actions are NoOp, Buy, Sell
    def __init__(self, state_size, action_space=3, model_name='trader'):
        self.state_size = state_size
        self.action_space = action_space
        self.memory = deque(maxlen=2000)
        self.inventory = []
        self.model_name = model_name

        self.gamma = 0.95 # discount parameter
        self.epsilon = 1.0 # initial value is 1, ie initially take random actions
        self.epsilon_final = 0.01 # for fully trained network: if value less than this, stop decay
        self.epsilon_decay = 0.995 # Hold out when decaying the epsilon
        self.model = self.model_builder()

    
    def model_builder(self):

        model = tf.keras.models.Sequential([
            tf.keras.layers.Dense(units=32, activation=tf.nn.relu, input_dim=self.state_size), # or input_shape=(,self.state_size)
            tf.keras.layers.Dense(units=64, activation=tf.nn.relu),
            tf.keras.layers.Dense(units=128, activation=tf.nn.relu),
            tf.keras.layers.Dense(units=self.action_space, activation=None) # linear activation
        ])
        model.compile(loss=tf.losses.mean_squared_error , 
                      optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
        return model
    
    # Input: state; output: action
    def trade(self, state):
        
        # deciding whether to use random action or model action based on epsilon
        # if less than current epsilon, return random action between [0,3)
        if random.random() <= self.epsilon: return random.randrange(start=0, stop=self.action_space)
        # if greater than current epsilon
        actions = self.model.predict(state)
        return np.argmax(actions[0])
    
    
    def batch_trade(self, batch_size):
        
        batch = []
        for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
            batch.append(self.memory[i])
        
        for state, action, reward, next_state, done in batch:
            reward = reward
            if not done:
                reward += self.gamma * np.amax(self.model.predict(next_state)[0]) # discounted reward
            target = self.model.predict(state)
            target[0][action] = reward
            self.model.fit(state, target, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_final:
            self.epsilon *= self.epsilon_decay

In [4]:
# Imports data from online sources like Google Finances
dataset = data_reader.DataReader("AAPL", data_source="yahoo")

In [5]:
print(dataset.index[0])
print(dataset.index[-1])
print(str(dataset.index[0]).split())
print(str(dataset.index[0]).split()[0]) # This is what we want

2015-06-09 00:00:00
2020-06-08 00:00:00
['2015-06-09', '00:00:00']
2015-06-09


In [6]:
# Helper functions
def sigmoid(x):
    return 1/(1+math.exp(-x))

def stocks_price_format(n):
    
    if n<0: return "- $ {0:2f}".format(abs(n))
    else: return "$ {0:2f}".format(abs(n))
    
def dataset_loader(stock_name):
    
    dataset = data_reader.DataReader(stock_name, data_source='yahoo')
    start_date = str(dataset.index[0]).split()[0]
    end_date = str(dataset.index[-1]).split()[0]
    close = dataset['Close']
    return close

# State Creator function
def state_creator(data, timeStep, window_size):
    
    starting_id = timeStep - window_size + 1
    if starting_id >= 0: windowed_data = data[starting_id:timeStep+1]
    else: windowed_data = -starting_id * [data[0]] + list(data[0:timeStep+1])
    state = []
    for i in range(window_size - 1):
        state.append(sigmoid(windowed_data[i+1] - windowed_data[i]))
    return np.array([state])


In [9]:
# Loading data
stock_name = "AAPL"
data = dataset_loader(stock_name)

In [14]:
data.describe

<bound method NDFrame.describe of Date
2015-06-09    127.419998
2015-06-10    128.880005
2015-06-11    128.589996
2015-06-12    127.169998
2015-06-15    126.919998
                 ...    
2020-06-02    323.339996
2020-06-03    325.119995
2020-06-04    322.320007
2020-06-05    331.500000
2020-06-08    329.988190
Name: Close, Length: 1259, dtype: float64>

In [17]:
# Setting HyperP
window_size = 10
episodes = 500 #1000
batch_size = 32
data_samples = len(data) - 1

# Defining the trader model
trader = AI_Trader(window_size)

In [16]:
trader.model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                352       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_2 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 387       
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Training
for episode in range(1, episodes+1):
    print("Episode {}/{}".format(episode, episodes))
    state = state_creator(data, 0, window_size+1)
    total_profit = 0
    trader.inventory = []
    for t in tqdm(range(data_samples)):
        action = trader.trade(state)
        next_state = state_creator(data, t+1, window_size+1)
        reward = 0
        # if buying
        if action == 1:
            trader.inventory.append(data[t])
            print("AI trader bought: ", stocks_price_format(data[t]))
        # If sellable and selling
        elif len(trader.inventory)>0 and action==2:
            buy_price = trader.inventory.pop(0)
            reward = max(data[t] - buy_price, 0)
            total_profit += data[t] - buy_price
            print("AI trader sold: ", stocks_price_format(data[t]), " at Profit: " + stocks_price_format(data[t] - buy_price))
        # if t is the last data sample
        if t == data_samples-1: done = True
        else: done = False
            
        # Append all the data to the trader's memory or Experience replay buffer
        trader.memory.append((state, action, reward, next_state, done))
        state = next_state
        
        if done:
            print("DONE")
            print("TOTAL PROFIT: {}".format(total_profit))
        
        if len(trader.memory) > batch_size:
            trader.batch_trade(batch_size)
    if episode % 10 == 0:
        trader.model.save("ai_trader_{}.h5".format(episode))

  0%|          | 0/1258 [00:00<?, ?it/s]

Episode 1/500
AI trader bought:  $ 128.880005
AI trader bought:  $ 128.589996
AI trader bought:  $ 127.169998
AI trader sold:  $ 126.919998  at Profit: - $ 1.960007
AI trader sold:  $ 127.599998  at Profit: - $ 0.989998
AI trader bought:  $ 127.300003
AI trader sold:  $ 126.599998  at Profit: - $ 0.570000
AI trader bought:  $ 127.610001
AI trader sold:  $ 127.029999  at Profit: - $ 0.270004
AI trader bought:  $ 128.110001
AI trader bought:  $ 126.750000
AI trader bought:  $ 124.529999
AI trader sold:  $ 125.430000  at Profit: - $ 2.180000
AI trader sold:  $ 126.000000  at Profit: - $ 2.110001
AI trader sold:  $ 125.690002  at Profit: - $ 1.059998
AI trader bought:  $ 122.570000
AI trader bought:  $ 123.279999
AI trader bought:  $ 125.660004
AI trader sold:  $ 125.610001  at Profit: $ 1.080002
AI trader sold:  $ 126.820000  at Profit: $ 4.250000
AI trader sold:  $ 128.509995  at Profit: $ 5.229996
AI trader sold:  $ 129.619995  at Profit: $ 3.959991
AI trader bought:  $ 125.220001
AI tr

  3%|▎         | 34/1258 [00:10<25:21,  1.24s/it]

AI trader bought:  $ 123.379997


  3%|▎         | 35/1258 [00:13<39:18,  1.93s/it]

AI trader bought:  $ 122.989998


  3%|▎         | 36/1258 [00:17<49:12,  2.42s/it]

AI trader sold:  $ 122.370003  at Profit: - $ 1.009995


  3%|▎         | 37/1258 [00:20<54:56,  2.70s/it]

AI trader sold:  $ 121.300003  at Profit: - $ 1.689995


  3%|▎         | 39/1258 [00:28<1:05:18,  3.21s/it]

AI trader bought:  $ 114.639999


  3%|▎         | 40/1258 [00:31<1:05:35,  3.23s/it]

AI trader sold:  $ 115.400002  at Profit: $ 0.760002


  3%|▎         | 42/1258 [00:38<1:09:00,  3.41s/it]

AI trader bought:  $ 115.519997


  3%|▎         | 43/1258 [00:42<1:08:43,  3.39s/it]

AI trader bought:  $ 119.720001


  4%|▎         | 45/1258 [00:49<1:10:28,  3.49s/it]

AI trader bought:  $ 115.239998


  4%|▎         | 46/1258 [00:52<1:09:34,  3.44s/it]

AI trader sold:  $ 115.150002  at Profit: - $ 0.369995


  4%|▎         | 47/1258 [00:56<1:14:08,  3.67s/it]

AI trader bought:  $ 115.959999


  4%|▍         | 50/1258 [01:07<1:10:37,  3.51s/it]

AI trader sold:  $ 115.010002  at Profit: - $ 4.709999


  4%|▍         | 51/1258 [01:11<1:16:12,  3.79s/it]

AI trader bought:  $ 112.650002


  4%|▍         | 52/1258 [01:14<1:11:16,  3.55s/it]

AI trader bought:  $ 105.760002


  4%|▍         | 53/1258 [01:17<1:07:43,  3.37s/it]

AI trader bought:  $ 103.120003


  4%|▍         | 54/1258 [01:20<1:05:51,  3.28s/it]

AI trader bought:  $ 103.739998


  4%|▍         | 56/1258 [01:27<1:08:24,  3.41s/it]

AI trader bought:  $ 112.919998


  5%|▍         | 58/1258 [01:33<1:04:32,  3.23s/it]

AI trader bought:  $ 112.760002


  5%|▍         | 59/1258 [01:36<1:03:37,  3.18s/it]

AI trader sold:  $ 107.720001  at Profit: - $ 7.519997


  5%|▍         | 62/1258 [01:48<1:08:50,  3.45s/it]

AI trader bought:  $ 109.269997


  5%|▌         | 63/1258 [01:50<1:05:38,  3.30s/it]

AI trader sold:  $ 112.309998  at Profit: - $ 3.650002


  5%|▌         | 64/1258 [01:53<1:03:49,  3.21s/it]

AI trader sold:  $ 110.150002  at Profit: - $ 2.500000


  5%|▌         | 65/1258 [01:56<1:02:31,  3.14s/it]

AI trader sold:  $ 112.570000  at Profit: $ 6.809998


  5%|▌         | 67/1258 [02:02<1:00:54,  3.07s/it]

AI trader bought:  $ 115.309998


  5%|▌         | 68/1258 [02:07<1:10:45,  3.57s/it]

AI trader bought:  $ 116.279999


  6%|▌         | 70/1258 [02:13<1:05:04,  3.29s/it]

AI trader bought:  $ 113.919998


  6%|▌         | 72/1258 [02:19<1:01:55,  3.13s/it]

AI trader bought:  $ 115.209999


  6%|▌         | 75/1258 [02:28<59:23,  3.01s/it]  

AI trader sold:  $ 115.000000  at Profit: $ 11.879997


  6%|▋         | 81/1258 [02:48<1:00:54,  3.11s/it]

AI trader bought:  $ 110.379997


  7%|▋         | 82/1258 [02:51<59:35,  3.04s/it]  

AI trader bought:  $ 110.779999


  7%|▋         | 83/1258 [02:54<58:57,  3.01s/it]

AI trader sold:  $ 111.309998  at Profit: $ 7.570000


  7%|▋         | 84/1258 [02:57<58:48,  3.01s/it]

AI trader bought:  $ 110.779999


  7%|▋         | 85/1258 [03:00<58:50,  3.01s/it]

AI trader bought:  $ 109.500000


  7%|▋         | 86/1258 [03:05<42:12,  2.16s/it]


KeyboardInterrupt: 