# **Stage 1: Installing dependencies and environment setup**

In [1]:
# !pip install urllib3
# !pip install tensorflow pandas pandas-datareader tqdm matplotlib

Collecting urllib3
  Downloading https://files.pythonhosted.org/packages/b4/40/a9837291310ee1ccc242ceb6ebfd9eb21539649f193a7c8c86ba15b98539/urllib3-1.25.7-py2.py3-none-any.whl (125kB)
Installing collected packages: urllib3
Successfully installed urllib3-1.25.7
Collecting tensorflow
  Using cached https://files.pythonhosted.org/packages/54/5f/e1b2d83b808f978f51b7ce109315154da3a3d4151aa59686002681f2e109/tensorflow-2.0.0-cp37-cp37m-win_amd64.whl
Collecting pandas
  Using cached https://files.pythonhosted.org/packages/02/d0/1e8e60e61e748338e3a40e42f5dfeee63ccdecfc4f0894122b890bfb009a/pandas-0.25.3-cp37-cp37m-win_amd64.whl
Collecting pandas-datareader
  Using cached https://files.pythonhosted.org/packages/14/52/accb990baebe0063977f26e02df36aa7eb4015ed4e86f828cd76273cd6f1/pandas_datareader-0.8.1-py2.py3-none-any.whl
Collecting tqdm
  Downloading https://files.pythonhosted.org/packages/bb/62/6f823501b3bf2bac242bd3c320b592ad1516b3081d82c77c1d813f076856/tqdm-4.39.0-py2.py3-none-any.whl (53kB)
C

In [2]:
# **Stage 2: Improting project dependencies**

import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader as data_reader
# try:
#   # %tensorflow_version only exists in Colab.
#   %tensorflow_version 2.x
# except Exception:
#   pass
import tensorflow as tf

from tqdm import tqdm_notebook, tqdm
from collections import deque

tf.__version__

'2.0.0'

In [11]:
# **Stage 3: Building the AI Trader network**

class AI_Trader():

  def __init__(self, state_size, action_space=3, model_name="AITrader", distributed=False): #Stay, Buy, Sell (actions)

    self.state_size = state_size
    self.action_space = action_space
    self.distributed = distributed
    self.memory = deque(maxlen=1000)
    self.inventory = []
    self.model_name = model_name

    self.gamma = 0.95 # maximize the current 
    self.epsilon = 1.0 # start with random actions record
    self.epsilon_final = 0.01 # if epsilon is less than this number stop randomizing it
    self.epsilon_decay = 0.995

    self.model = self.model_builder()

  def model_builder(self):
    if self.distributed:
      distribute = tf.distribute.MirroredStrategy()

      with distribute.scope():
        model = tf.keras.models.Sequential()

        model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size))

        model.add(tf.keras.layers.Dense(units=64, activation='relu'))

        model.add(tf.keras.layers.Dense(units=128, activation='relu'))

        model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))

        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(lr=0.001))

    else:
      model = tf.keras.models.Sequential()

      model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size))

      model.add(tf.keras.layers.Dense(units=64, activation='relu'))

      model.add(tf.keras.layers.Dense(units=128, activation='relu'))

      model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))

      model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(lr=0.001))

    return model

  def trade(self, state):

    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)

    actions = self.model.predict(state)
    return np.argmax(actions[0])

  def batch_train(self, batch_size):

    batch = []
    for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
      batch.append(self.memory[i])

    for state, action, reward, next_state, done in batch:
      reward = reward
      if not done:
        reward = reward + self.gamma * np.amax(self.model.predict(next_state)[0])

      target = self.model.predict(state)
      target[0][action] = reward

      self.model.fit(state, target, epochs=1, verbose=0)

      if self.epsilon > self.epsilon_final:
        self.epsilon *= self.epsilon_decay

In [12]:
# **Stage 4: Dataset preprocessing**

# **Define helper functions**

# **Sigmoid**

def sigmoid(x):
  return 1 / (1 + math.exp(-x))

# **Price format function**

def stocks_price_format(n):
  if n < 0:
    return "- $ {0:2f}".format(abs(n))
  else:
    return "$ {0:2f}".format(abs(n))

# **Dataset loader**

# data_reader.DataReader(stock_name, data_source="yahoo", start=2014, end=2016)

def dataset_loader(stock_name):

  #Complete the dataset loader function
  # dataset = data_reader.DataReader(stock_name, data_source="yahoo", start=2014, end=2015)
  dataset = data_reader.DataReader(stock_name, data_source="yahoo")

  start_date = str(dataset.index[0]).split()[0]
  end_date = str(dataset.index[-1]).split()[0]

  close = dataset['Close']

  return close

# **State creator**

def state_creator(data, timestep, window_size):

  starting_id = timestep - window_size + 1

  if starting_id >= 0:
    windowed_data = data[starting_id:timestep+1]
  else:
    windowed_data = - starting_id * [data[0]] + list(data[0:timestep+1])

  state = []
  for i in range(window_size - 1):
    state.append(sigmoid(windowed_data[i+1] - windowed_data[i]))

  return np.array([state])

# **Loading a dataset**

stock_name = "AAPL"
data = dataset_loader(stock_name)

In [13]:
# **Stage 5: Training the AI Trader**

# **Setting hyper parameters**

window_size = 10
episodes = 1000

batch_size = 32
data_samples = len(data) - 1

# **Defining the Trader model**

trader = AI_Trader(window_size, distributed=False)

trader.model.summary()

# **Training loop**

for episode in range(episodes + 1):

  print("Episode: {}/{}".format(episode, episodes))

  state = state_creator(data, 0, window_size + 1)

  total_profit = 0
  trader.inventory = []

  for t in tqdm(range(data_samples)):

    action = trader.trade(state)

    next_state = state_creator(data, t+1, window_size + 1)
    reward = 0

    if action == 1: #Buying
      trader.inventory.append(data[t])
      print("AI Trader bought: ", stocks_price_format(data[t]))

    elif action == 2 and len(trader.inventory) > 0: #Selling
      buy_price = trader.inventory.pop(0)

      reward = max(data[t] - buy_price, 0)
      total_profit += data[t] - buy_price
      print("AI Trader sold: ", stocks_price_format(data[t]), " Profit: " + stocks_price_format(data[t] - buy_price))

    if t == data_samples - 1:
      done = True
    else:
      done = False

    trader.memory.append((state, action, reward, next_state, done))

    state = next_state

    if done:
      print("#############################")
      print("TOTAL PROFIT: {}".format(total_profit))
      print("#############################")

    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)

  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))

# from google.colab import drive
# drive.mount('/content/drive')

  0%|                                                                                                                                        | 0/1257 [00:00<?, ?it/s]

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                352       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_2 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 387       
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________
Episode: 0/1000
AI Trader bought:  $ 116.470001
AI Trader sold:  $ 118.629997  Profit: $ 2.159996
AI Trader bought:  $ 115.489998
AI Trader bought:  $ 115.000000
AI Trader bought:  $ 112.400002
AI Trader bought:  $ 114.120003
AI Trader sold:  $ 111.9

  3%|███▊                                                                                                                         | 38/1257 [00:24<1:04:04,  3.15s/it]

AI Trader bought:  $ 108.720001


  3%|███▉                                                                                                                         | 40/1257 [00:31<1:09:14,  3.41s/it]

AI Trader sold:  $ 112.400002  Profit: $ 3.680000


  4%|█████▍                                                                                                                       | 55/1257 [01:19<1:02:41,  3.13s/it]

KeyboardInterrupt: 