<a href="https://colab.research.google.com/github/Tushar-ml/Projects/blob/master/Copy_of_Colab_8_Deep_Reinforcement_Learning_for_Stock_Market_Trading.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Stage 1: Installing dependencies and environment setup


In [0]:
!pip install tensorflow-gpu==2.0.0.alpha0

Collecting tensorflow-gpu==2.0.0.alpha0
[?25l  Downloading https://files.pythonhosted.org/packages/1a/66/32cffad095253219d53f6b6c2a436637bbe45ac4e7be0244557210dc3918/tensorflow_gpu-2.0.0a0-cp36-cp36m-manylinux1_x86_64.whl (332.1MB)
[K     |████████████████████████████████| 332.1MB 53kB/s 
[?25hCollecting tb-nightly<1.14.0a20190302,>=1.14.0a20190301 (from tensorflow-gpu==2.0.0.alpha0)
[?25l  Downloading https://files.pythonhosted.org/packages/a9/51/aa1d756644bf4624c03844115e4ac4058eff77acd786b26315f051a4b195/tb_nightly-1.14.0a20190301-py3-none-any.whl (3.0MB)
[K     |████████████████████████████████| 3.0MB 33.8MB/s 
Collecting tf-estimator-nightly<1.14.0.dev2019030116,>=1.14.0.dev2019030115 (from tensorflow-gpu==2.0.0.alpha0)
[?25l  Downloading https://files.pythonhosted.org/packages/13/82/f16063b4eed210dc2ab057930ac1da4fbe1e91b7b051a6c8370b401e6ae7/tf_estimator_nightly-1.14.0.dev2019030115-py2.py3-none-any.whl (411kB)
[K     |████████████████████████████████| 419kB 45.5MB/s 
Ins

In [0]:
!pip install pandas-datareader



## Stage 2: Importing project dependencies

In [1]:
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas_datareader as data_reader

from tqdm import tqdm_notebook, tqdm
from collections import deque

  from pandas.util.testing import assert_frame_equal


In [2]:
tf.__version__

'2.2.0'

## Stage 3: Building the AI Trader network

In [0]:
class AI_Trader():
  
  def __init__(self, state_size, action_space=3, model_name="AITrader"): #Stay, Buy, Sell
    
    self.state_size = state_size
    self.action_space = action_space
    self.memory = deque(maxlen=2000)
    self.inventory = []
    self.model_name = model_name
    
    self.gamma = 0.95
    self.epsilon = 1.0
    self.epsilon_final = 0.01
    self.epsilon_decay = 0.995
    
    self.model = self.model_builder()
    
  def model_builder(self):
    
    model = tf.keras.models.Sequential()
    
    model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size))
    
    model.add(tf.keras.layers.Dense(units=64, activation='relu'))
    
    model.add(tf.keras.layers.Dense(units=128, activation='relu'))
    
    model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))
    
    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(lr=0.001))
    
    return model
  
  def trade(self, state):
    
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)
    
    actions = self.model.predict(state)
    return np.argmax(actions[0])
  
  
  def batch_train(self, batch_size):
    
    batch = []
    for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
      batch.append(self.memory[i])
      
    for state, action, reward, next_state, done in batch:
      reward = reward
      if not done:
        reward = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
        
      target = self.model.predict(state)
      target[0][action] = reward
      
      self.model.fit(state, target, epochs=1, verbose=0)
      
    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay

## Stage 4: Dataset preprocessing

### Defining helper functions

#### Sigmoid

In [0]:
def sigmoid(x):
  return 1 / (1 + math.exp(-x))

#### Price format function

In [0]:
def stocks_price_format(n):
  if n < 0:
    return "- $ {0:2f}".format(abs(n))
  else:
    return "$ {0:2f}".format(abs(n))

#### Dataset loader

In [0]:
def dataset_loader(stock_name):
  
  #Complete the dataset loader function
  dataset = data_reader.DataReader(stock_name, data_source="yahoo")
  
  start_date = str(dataset.index[0]).split()[0]
  end_date = str(dataset.index[-1]).split()[0]
  
  close = dataset['Close']

  return close

### State creator

In [0]:
def state_creator(data, timestep, window_size):
  
  starting_id = timestep - window_size + 1
  
  if starting_id >= 0:
    windowed_data = data[starting_id:timestep+1]
  else:
    windowed_data = - starting_id * [data[0]] + list(data[0:timestep+1])
    
  state = []
  for i in range(window_size - 1):
    state.append(sigmoid(windowed_data[i+1] - windowed_data[i]))
    
  return np.array([state])

### Loading a dataset

In [0]:
stock_name = "FB"
data = dataset_loader(stock_name)

## Stage 5: Training the AI Trader

### Setting hyper parameters

In [0]:
window_size = 10
episodes = 3

batch_size = 32
data_samples = len(data) - 1

### Defining the Trader model

In [0]:
trader = AI_Trader(window_size)

In [19]:
trader.model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 32)                352       
_________________________________________________________________
dense_5 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_6 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_7 (Dense)              (None, 3)                 387       
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________


### Training loop

In [0]:
for episode in range(1, episodes + 1):
  
  print("Episode: {}/{}".format(episode, episodes))
  
  state = state_creator(data, 0, window_size + 1)
  
  total_profit = 0
  trader.inventory = []
  
  for t in tqdm(range(data_samples)):
    
    action = trader.trade(state)
    
    next_state = state_creator(data, t+1, window_size + 1)
    reward = 0
    
    if action == 1: #Buying
      trader.inventory.append(data[t])
      print("AI Trader bought: ", stocks_price_format(data[t]))
      
    elif action == 2 and len(trader.inventory) > 0: #Selling
      buy_price = trader.inventory.pop(0)
      
      reward = max(data[t] - buy_price, 0)
      total_profit += data[t] - buy_price
      # a = stocks_price_format(data[t] - buy_price).split(' ')[-1]
      if (data[t]-buy_price)>0:
        print("AI Trader sold: ", stocks_price_format(data[t]), " Profit: " + stocks_price_format(data[t] - buy_price) )
      else:
        print("AI Trader sold: ", stocks_price_format(data[t]), " Loss: " + stocks_price_format(data[t] - buy_price) )
      
    if t == data_samples - 1:
      done = True
    else:
      done = False
      
    trader.memory.append((state, action, reward, next_state, done))
    
    state = next_state
    
    if done:
      print("########################")
      print("TOTAL PROFIT: {}".format(total_profit))
      print("########################")
    
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)
      
  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))
    

  0%|          | 0/1256 [00:00<?, ?it/s]

Episode: 1/3
AI Trader bought:  $ 80.540001
AI Trader sold:  $ 79.330002  Loss: - $ 1.209999
AI Trader bought:  $ 80.290001
AI Trader bought:  $ 80.440002
AI Trader bought:  $ 82.440002
AI Trader bought:  $ 82.050003
AI Trader bought:  $ 82.139999
AI Trader bought:  $ 80.669998
AI Trader bought:  $ 80.669998
AI Trader bought:  $ 81.830002
AI Trader sold:  $ 81.529999  Profit: $ 1.239998
AI Trader bought:  $ 81.059998
AI Trader sold:  $ 81.790001  Profit: $ 1.349998
AI Trader sold:  $ 82.510002  Profit: $ 0.070000
AI Trader sold:  $ 84.739998  Profit: $ 2.689995
AI Trader sold:  $ 87.980003  Profit: $ 5.840004
AI Trader bought:  $ 88.010002
AI Trader bought:  $ 85.800003
AI Trader bought:  $ 85.769997
AI Trader bought:  $ 86.910004
AI Trader bought:  $ 87.290001
AI Trader sold:  $ 87.550003  Profit: $ 6.880005
AI Trader sold:  $ 87.220001  Profit: $ 6.550003
AI Trader sold:  $ 85.650002  Profit: $ 3.820000
AI Trader bought:  $ 85.879997


  3%|▎         | 33/1256 [00:03<02:02,  9.97it/s]

AI Trader sold:  $ 87.949997  Profit: $ 6.889999


  3%|▎         | 34/1256 [00:06<19:27,  1.05it/s]

AI Trader bought:  $ 90.099998


  3%|▎         | 35/1256 [00:09<30:42,  1.51s/it]

AI Trader sold:  $ 89.680000  Profit: $ 1.669998


  3%|▎         | 36/1256 [00:11<38:32,  1.90s/it]

AI Trader sold:  $ 89.760002  Profit: $ 3.959999


  3%|▎         | 38/1256 [00:17<48:03,  2.37s/it]

AI Trader bought:  $ 94.970001


  3%|▎         | 39/1256 [00:20<50:38,  2.50s/it]

AI Trader sold:  $ 97.910004  Profit: $ 12.140007


  3%|▎         | 40/1256 [00:23<52:31,  2.59s/it]

AI Trader sold:  $ 98.389999  Profit: $ 11.479996


  3%|▎         | 41/1256 [00:26<54:41,  2.70s/it]

AI Trader sold:  $ 97.040001  Profit: $ 9.750000


  3%|▎         | 42/1256 [00:28<55:20,  2.74s/it]

AI Trader sold:  $ 95.440002  Profit: $ 9.560005


  3%|▎         | 43/1256 [00:31<55:58,  2.77s/it]

AI Trader sold:  $ 96.949997  Profit: $ 6.849998


  4%|▎         | 45/1256 [00:37<57:22,  2.84s/it]

AI Trader sold:  $ 95.290001  Profit: $ 0.320000


  4%|▎         | 46/1256 [00:40<58:26,  2.90s/it]

AI Trader bought:  $ 96.989998


  4%|▎         | 47/1256 [00:43<58:39,  2.91s/it]

AI Trader bought:  $ 95.209999


  4%|▍         | 48/1256 [00:46<57:57,  2.88s/it]

AI Trader sold:  $ 94.010002  Loss: - $ 2.979996


  4%|▍         | 49/1256 [00:49<57:25,  2.85s/it]

AI Trader bought:  $ 94.139999


  4%|▍         | 50/1256 [00:51<57:15,  2.85s/it]

AI Trader bought:  $ 94.059998


  4%|▍         | 51/1256 [00:54<57:05,  2.84s/it]

AI Trader bought:  $ 96.440002


  4%|▍         | 52/1256 [00:57<56:48,  2.83s/it]

AI Trader sold:  $ 95.120003  Loss: - $ 0.089996


  4%|▍         | 53/1256 [01:00<57:35,  2.87s/it]

AI Trader sold:  $ 94.300003  Profit: $ 0.160004


  4%|▍         | 54/1256 [01:03<57:16,  2.86s/it]

AI Trader bought:  $ 94.150002


  4%|▍         | 55/1256 [01:06<57:00,  2.85s/it]

AI Trader sold:  $ 93.620003  Loss: - $ 0.439995


  4%|▍         | 56/1256 [01:09<56:38,  2.83s/it]

AI Trader sold:  $ 94.190002  Loss: - $ 2.250000


  5%|▍         | 57/1256 [01:11<56:39,  2.84s/it]

AI Trader bought:  $ 93.430000


  5%|▍         | 60/1256 [01:20<56:43,  2.85s/it]

AI Trader bought:  $ 95.169998


  5%|▍         | 61/1256 [01:23<56:25,  2.83s/it]

AI Trader sold:  $ 95.309998  Profit: $ 1.159996


  5%|▌         | 64/1256 [01:31<57:04,  2.87s/it]

AI Trader bought:  $ 82.089996


  5%|▌         | 65/1256 [01:34<56:28,  2.85s/it]

AI Trader sold:  $ 83.000000  Loss: - $ 10.430000


  5%|▌         | 67/1256 [01:40<56:16,  2.84s/it]

AI Trader sold:  $ 89.730003  Loss: - $ 5.439995


  5%|▌         | 68/1256 [01:43<56:26,  2.85s/it]

AI Trader bought:  $ 91.010002


  6%|▌         | 70/1256 [01:48<56:33,  2.86s/it]

AI Trader sold:  $ 87.230003  Profit: $ 5.140007


  6%|▌         | 71/1256 [01:51<57:14,  2.90s/it]

AI Trader sold:  $ 89.889999  Loss: - $ 1.120003


  6%|▌         | 72/1256 [01:54<56:45,  2.88s/it]

AI Trader bought:  $ 88.150002


  6%|▌         | 73/1256 [01:57<56:32,  2.87s/it]

AI Trader bought:  $ 88.260002


  6%|▌         | 74/1256 [02:00<56:15,  2.86s/it]

AI Trader bought:  $ 89.529999


  6%|▌         | 75/1256 [02:03<56:12,  2.86s/it]

AI Trader bought:  $ 90.440002


  6%|▌         | 76/1256 [02:06<56:10,  2.86s/it]

AI Trader bought:  $ 91.980003


  6%|▌         | 77/1256 [02:09<57:00,  2.90s/it]

AI Trader bought:  $ 92.050003


  6%|▌         | 78/1256 [02:12<56:26,  2.87s/it]

AI Trader sold:  $ 92.309998  Profit: $ 4.159996


  6%|▋         | 79/1256 [02:14<56:26,  2.88s/it]

AI Trader sold:  $ 92.900002  Profit: $ 4.639999


  6%|▋         | 81/1256 [02:20<56:15,  2.87s/it]

AI Trader sold:  $ 94.339996  Profit: $ 4.809998


  7%|▋         | 82/1256 [02:23<55:58,  2.86s/it]

AI Trader bought:  $ 94.400002


  7%|▋         | 83/1256 [02:26<56:40,  2.90s/it]

AI Trader sold:  $ 95.550003  Profit: $ 5.110001


  7%|▋         | 84/1256 [02:29<56:04,  2.87s/it]

AI Trader bought:  $ 92.959999


  7%|▋         | 85/1256 [02:32<56:08,  2.88s/it]

AI Trader bought:  $ 93.970001


  7%|▋         | 87/1256 [02:37<55:55,  2.87s/it]

AI Trader bought:  $ 92.769997


  7%|▋         | 88/1256 [02:40<55:43,  2.86s/it]

AI Trader sold:  $ 89.209999  Loss: - $ 2.770004


  7%|▋         | 89/1256 [02:43<56:37,  2.91s/it]

AI Trader bought:  $ 86.669998


  7%|▋         | 90/1256 [02:46<56:28,  2.91s/it]

AI Trader sold:  $ 89.900002  Loss: - $ 2.150002


  7%|▋         | 91/1256 [02:49<56:02,  2.89s/it]

AI Trader sold:  $ 90.949997  Loss: - $ 3.450005


  7%|▋         | 92/1256 [02:52<56:02,  2.89s/it]

AI Trader sold:  $ 92.070000  Loss: - $ 0.889999


  7%|▋         | 93/1256 [02:55<56:01,  2.89s/it]

AI Trader sold:  $ 94.010002  Profit: $ 0.040001


  7%|▋         | 94/1256 [02:58<55:43,  2.88s/it]

AI Trader sold:  $ 92.800003  Profit: $ 0.030006


  8%|▊         | 95/1256 [03:01<56:18,  2.91s/it]

AI Trader bought:  $ 92.400002


  8%|▊         | 97/1256 [03:06<55:44,  2.89s/it]

AI Trader sold:  $ 93.239998  Profit: $ 6.570000


  8%|▊         | 98/1256 [03:09<55:39,  2.88s/it]

AI Trader bought:  $ 94.260002


  8%|▊         | 100/1256 [03:15<56:23,  2.93s/it]

AI Trader sold:  $ 94.070000  Profit: $ 1.669998


  8%|▊         | 103/1256 [03:24<55:15,  2.88s/it]

AI Trader sold:  $ 98.470001  Profit: $ 4.209999


  8%|▊         | 106/1256 [03:32<54:20,  2.84s/it]

AI Trader bought:  $ 99.669998


  9%|▊         | 108/1256 [03:38<54:44,  2.86s/it]

AI Trader sold:  $ 103.769997  Profit: $ 4.099998


  9%|▉         | 116/1256 [04:01<54:58,  2.89s/it]

AI Trader bought:  $ 108.760002


  9%|▉         | 117/1256 [04:04<54:42,  2.88s/it]

AI Trader bought:  $ 107.099998


  9%|▉         | 119/1256 [04:10<55:31,  2.93s/it]

AI Trader sold:  $ 107.910004  Loss: - $ 0.849998


 10%|▉         | 120/1256 [04:13<55:02,  2.91s/it]

AI Trader sold:  $ 109.010002  Profit: $ 1.910004


 11%|█         | 132/1256 [04:47<53:43,  2.87s/it]

AI Trader bought:  $ 104.239998


 11%|█         | 133/1256 [04:50<53:14,  2.84s/it]

AI Trader sold:  $ 107.120003  Profit: $ 2.880005


 11%|█         | 137/1256 [05:02<53:52,  2.89s/it]

AI Trader bought:  $ 105.610001


 11%|█         | 138/1256 [05:05<53:48,  2.89s/it]

AI Trader bought:  $ 106.489998


 11%|█         | 139/1256 [05:08<53:48,  2.89s/it]

AI Trader bought:  $ 104.599998


 11%|█         | 140/1256 [05:11<53:54,  2.90s/it]

AI Trader bought:  $ 105.419998


 11%|█         | 141/1256 [05:13<53:39,  2.89s/it]

AI Trader bought:  $ 102.120003


 11%|█▏        | 143/1256 [05:19<53:43,  2.90s/it]

AI Trader sold:  $ 104.550003  Loss: - $ 1.059998


 12%|█▏        | 145/1256 [05:25<52:51,  2.85s/it]

AI Trader sold:  $ 106.220001  Loss: - $ 0.269997


 12%|█▏        | 146/1256 [05:28<52:48,  2.85s/it]

AI Trader sold:  $ 104.040001  Loss: - $ 0.559998


 12%|█▏        | 147/1256 [05:31<52:48,  2.86s/it]

AI Trader sold:  $ 104.769997  Loss: - $ 0.650002


 12%|█▏        | 148/1256 [05:34<53:22,  2.89s/it]

AI Trader bought:  $ 105.510002


 12%|█▏        | 149/1256 [05:36<53:19,  2.89s/it]

AI Trader sold:  $ 104.629997  Profit: $ 2.509995


 12%|█▏        | 150/1256 [05:39<53:02,  2.88s/it]

AI Trader sold:  $ 105.019997  Loss: - $ 0.490005


 12%|█▏        | 151/1256 [05:42<53:04,  2.88s/it]

AI Trader bought:  $ 105.930000


 12%|█▏        | 152/1256 [05:45<53:16,  2.90s/it]

AI Trader sold:  $ 107.260002  Profit: $ 1.330002


 12%|█▏        | 156/1256 [05:57<53:18,  2.91s/it]

AI Trader bought:  $ 102.730003


 12%|█▎        | 157/1256 [06:00<52:43,  2.88s/it]

AI Trader sold:  $ 102.970001  Profit: $ 0.239998


 13%|█▎        | 158/1256 [06:03<52:45,  2.88s/it]

AI Trader bought:  $ 97.919998


 13%|█▎        | 160/1256 [06:08<53:14,  2.91s/it]

AI Trader bought:  $ 97.510002


 13%|█▎        | 162/1256 [06:14<52:48,  2.90s/it]

AI Trader sold:  $ 95.440002  Loss: - $ 2.479996


 13%|█▎        | 163/1256 [06:17<52:59,  2.91s/it]

AI Trader sold:  $ 98.370003  Profit: $ 0.860001


 13%|█▎        | 164/1256 [06:20<52:48,  2.90s/it]

AI Trader bought:  $ 94.970001


 13%|█▎        | 165/1256 [06:23<52:57,  2.91s/it]

AI Trader sold:  $ 95.260002  Profit: $ 0.290001


 13%|█▎        | 168/1256 [06:32<52:11,  2.88s/it]

AI Trader bought:  $ 97.940002


 14%|█▎        | 170/1256 [06:37<51:50,  2.86s/it]

AI Trader sold:  $ 97.339996  Loss: - $ 0.600006


 14%|█▍        | 177/1256 [06:58<51:39,  2.87s/it]

AI Trader bought:  $ 110.489998


 14%|█▍        | 179/1256 [07:03<52:05,  2.90s/it]

AI Trader bought:  $ 99.750000


 15%|█▍        | 183/1256 [07:15<50:44,  2.84s/it]

AI Trader sold:  $ 102.010002  Loss: - $ 8.479996


 15%|█▍        | 184/1256 [07:18<51:53,  2.90s/it]

AI Trader sold:  $ 101.610001  Profit: $ 1.860001


 15%|█▍        | 185/1256 [07:21<51:37,  2.89s/it]

AI Trader bought:  $ 105.199997


 15%|█▍        | 186/1256 [07:24<51:34,  2.89s/it]

AI Trader sold:  $ 103.470001  Loss: - $ 1.729996


 15%|█▌        | 189/1256 [07:32<51:21,  2.89s/it]

AI Trader bought:  $ 105.459999


 15%|█▌        | 190/1256 [07:35<51:33,  2.90s/it]

AI Trader sold:  $ 106.879997  Profit: $ 1.419998


 15%|█▌        | 192/1256 [07:41<51:23,  2.90s/it]

AI Trader bought:  $ 107.919998


 15%|█▌        | 193/1256 [07:44<51:20,  2.90s/it]

AI Trader sold:  $ 106.919998  Loss: - $ 1.000000


 16%|█▌        | 196/1256 [07:53<51:44,  2.93s/it]

AI Trader bought:  $ 109.580002


 16%|█▌        | 197/1256 [07:55<51:17,  2.91s/it]

AI Trader bought:  $ 108.389999


 16%|█▌        | 198/1256 [07:58<50:50,  2.88s/it]

AI Trader sold:  $ 105.730003  Loss: - $ 3.849998


 16%|█▌        | 199/1256 [08:01<50:55,  2.89s/it]

AI Trader sold:  $ 105.930000  Loss: - $ 2.459999


 17%|█▋        | 210/1256 [08:33<50:03,  2.87s/it]

In [0]:

trader.model.save("ai_trader_{}_Apple.h5".format(episode))