# Importing the libraries

In [None]:
import numpy as np 
import pandas as pd 
import random 
import matplotlib.pyplot as plt 
import tensorflow as tf 
import math 
import pandas_datareader as data_reader 

from tqdm import tqdm_notebook , tqdm 
from collections import deque 

# building the AI trader network

In [None]:
class AI_Trader():
  """Epsilon-greedy Q-learning agent backed by a small dense Keras network.

  The network maps a state vector of length ``state_size`` to one value per
  action. Transitions ``(state, action, reward, next_state, done)`` are stored
  in ``memory`` by the caller and consumed by ``batch_train``.
  """

  def __init__(self, state_size, action_space=3, model_name="AITrader"): #Stay, Buy, Sell
    """Create the agent and build its network.

    Args:
      state_size: number of input features of the network.
      action_space: number of discrete actions (default 3: stay, buy, sell).
      model_name: label stored on the instance; not used by this class itself.
    """
    self.state_size = state_size
    self.action_space = action_space
    # Bounded replay memory: the oldest transitions are dropped past 2000.
    self.memory = deque(maxlen=2000)
    # Purchase prices of currently held positions (managed by the caller).
    self.inventory = []
    self.model_name = model_name

    # Discount factor applied to the estimated value of the next state.
    self.gamma = 0.95
    # Exploration probability; multiplied by epsilon_decay after every
    # batch_train call while it is still above epsilon_final.
    self.epsilon = 1.0
    self.epsilon_final = 0.01
    self.epsilon_decay = 0.995

    self.model = self.model_builder()

  def model_builder(self):
    """Build and compile the 32-64-128 ReLU network with a linear output head."""
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size))
    model.add(tf.keras.layers.Dense(units=64, activation='relu'))
    model.add(tf.keras.layers.Dense(units=128, activation='relu'))
    model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))
    # `learning_rate` replaces the deprecated `lr` keyword of the optimizer.
    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
    return model

  def trade(self, state):
    """Pick an action for ``state``.

    With probability ``epsilon`` a uniformly random action is returned;
    otherwise the index of the largest network output for ``state``.
    """
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)

    # verbose=0 keeps Keras from printing a progress bar for every single step.
    actions = self.model.predict(state, verbose=0)
    return np.argmax(actions[0])

  def batch_train(self, batch_size):
    """Fit the network on the most recent ``batch_size`` stored transitions.

    If fewer than ``batch_size`` transitions are stored, all of them are used.
    Each transition is fitted individually (one epoch per sample). After the
    pass, ``epsilon`` is decayed once if it is still above ``epsilon_final``.
    """
    memory_len = len(self.memory)
    # Take exactly the last `batch_size` entries; clamp at 0 so a short memory
    # never produces negative (wrap-around / out-of-range) indices.
    start = max(0, memory_len - batch_size)
    batch = list(self.memory)[start:]

    for state, action, reward, next_state, done in batch:
      target_q = reward
      if not done:
        # Bootstrapped target: immediate reward plus the discounted best
        # predicted value of the next state.
        next_q = self.model.predict(next_state, verbose=0)[0]
        target_q = reward + self.gamma * np.amax(next_q)

      # Only the taken action's output is moved towards the target; the other
      # outputs keep their current predictions.
      target = self.model.predict(state, verbose=0)
      target[0][action] = target_q

      self.model.fit(state, target, epochs=1, verbose=0)

    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay
            
        

# data set preprocessing 

## sigmoid 

In [None]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x) of a scalar ``x``.

    The computation is split by sign so that the exponential argument is never
    positive; the naive form raises OverflowError for large negative ``x``
    (roughly x < -709).
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

## price format function 

In [None]:
def stock_price_format(n):
    """Format a monetary amount as ``'$ 12.34'`` or ``'- $ 12.34'``.

    Args:
        n: numeric amount; negative values get a leading ``'- '``.

    Returns:
        The absolute value rendered with two decimals after a ``'$ '`` prefix.
    """
    # '{:.2f}' (two decimals) instead of '{:2f}', which only set a minimum
    # field width of 2 and therefore printed six decimals.
    sign = '- ' if n < 0 else ''
    return '{}$ {:.2f}'.format(sign, abs(n))

# dataset loader 

In [None]:
def dataset_loader(stock_name):
    """Download the price table for ``stock_name`` and return its 'Close' column.

    No start/end date is passed, so the covered period is whatever
    ``DataReader`` returns by default.

    Args:
        stock_name: ticker symbol understood by the 'yahoo' data source.

    Returns:
        The 'Close' column of the downloaded table.
    """
    # NOTE(review): the 'yahoo' source of pandas_datareader may no longer be
    # functional with current library versions - confirm before relying on it.
    dataset = data_reader.DataReader(stock_name, data_source='yahoo')

    # The previously computed start/end date strings were never used and made
    # the function raise IndexError on an empty result, so they were removed.
    return dataset['Close']

In [None]:

# Illustration only: fetch the complete AAPL table to show which columns the
# data source provides. Nothing below depends on this variable.
dataset_for_example = data_reader.DataReader('AAPL', data_source='yahoo')
dataset_for_example



Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-02-01,24.177500,23.850000,24.117500,24.107500,163774000.0,22.334911
2016-02-02,24.010000,23.570000,23.855000,23.620001,149428800.0,21.883259
2016-02-03,24.209999,23.520000,23.750000,24.087500,183857200.0,22.316381
2016-02-04,24.332500,23.797501,23.965000,24.150000,185886800.0,22.495693
2016-02-05,24.230000,23.422501,24.129999,23.504999,185672400.0,21.894878
...,...,...,...,...,...,...
2021-01-22,139.850006,135.020004,136.279999,139.070007,113907200.0,139.070007
2021-01-25,145.089996,136.539993,143.070007,142.919998,157611700.0,142.919998
2021-01-26,144.300003,141.369995,143.600006,143.160004,98390600.0,143.160004
2021-01-27,144.300003,140.410004,143.429993,142.059998,139236700.0,142.059998


# state creator 

In [None]:
def state_creator(data, timestep, window_size):
  """Build the network input for ``timestep`` from consecutive price changes.

  The window covers the ``window_size`` prices ending at ``timestep``
  (inclusive). When the window would start before the first price, it is
  left-padded with copies of the first price. Each pair of neighbouring
  prices is turned into ``sigmoid(later - earlier)``.

  Args:
    data: one-dimensional sequence of prices (list, numpy array or pandas
      Series); elements are always addressed by position.
    timestep: position of the last price in the window.
    window_size: number of prices in the window.

  Returns:
    numpy array of shape ``(1, window_size - 1)``.
  """
  # Convert once to a positional array. Indexing a pandas Series with plain
  # integers is a label lookup (with a deprecated positional fallback), which
  # breaks for integer-labelled indexes and on newer pandas versions.
  prices = np.asarray(data, dtype=float)

  starting_id = timestep - window_size + 1

  if starting_id >= 0:
    windowed_data = prices[starting_id:timestep + 1]
  else:
    # Not enough history yet: repeat the first price -starting_id times.
    padding = np.full(-starting_id, prices[0])
    windowed_data = np.concatenate((padding, prices[:timestep + 1]))

  state = [
      sigmoid(windowed_data[i + 1] - windowed_data[i])
      for i in range(window_size - 1)
  ]

  return np.array([state])

# loading a dataset 

In [None]:
# Ticker whose closing prices are used below (alternative: 'BUTTERFLY.BO').
stock_name = 'SBER.ME'
data = dataset_loader(stock_name)
data

Date
2016-02-01     96.500000
2016-02-02     94.400002
2016-02-03     94.529999
2016-02-04     97.570000
2016-02-05     97.199997
                 ...    
2021-01-22    267.899994
2021-01-25    263.540009
2021-01-26    267.940002
2021-01-27    266.700012
2021-01-28    265.630005
Name: Close, Length: 1255, dtype: float64

In [None]:
# Compute the index bounds of the period starting 1 January 2019.
# Key line for the forward analysis: it selects which calendar period is used.
mask = (
    (data.index >= '2019-01-01 01:00:00.000')
    & (data.index <= '2019-12-31 23:59:59.999')
)
min(data[mask].index), max(data[mask].index)

(Timestamp('2019-01-03 00:00:00'), Timestamp('2019-12-30 00:00:00'))

In [None]:
# Load the dataset, taking into account the need to form an ensemble.
# This rebinds `data` to the slice between the first and last masked dates,
# so from here on `data` no longer holds the full downloaded history.
data = data[min(data[mask].index):max(data[mask].index)]
data

Date
2019-01-03    186.990005
2019-01-04    190.990005
2019-01-08    191.240005
2019-01-09    197.000000
2019-01-10    196.750000
                 ...    
2019-12-24    248.669998
2019-12-25    248.039993
2019-12-26    248.240005
2019-12-27    252.059998
2019-12-30    254.750000
Name: Close, Length: 252, dtype: float64

# Training the AI trader

## setting the hyper parameters 

In [None]:
# Number of price differences fed to the network per state.
window_size = 10
# Number of full passes over the price series.
# presumably reduced from 1000 for a quick run - TODO confirm
episodes = 10 # 00

# Number of stored transitions required before each training call.
batch_size = 32
# The last price has no successor, so it cannot start a transition.
data_samples = len(data) - 1


## defining the trader model 

In [None]:
# The network input size is window_size: the loops call
# state_creator(..., window_size + 1), which yields window_size differences.
trader = AI_Trader(window_size)

In [None]:
# Print the layer-by-layer structure and parameter counts of the agent's network.
trader.model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                352       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_2 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 387       
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________


# training loop 

In [None]:
for episode in range(1, episodes + 1):
  print("Episode: {}/{}".format(episode, episodes))

  # Every episode replays the series from its first price with an empty
  # inventory and a zeroed profit counter.
  state = state_creator(data, 0, window_size + 1)

  total_profit = 0
  trader.inventory = []

  for t in range(data_samples):

    action = trader.trade(state)

    next_state = state_creator(data, t+1, window_size + 1)
    reward = 0
    # Positional lookup: `data` is a date-indexed Series, and plain `data[t]`
    # relies on a deprecated positional fallback of Series.__getitem__.
    price = data.iloc[t]

    if action == 1: #Buying
      trader.inventory.append(price)
      print("AI Trader bought: ", stock_price_format(price))

    elif action == 2 and len(trader.inventory) > 0: #Selling
      # Positions are closed first-in, first-out.
      buy_price = trader.inventory.pop(0)
      trade_profit = price - buy_price

      # Losses are not penalised: the reward is clipped at zero, while
      # total_profit tracks the real signed result.
      reward = max(trade_profit, 0)
      total_profit += trade_profit
      print("AI Trader sold: ", stock_price_format(price), " Profit: " + stock_price_format(trade_profit) )

    # The episode ends on the last step that still has a successor price.
    done = (t == data_samples - 1)

    trader.memory.append((state, action, reward, next_state, done))

    state = next_state

    if done:
      print("########################")
      print("TOTAL PROFIT: {}".format(total_profit))
      print("########################")

    # Train after every step once more than batch_size transitions are stored.
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)

  # Checkpoint the network every 10th episode.
  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))

Episode: 1/10
AI Trader bought:  $ 94.400002
AI Trader bought:  $ 94.529999
AI Trader sold:  $ 97.199997  Profit: $ 2.799995
AI Trader bought:  $ 94.699997
AI Trader sold:  $ 93.889999  Profit: - $ 0.639999
AI Trader bought:  $ 96.349998
AI Trader sold:  $ 94.849998  Profit: $ 0.150002
AI Trader sold:  $ 95.250000  Profit: - $ 1.099998
AI Trader bought:  $ 106.120003
AI Trader bought:  $ 106.989998
AI Trader sold:  $ 109.470001  Profit: $ 3.349998
AI Trader bought:  $ 108.150002
AI Trader sold:  $ 108.839996  Profit: $ 1.849998
AI Trader sold:  $ 109.260002  Profit: $ 1.110001
AI Trader bought:  $ 109.260002
AI Trader sold:  $ 106.459999  Profit: - $ 2.800003
AI Trader bought:  $ 111.489998
AI Trader bought:  $ 109.099998
AI Trader sold:  $ 108.059998  Profit: - $ 3.430000
AI Trader bought:  $ 108.120003
AI Trader sold:  $ 107.050003  Profit: - $ 2.049995
AI Trader bought:  $ 109.900002
AI Trader sold:  $ 108.519997  Profit: $ 0.399994
AI Trader sold:  $ 108.000000  Profit: - $ 1.90000

KeyboardInterrupt: ignored

In [None]:
# Signed profit accumulated in the most recent episode only: the training loop
# resets this counter to 0 at the start of every episode.
total_profit

-700.1800765991211

# Trading loop on 2020

In [None]:
# Compute the index bounds of the period starting 1 January 2020.
# `data` was narrowed to the 2019 window earlier in the notebook, so it holds
# no 2020 rows; without reloading, the mask below is empty and min()/max()
# raise ValueError on a fresh Restart & Run All. Reload the full history first.
data = dataset_loader(stock_name)
# Key line for the forward analysis: it selects which calendar period is used.
mask20 = (data.index >= '2020-01-01 01:00:00.000') & (data.index <= '2020-12-31 23:59:59.999')
min(data[mask20].index), max(data[mask20].index)

(Timestamp('2020-01-03 00:00:00'), Timestamp('2020-12-30 00:00:00'))

In [None]:
# Load the dataset, taking into account the need to form an ensemble.
# `data20` is the slice of `data` between the first and last masked dates.
data20 = data[min(data[mask20].index):max(data[mask20].index)]
data20

Date
2020-01-03    255.000000
2020-01-06    253.899994
2020-01-08    259.149994
2020-01-09    257.989990
2020-01-10    258.190002
                 ...    
2020-12-24    268.850006
2020-12-25    270.700012
2020-12-28    274.529999
2020-12-29    272.540009
2020-12-30    270.660004
Name: Close, Length: 250, dtype: float64

In [None]:
# Number of steps in the 2020 series that have a successor price. The trading
# cell that follows assigns the same value again.
data_samples = len(data20) - 1 

In [None]:
# Simulated trading over `data20` with a cash account: buy with all available
# cash, sell the whole position.
# NOTE(review): trader.trade() is still epsilon-greedy and batch_train() is
# still called below, so the agent keeps exploring and learning on this
# evaluation period - confirm that this is intended.
data_samples = len(data20) - 1
stocks = 0
cash0 = 100000.
cash = cash0

# The first window_size steps are skipped so that every state is built from
# real prices only (no left padding). The state and the execution price both
# refer to the same position `step` of `data20`; previously the state used
# timestep t, the price used t + window_size, and both next_state and price
# were read from `data` instead of `data20`.
state = state_creator(data20, window_size, window_size + 1)

for t in range(data_samples - window_size):
    step = t + window_size
    action = trader.trade(state)

    next_state = state_creator(data20, step + 1, window_size + 1)
    reward = 0
    price = data20.iloc[step] # price at which this step's order is executed

    if action == 1: # Buying
        n = cash // price # number of whole shares the current cash can buy
        if n > 0:
            stocks += n
            cash -= (n * price)
            buy_price = price
            print("%d: Купили %d акций по цене %.2f" % (step, n, price))

    if action == 2: #Selling
        n = stocks
        if n > 0:
            cash += (price * stocks) # sell the entire position
            stocks = 0
            # Unlike the training loop, the reward is the signed per-share
            # result and is not clipped at zero. buy_price is always bound
            # here because stocks > 0 implies an earlier purchase.
            reward = price - buy_price
            print("%d: Продали %d акций по цене %.2f, кэш стал: %.2f, reward=%.2f" % (step, n, price, cash, reward))

    done = (t == (data_samples - 1 - window_size))

    trader.memory.append((state, action, reward, next_state, done))

    state = next_state

    if done:
        # Final result: open position valued at the last price, plus cash,
        # minus the starting capital.
        print("########################")
        print("Результат: %.2f " % (price * stocks + cash - cash0)) 
        print("########################")

    if len(trader.memory) > batch_size:
        trader.batch_train(batch_size)
      

10: Купили 1049 акций по цене 95.25
11: Купили 0 акций по цене 96.51
12: Купили 0 акций по цене 99.20
13: Купили 0 акций по цене 102.70
14: Купили 0 акций по цене 101.50
15: Купили 0 акций по цене 103.20
16: Купили 0 акций по цене 102.10
17: Купили 0 акций по цене 104.05
18: Купили 0 акций по цене 106.24
19: Купили 0 акций по цене 107.00
21: Купили 0 акций по цене 106.12
22: Купили 0 акций по цене 106.99
23: Купили 0 акций по цене 108.99
24: Купили 0 акций по цене 109.47
25: Купили 0 акций по цене 108.15
26: Купили 0 акций по цене 108.84
27: Купили 0 акций по цене 109.26
28: Купили 0 акций по цене 109.26
29: Купили 0 акций по цене 106.46
30: Купили 0 акций по цене 107.00
31: Купили 0 акций по цене 109.47
32: Купили 0 акций по цене 112.40
33: Купили 0 акций по цене 112.07
34: Купили 0 акций по цене 111.49
35: Купили 0 акций по цене 109.10
36: Купили 0 акций по цене 108.06
37: Купили 0 акций по цене 109.10
38: Купили 0 акций по цене 108.12
39: Купили 0 акций по цене 107.05
40: Купили 0 а