In [None]:
# Mount Google Drive so project files under "My Drive" are reachable from the runtime.
from google.colab import drive
import sys

drive.mount('/content/drive')

# Put the project folder at the front of the import path so its modules are found first.
project_dir = '/content/drive/My Drive/ml-trde-notebooks'
sys.path.insert(0, project_dir)

Mounted at /content/drive


In [None]:
# Upgrade TensorFlow to the newest available release.
# NOTE(review): a later install cell pins tensorflow==2.11.0, which replaces whatever
# this upgrade installs — confirm this cell is still needed.
!pip install --upgrade tensorflow

In [None]:
!pip install keras-rl2
!pip install keras==2.11.0

!pip install tensorflow==2.11.0

In [None]:
# yfinance provides the yf.Ticker price-history download used in the Data section.
!pip install yfinance



# Data

In [None]:
# Discrete action values available to the agent; len(actions) sizes the Q-network output.
# NOTE(review): the label below says "short", but BackTestingEnv.step handles -1 by
# closing the open position — confirm which meaning is intended.
actions = [0,1,-1] #hold, buy, short
import yfinance as yf
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from scipy.stats import norm

# Single-stock experiment: train the most sophisticated LSTM we can come up with
# inside the DDQN on one ticker.
ticker = yf.Ticker("SPY")
df = ticker.history(period='max')


# Additive seasonal decomposition of the close with a 90-row period;
# extrapolate_trend='freq' fills the trend at both ends of the series.
result = seasonal_decompose(df['Close'], model='additive', period=90, extrapolate_trend='freq')

# Keep the extracted trend next to the raw prices as a feature column.
df["trend"] = result.trend

# Residual component of the additive model: observed - trend - seasonal.
residuals = result.resid

# Fit a Gaussian (mean and standard deviation) to the residuals.
mu, std = norm.fit(residuals)

# Standardise with BOTH fitted parameters. The previous version divided by std only,
# silently assuming the residual mean is exactly zero and leaving `mu` unused.
z_scores = (residuals - mu) / std
# Upper-tail probability of each standardised residual under the fitted Gaussian:
# values near 0 mean the residual is unusually high, values near 1 unusually low.
df["prob_above_trend"] = 1 - norm.cdf(z_scores)
# Traded value per row (close price times share volume).
df["weighted-volume"] = df["Close"] * df["Volume"]


# DQN Agent


In [None]:
# Verbosity threshold shared by the helpers below:
# 0 = errors only, 1 = +info, 2 = +verbose, 3 = +debug.
logging_level = 2


def _emit(threshold, args):
    """Print ``args`` when the module-wide ``logging_level`` is at least ``threshold``."""
    if logging_level >= threshold:
        print(*args)


def error(*args):
    """Print at level 0 (shown whenever ``logging_level`` is non-negative)."""
    _emit(0, args)


def info(*args):
    """Print at level 1."""
    _emit(1, args)


def verbose(*args):
    """Print at level 2."""
    _emit(2, args)


def debug(*args):
    """Print at level 3."""
    _emit(3, args)

class BackTestingEnv():
    """Single-asset, long-only back-testing environment with a gym-style API.

    Observations are windows of MinMax-scaled feature rows; trades are executed
    against the *raw* close prices so share counts, cash and fees are expressed
    in real currency. ``reset`` returns an observation and ``step`` returns
    ``(observation, reward, done, info)``.

    Parameters
    ----------
    product : label of the traded instrument (stored, used for logging).
    window_size : number of rows in each observation; must be smaller than the
        length of ``timeseries``.
    timeseries : DataFrame containing at least the columns listed in
        ``FEATURE_COLUMNS``.

    Raises
    ------
    ValueError : if ``window_size`` is not in ``(0, len(timeseries))``.

    NOTE(review): the scaler is fitted on the whole series, so early
    observations are scaled with statistics from later rows (look-ahead).
    NOTE(review): the reward is the total account value (cash + position), not
    the per-step change — confirm this is the intended learning signal.
    """

    # Trade signal for each action index; the raw values 0 / 1 / -1 are accepted too.
    ACTIONS = (0, 1, -1)  # hold, buy, close
    FEATURE_COLUMNS = ["Close", "weighted-volume", "trend", "prob_above_trend"]
    LEDGER_COLUMNS = ["Date", "Side", "Action", "Price", "Fee"]
    STARTING_CASH = 5000

    def __init__(self, product, window_size, timeseries):
        if not 0 < window_size < len(timeseries):
            raise ValueError(
                "window_size must be between 1 and len(timeseries) - 1, got %r" % (window_size,))
        self.product = product
        # Unscaled closes: every fill and valuation uses these, never the scaled features.
        self.prices = timeseries["Close"].astype(float)
        self.timeseries = self.scale(timeseries[self.FEATURE_COLUMNS])
        self.final = len(timeseries)
        self.slippage = .01
        self.fee = .0025
        self._window_size = window_size
        self._reset_account()

    def _reset_account(self):
        """Put cursor, cash, position and trade log back to their starting state."""
        self.current_index = self._window_size
        self.cash = self.STARTING_CASH
        self.position_shares = 0
        self.position_value = 0
        # Label of the instrument currently held, or None when flat.
        self.current_product = None
        self._ledger_rows = []

    @property
    def ledger(self):
        """Trades of the current episode as a DataFrame (one row per fill)."""
        return pd.DataFrame(self._ledger_rows, columns=self.LEDGER_COLUMNS)

    def window_size(self):
        """Return the number of rows in one observation."""
        return self._window_size

    def scale(self, timeseries):
        """Fit ``self.scaler`` on ``timeseries`` and return the scaled frame.

        The result keeps the input's columns and index.
        """
        self.scaler = MinMaxScaler()
        scaled = self.scaler.fit_transform(timeseries)
        return pd.DataFrame(scaled, columns=timeseries.columns, index=timeseries.index)

    def columns(self):
        """Return the feature column labels."""
        return self.timeseries.columns

    def features(self):
        """Return the number of feature columns (logged at verbose level)."""
        verbose("features:", self.timeseries.columns, len(self.timeseries.columns))
        return len(self.timeseries.columns)

    def make_ledger_row(self):
        """Return an empty DataFrame with the ledger columns."""
        return pd.DataFrame(columns=self.LEDGER_COLUMNS)

    def reset(self):
        """Start a new episode and return the first observation."""
        info("Reset!!!!")
        self._reset_account()
        return self.env_block()

    def _decode_action(self, action):
        """Translate an agent action into a trade signal (0 hold, 1 buy, -1 close).

        Index 2 is the position of -1 in ``ACTIONS`` and is mapped to it; any
        other value is returned unchanged and ends up in the no-op branch.
        """
        action = int(action)
        if action == len(self.ACTIONS) - 1:
            return self.ACTIONS[action]
        return action

    def step(self, action):
        """Advance one row, apply ``action`` and return ``(obs, reward, done, info)``.

        The episode ends when the data is exhausted or the account value is no
        longer positive; any open position is closed at that point.
        """
        self.current_index += 1
        # Mark to market first so the bankruptcy check sees the current price.
        self.update_position_value()
        info("step:", "index:", self.current_index, " of: ", self.final-1,
             " cash: ", self.cash, " value: ", self.position_value)

        out_of_data = self.current_index >= self.final - 1
        if out_of_data or self.calc_reward() <= 0:
            info("********MARKING DONE", "index:", self.current_index, " of: ", self.final-1,
                 " cash: ", self.cash, " value: ", self.position_value)
            if self.position_shares != 0:
                info("done so closing: ", self.current_product)
                self.close_position()
            return self.env_block(), self.calc_reward(), True, {}

        signal = self._decode_action(action)
        holding = self.position_shares != 0
        if signal == -1:
            info("closing: ")
            self.close_position()
        elif signal == 1 and not holding:
            info("opening: ")
            self.open_position()
        elif holding and signal in (0, 1):
            # Already long: both "hold" and a repeated "buy" keep the position.
            info("holding: ")
        else:
            info("invalid")
        return self.env_block(), self.calc_reward(), False, {}

    def calc_reward(self):
        """Return the current account value: cash plus marked position value."""
        info("current reward: ", self.position_value + self.cash)
        return self.position_value + self.cash

    def get_price_with_slippage(self, price, side=1):
        """Return ``price`` moved against the trader by the slippage fraction.

        ``side=1`` (buy, the default) raises the price; ``side=-1`` (sell)
        lowers it.
        """
        return price + side * (price * self.slippage)

    def _row_position(self):
        """Current row position, clamped to the last available row."""
        return min(self.current_index, self.final - 1)

    def _current_price(self):
        """Raw (unscaled) close at the current row."""
        return float(self.prices.iloc[self._row_position()])

    def _record_trade(self, action, side, price, fee):
        """Append one fill to the episode's trade log."""
        self._ledger_rows.append({
            "Date": self.prices.index[self._row_position()],
            "Side": side,
            "Action": action,
            "Price": price,
            "Fee": fee,
        })

    def open_position(self):
        """Buy as many whole shares as cash allows, fee included."""
        info("opening position. ")
        price = self.get_price_with_slippage(self._current_price(), side=1)
        if price <= 0:
            info("no valid price, not opening")
            return
        # Budget the proportional fee per share so cash can never go negative.
        shares = int(self.cash // (price * (1 + self.fee)))
        if shares <= 0:
            info("not enough cash to open a position")
            return
        notional = shares * price
        fee = notional * self.fee
        # Keep whatever cash is left over instead of discarding it.
        self.cash -= notional + fee
        self.position_shares = shares
        self.current_product = self.product
        self.update_position_value()
        self._record_trade("enter", 1, price, fee)

    def close_position(self):
        """Sell the whole position, if any, and credit the proceeds net of the fee."""
        if self.position_shares == 0:
            # Nothing to sell: leave the cash balance untouched.
            self.current_product = None
            return
        price = self.get_price_with_slippage(self._current_price(), side=-1)
        proceeds = price * self.position_shares
        fee = proceeds * self.fee
        self.cash += proceeds - fee
        self.position_shares = 0
        self.position_value = 0
        self.current_product = None
        self._record_trade("exit", -1, price, fee)

    def update_position_value(self):
        """Revalue the open position at the current raw close."""
        self.position_value = self._current_price() * self.position_shares

    def render(self, mode='human'):
        """Rendering is not implemented; present for API compatibility."""
        pass

    def env_block(self):
        """Return the ``window_size`` scaled rows preceding the cursor as an array."""
        start_index = self.current_index - self.window_size()
        block = self.timeseries.iloc[start_index:self.current_index, :].to_numpy()
        debug("env shape:", block.shape)
        return block





# Build the back-testing environment over the prepared SPY frame with 90-row observations.
env = BackTestingEnv("SPY", 90, df)


# Show the prepared frame as the cell output.
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains,trend,prob_above_trend
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1993-01-29 00:00:00-05:00,25.236164,25.236164,25.110610,25.218227,1003200,0.0,0.0,0.0,25.291198,0.493642
1993-02-01 00:00:00-05:00,25.236158,25.397583,25.236158,25.397583,480500,0.0,0.0,0.0,25.298910,0.488046
1993-02-02 00:00:00-05:00,25.379630,25.469310,25.325821,25.451374,201300,0.0,0.0,0.0,25.306623,0.462480
1993-02-03 00:00:00-05:00,25.487261,25.738367,25.469324,25.720430,529400,0.0,0.0,0.0,25.314335,0.448744
1993-02-04 00:00:00-05:00,25.810113,25.881857,25.523134,25.828049,531500,0.0,0.0,0.0,25.322047,0.460646
...,...,...,...,...,...,...,...,...,...,...
2023-03-09 00:00:00-05:00,399.739990,401.480011,390.529999,391.559998,111945300,0.0,0.0,0.0,391.459196,0.425697
2023-03-10 00:00:00-05:00,390.989990,393.160004,384.320007,385.910004,189105300,0.0,0.0,0.0,391.503197,0.811838
2023-03-13 00:00:00-04:00,381.809998,390.390015,380.649994,385.359985,157790000,0.0,0.0,0.0,391.547199,0.841458
2023-03-14 00:00:00-04:00,390.500000,393.450012,387.049988,391.730011,149752400,0.0,0.0,0.0,391.591200,0.429554


In [None]:
import math
import os
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Dense, LSTM, Dropout, BatchNormalization, Attention
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint
from sklearn.preprocessing import MinMaxScaler


def build_attention_model(seq_length, features, actions):
  """Build the Q-network: stacked LSTMs, self-attention over time, linear Q head.

  Args:
    seq_length: number of time steps in each input window.
    features: number of feature columns per time step.
    actions: number of discrete actions (width of the output layer).

  Returns:
    A compiled Keras ``Model`` mapping ``(batch, seq_length, features)`` to
    ``(batch, actions)`` Q-values.
  """
  # Function-scope import: the pooling layer is not in the notebook's import cell.
  from keras.layers import GlobalAveragePooling1D

  inputs = Input(shape=(seq_length, features))
  x = LSTM(150, return_sequences=True)(inputs)
  x = Dropout(0.3)(x)
  x = BatchNormalization()(x)
  x = LSTM(150, return_sequences=True)(x)
  x = Dropout(0.3)(x)
  x = BatchNormalization()(x)
  # Keep the full sequence: Attention expects (batch, steps, dim) tensors. Feeding it
  # the final 2-D LSTM state made it compute scores between samples of the same batch
  # instead of between time steps.
  x = LSTM(150, return_sequences=True)(x)
  attended = Attention()([x, x])
  # Collapse the time axis so the head outputs one Q-value vector per sample.
  pooled = GlobalAveragePooling1D()(attended)
  outputs = Dense(actions, activation='linear')(pooled)
  model = Model(inputs=inputs, outputs=outputs)
  model.compile(optimizer='adam', loss="mse")
  return model


# --- Output locations on the mounted Drive ---
data_path = '/content/drive/My Drive/ml-trde-notebooks/data'
model_path = "/content/drive/My Drive/ml-trde-notebooks/models"
weights_filename = model_path + '/dqn_trader2_weights.h5f'
midpoint_filename = model_path + "/dqn_trader2_weights_250000.h5f"
# {step} is left as a placeholder for the checkpoint callback.
checkpoint_weights_filename = model_path + '/dqn_trader2_weights_{step}.h5f'
# Name the log after the traded product; the old '{args.env_name}' placeholder was
# never formatted and ended up verbatim in the file name.
log_filename = data_path + '/dqn2_{}_log.json'.format(env.product)


class WindowSqueezeProcessor(Processor):
  """Remove keras-rl's window axis before states reach the network.

  With window_length=1 the agent batches states as (batch, 1, seq_length, features)
  while the LSTM model expects (batch, seq_length, features).
  """

  def process_state_batch(self, batch):
    return np.squeeze(batch, axis=1)


# Observation shape as (time steps, features per step).
input_shape = (env.window_size(), env.features())
print("input shape:", input_shape)

nb_actions = len(actions)  # Number of possible actions

model = build_attention_model(input_shape[0], input_shape[1], nb_actions)

memory = SequentialMemory(limit=1000000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
               processor=WindowSqueezeProcessor(), nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
# `lr` is a deprecated alias; `learning_rate` is the supported keyword.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000),
    FileLogger(log_filename, interval=100),
]

train = True
if train:
  # Weights saved under a non-.h5 name may be written as a TensorFlow checkpoint
  # ('<name>.index' + data shards), so look for either form before resuming.
  if os.path.exists(midpoint_filename) or os.path.exists(midpoint_filename + '.index'):
    # Load through the agent so the target network is synchronised as well.
    dqn.load_weights(midpoint_filename)

  dqn.fit(env, callbacks=callbacks, nb_steps=1750000, visualize=False,  log_interval=10000, verbose=2)

  # After training is done, we save the final weights one more time.
  dqn.save_weights(weights_filename, overwrite=True)

else:
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=False)



features: Index(['Close', 'Volume', 'trend', 'prob_above_trend'], dtype='object') 4
input shape: 4
features: Index(['Close', 'Volume', 'trend', 'prob_above_trend'], dtype='object') 4


Instructions for updating:
Colocations handled automatically by placer.
  super().__init__(name, **kwargs)


Training for 1750000 steps ...
Reset!!!!
env shape: (90, 4)


ValueError: ignored

In [None]:
# Inspect the environment's current reward, i.e. cash plus the value of the open position.
env.calc_reward()
