In [None]:
pip install pandas-ta

In [1]:
# Importing all necessary libraries

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt

In [2]:
import pandas_ta as ta

class TechnicalIndicators:  # pandas_ta port of the indicator code (conversion was GPT-assisted)
    """Append technical-indicator columns to a price/volume DataFrame.

    The frame passed to the constructor is stored by reference and extended
    in place; it must provide 'Close', 'High', 'Low' and 'Volume' columns.
    """

    # Look-back lengths for which an ``ATR_<length>`` column is produced.
    _ATR_LENGTHS = (1, 2, 5, 10, 20)

    def __init__(self, data):
        self.data = data

    def add_momentum_indicators(self):
        """Add RSI(14), MACD(12, 26, 9) and the stochastic oscillator columns."""
        frame = self.data

        # RSI
        frame['RSI'] = ta.rsi(frame['Close'], length=14)

        # MACD line, signal line and histogram
        macd_frame = ta.macd(frame['Close'], fast=12, slow=26, signal=9)
        macd_columns = (
            ('MACD', 'MACD_12_26_9'),
            ('MACD_signal', 'MACDs_12_26_9'),
            ('MACD_hist', 'MACDh_12_26_9'),
        )
        for target, source in macd_columns:
            frame[target] = macd_frame[source]

        # Stochastic oscillator (%K and %D) with pandas_ta's default settings
        stoch_frame = ta.stoch(frame['High'], frame['Low'], frame['Close'])
        frame['Stoch_k'] = stoch_frame['STOCHk_14_3_3']
        frame['Stoch_d'] = stoch_frame['STOCHd_14_3_3']

    def add_volume_indicators(self):
        """Add On-Balance Volume as the 'OBV' column."""
        frame = self.data
        frame['OBV'] = ta.obv(frame['Close'], frame['Volume'])

    def add_volatility_indicators(self):
        """Add 20-period Bollinger Bands and one ATR column per configured length."""
        frame = self.data

        # Bollinger Bands: upper, middle and lower band
        bands = ta.bbands(frame['Close'], length=20)
        for target, source in (('Upper_BB', 'BBU_20_2.0'),
                               ('Middle_BB', 'BBM_20_2.0'),
                               ('Lower_BB', 'BBL_20_2.0')):
            frame[target] = bands[source]

        # Average True Range over several look-back lengths
        for length in self._ATR_LENGTHS:
            frame[f'ATR_{length}'] = ta.atr(frame['High'], frame['Low'], frame['Close'], length=length)

    def add_trend_indicators(self):
        """Add ADX(14) with its +DI/-DI components, and CCI(5)."""
        frame = self.data

        # Average Directional Index and directional movement lines
        adx_frame = ta.adx(frame['High'], frame['Low'], frame['Close'], length=14)
        frame['ADX'] = adx_frame['ADX_14']
        frame['+DI'] = adx_frame['DMP_14']
        frame['-DI'] = adx_frame['DMN_14']

        # Commodity Channel Index
        frame['CCI'] = ta.cci(frame['High'], frame['Low'], frame['Close'], length=5)

    def add_other_indicators(self):
        """Add log return ('DLR'), expanding mean of Close ('TWAP') and 'VWAP'."""
        frame = self.data
        close = frame['Close']

        # Log return of each row relative to the previous row
        frame['DLR'] = np.log(close / close.shift(1))

        # "TWAP" here is the expanding (running) mean of the close price
        frame['TWAP'] = close.expanding().mean()

        # Volume Weighted Average Price
        frame['VWAP'] = ta.vwap(frame['High'], frame['Low'], frame['Close'], frame['Volume'])

    def add_all_indicators(self):
        """Run every indicator group in a fixed order and return the extended frame."""
        steps = (
            self.add_momentum_indicators,
            self.add_volume_indicators,
            self.add_volatility_indicators,
            self.add_trend_indicators,
            self.add_other_indicators,
        )
        for add_group in steps:
            add_group()
        return self.data


In [3]:
from google.colab import drive
drive.mount('/content/drive') # Mount Google Drive so files stored there can be read
file_path = '/content/drive/MyDrive/xnas-itch-20230703.tbbo.csv'
data = pd.read_csv(file_path) # Load the CSV from the mounted Drive into `data`

# Alternative: upload the dataset from the local machine instead of mounting Drive.

# from google.colab import files
# uploaded = files.upload()
# data = pd.read_csv(list(uploaded.keys())[0])

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Remove leading/trailing whitespace from every column label, then list the labels.
data = data.rename(columns=str.strip)
print(data.columns)

In [None]:
# Show the columns, parse the event timestamps, and report how many timestamps repeat.
print("Columns :", data.columns)

data['ts_event'] = pd.to_datetime(data['ts_event'])

# A row counts as a duplicate when an earlier row carries the same ts_event.
duplicate_times = data.duplicated(subset=['ts_event']).sum()
print(f"Number of duplicate timestamps: {duplicate_times}")

In [8]:
data = data.drop_duplicates(subset=['ts_event']) # Keep only the first row for each repeated event timestamp

In [10]:
# Use the event timestamp as the index. The guard makes the cell safe to
# re-run: once 'ts_event' has moved into the index it is no longer a column,
# and an unguarded set_index would raise KeyError.
if 'ts_event' in data.columns:
    data = data.set_index('ts_event')

In [11]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [12]:
# Preprocessing: rescale the raw price fields and derive OHLCV-style columns.

# The raw price fields are divided by 1e9
# (presumably fixed-point integers with nine decimals -- confirm against the data spec).
for px_col in ('price', 'bid_px_00', 'ask_px_00'):
    data[px_col] = data[px_col] / 1e9

# Close/Volume come from the trade itself; High/Low are the larger/smaller of
# the top-of-book bid and ask; Open is the previous row's Close (the first row
# falls back to its own Close).
data['Close'] = data['price']
data['Volume'] = data['size']
top_of_book = ['bid_px_00', 'ask_px_00']
data['High'] = data[top_of_book].max(axis=1)
data['Low'] = data[top_of_book].min(axis=1)
previous_close = data['Close'].shift(1)
data['Open'] = previous_close.fillna(data['Close'])

In [None]:
data.head(10) # Preview the first 10 rows after preprocessing

In [14]:
ti = TechnicalIndicators(data)
# add_all_indicators() extends `data` in place and returns that same frame.
df_with_indicators = ti.add_all_indicators()
# Skip the first 35 rows (presumably the indicator warm-up period -- confirm).
# An explicit copy keeps market_features_df independent of `data`, so later
# column assignments on it do not write into a slice of another frame
# (pandas SettingWithCopyWarning) and later in-place edits of `data` cannot
# affect it.
market_features_df = df_with_indicators.iloc[35:].copy()
df = market_features_df.copy() #Creating a copy of the dataset

In [None]:
print(data.columns) # List the columns, which now include the indicator columns added in place

In [None]:
data.isna().sum() # Number of missing values in each column

In [None]:
data.dropna(inplace=True) # Remove, in place, every row that contains at least one missing value

In [17]:
df = data.copy() # Independent copy of the cleaned dataset

In [19]:
# A transformer to extract features from the dataset. Lines that are commented out were used for debugging.

import torch.nn.functional as F
import torch
import torch.nn as nn

class TransformerFeatureExtractor(nn.Module):
  """Encode a sequence of feature vectors into one fixed-size vector per sample.

  Input is expected as ``(batch, seq_len, input_dim)``, matching the
  ``batch_first=True`` encoder layers. The output has shape
  ``(batch, model_dim)``: the encoded sequence is average-pooled over the
  sequence axis and passed through a linear layer, LayerNorm, ReLU and dropout.

  Args:
    input_dim: number of features per time step.
    model_dim: width of the embedding and of the returned feature vector.
    num_heads: attention heads per encoder layer.
    num_layers: number of stacked encoder layers.
    dropout: dropout probability used throughout the module.
  """

  def __init__(self, input_dim, model_dim, num_heads, num_layers, dropout=0.1):
    super(TransformerFeatureExtractor, self).__init__()
    self.embedding = nn.Linear(input_dim, model_dim)
    # Shape (1, model_dim): one learned offset that broadcasts over every batch
    # element and every position (it does not vary with sequence position).
    self.positional_encoding = nn.Parameter(torch.zeros(1, model_dim))
    transformer_layer = nn.TransformerEncoderLayer(d_model=model_dim, nhead=num_heads, dropout=dropout, batch_first=True)
    self.transformer_layers = nn.Sequential(
        nn.LayerNorm(model_dim),
        nn.Dropout(dropout),
        nn.TransformerEncoder(transformer_layer, num_layers=num_layers)
    )
    self.residual_connection = nn.Identity()
    self.fc = nn.Linear(model_dim, model_dim)
    self.layer_norm = nn.LayerNorm(model_dim)
    self.relu = nn.ReLU()
    self.dropout = nn.Dropout(dropout)
    self.adaptive_pool = nn.AdaptiveAvgPool1d(1)

  def forward(self, x):
    """Return a ``(batch, model_dim)`` feature tensor for ``x`` of shape ``(batch, seq_len, input_dim)``."""
    x = self.embedding(x) + self.positional_encoding
    residual = self.residual_connection(x)
    x = self.transformer_layers(x) + residual
    # (batch, seq, dim) -> (batch, dim, seq) so the 1-D pooling averages over
    # the sequence axis. Permuting to (seq, dim, batch) would instead average
    # over the batch and mix independent samples together.
    x = x.permute(0, 2, 1)
    x = self.adaptive_pool(x).squeeze(2)
    x = self.fc(x)
    x = self.layer_norm(x)
    x = self.relu(x)
    x = self.dropout(x)
    return x

In [20]:
# PPO policy model for action selection

class PPOPolicyNetwork(nn.Module):
  """Three-layer MLP that maps a feature vector to logits over discrete actions."""

  def __init__(self, input_dim, action_dim):
    super().__init__()
    wide, narrow = 128, 64
    self.fc1 = nn.Linear(input_dim, wide)
    self.fc2 = nn.Linear(wide, narrow)
    self.fc3 = nn.Linear(narrow, action_dim)

  def forward(self, x):
    """Return unnormalised action logits for ``x``."""
    hidden = x
    for layer in (self.fc1, self.fc2):
      hidden = F.relu(layer(hidden))
    return self.fc3(hidden)

  def get_action(self, x):
    """Sample an action from the softmax policy; return ``(action, log_prob)``."""
    policy = Categorical(F.softmax(self.forward(x), dim=-1))
    sampled = policy.sample()
    return sampled, policy.log_prob(sampled)


In [21]:
# Hybrid model combining Transformer and PPO

class HybridModel(nn.Module):
  """Chain a feature extractor with a policy head that samples actions."""

  def __init__(self, transformer, ppo_policy):
    super().__init__()
    self.transformer = transformer
    self.ppo_policy = ppo_policy

  def forward(self, x):
    """Extract features from ``x`` and return the policy's ``(action, log_prob)``."""
    sampled, sampled_log_prob = self.ppo_policy.get_action(self.transformer(x))
    return sampled, sampled_log_prob


In [22]:
# Trading Environment provided in the ipynb notebook

import gym
from gym import spaces
import numpy as np

class TradingEnvironment(gym.Env):
  """Gym environment that replays a market-data frame one row per step.

  Actions are discrete: 0 = hold, 1 = buy, 2 = sell. Buy and sell sizes are a
  random 0.1%-0.5% fraction of the current cash balance, converted to whole
  shares. The reward of a non-hold action is the negative sum of slippage
  (ask price minus trade price), a time penalty derived from 'ts_in_delta'
  and a volume-based transaction cost.

  Args:
    data: DataFrame holding every column in ``state_columns`` plus
      'ask_px_00', 'price' and 'ts_in_delta'.
    daily_trading_limit: stored on the instance; no method of this class
      enforces it.
  """
  metadata = {'render.modes': ['human']}

  def __init__(self, data, daily_trading_limit):
    super(TradingEnvironment, self).__init__()
    # Use the frame supplied by the caller rather than a module-level global.
    self.data = data
    self.daily_trading_limit = daily_trading_limit
    self.current_step = 0

    # Extract state columns
    self.state_columns = ['Close', 'Volume', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                          'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI']

    # Mean of the Volume column, the denominator of the transaction-cost
    # ratio. Computed once instead of on every step.
    self._mean_volume = self.data['Volume'].mean()

    # Initialize balance, shares held, and total shares traded
    self.balance = 10_000_000.0  # $10 million
    self.shares_held = 0
    self.total_shares_traded = 0
    self.trades = []
    self.cumulative_reward = 0

    # Define action space: [Hold, Buy, Sell]
    self.action_space = spaces.Discrete(3)

    # Define observation space based on state columns
    self.observation_space = spaces.Box(
        low=-np.inf, high=np.inf, shape=(len(self.state_columns),), dtype=np.float32
    )

  def normalize_inputs(self, inputs):
    """Standardise ``inputs`` along axis 0 to zero mean and unit variance.

    For the 1-D observation rows used by this class, that means standardising
    across the features of a single row, not per feature across time.
    """
    mean = np.mean(inputs, axis=0)
    std = np.std(inputs, axis=0)
    return (inputs - mean) / (std + 1e-8)

  def reset(self):
    """Restore the initial account state and return the first observation."""
    self.current_step = 0
    self.balance = 10_000_000.0
    self.shares_held = 0
    self.total_shares_traded = 0
    self.cumulative_reward = 0
    self.trades = []
    # _next_observation() already normalises; a second pass is redundant.
    self.state = self._next_observation()
    return self.state

  def _next_observation(self):
    """Return the normalised state vector for the row at ``current_step``."""
    raw_state = self.data[self.state_columns].iloc[self.current_step].values
    normalized_state = self.normalize_inputs(raw_state)
    return normalized_state

  def step(self, action):
    """Apply ``action`` to the current row and advance the cursor by one row.

    Returns:
      ``(obs, reward, done, info)``. ``obs`` is the observation for the row
      after the one acted on, ``reward`` is scaled by 1e-6, and ``done`` is
      True once the cursor reaches the final row. After a terminal step the
      cursor wraps to row 0 so that callers which keep stepping without
      calling ``reset()`` do not run past the end of the data.
    """
    step_idx = self.current_step
    row = self.data.iloc[step_idx]
    expected_price = row['ask_px_00']
    actual_price = row['price']
    transaction_time = row['ts_in_delta']

    # The trade record if shares actually changed hands on this step, else None.
    trade = self._take_action(action)
    reward = 0

    if action != 0:
      transaction_cost = self._calculate_transaction_cost(row['Volume'], 0.3, self._mean_volume)
      reward = self._calculate_reward(expected_price, actual_price, transaction_time, transaction_cost)
      self.cumulative_reward += reward
      print(f"Step: {step_idx}, Action: {action}, Expected Price: {expected_price}, Actual Price: {actual_price}, Reward: {reward}")
      # Attach the costs only to the trade made on this step; an action that
      # moved no shares must not overwrite the record of an earlier trade.
      if trade is not None:
        trade['reward'] = reward
        trade['transaction_cost'] = transaction_cost
        trade['slippage'] = expected_price - actual_price
        trade['time_penalty'] = 100*transaction_time/1e9

    info = {
        'step': step_idx,
        'action': action,
        'price': actual_price,
        'shares': trade['shares'] if trade is not None else 0
    }

    # Advance first, then build the observation, so the caller sees the next
    # state rather than the row it has just acted on.
    last_index = len(self.data) - 1
    next_step = step_idx + 1
    done = next_step >= last_index
    self.current_step = min(next_step, last_index)
    obs = self._next_observation()
    if done:
      self.current_step = 0

    reward *= 1e-6  # Scaling the reward

    return obs, reward, done, info

  def _take_action(self, action):
    """Execute a buy (1) or sell (2) at the current row's Close price.

    Returns the trade record appended to ``self.trades``, or None when no
    shares changed hands (hold, or a computed size of zero).
    """
    current_price = self.data.iloc[self.current_step]['Close']

    # Access the timestamp from the index
    current_time = self.data.index[self.current_step]

    trade_info = {
        'step': self.current_step,
        'timestamp': current_time,
        'action': action,
        'price': current_price,
        'shares': 0,
        'reward': 0,
        'transaction_cost': 0,
        'slippage': 0,
        'time_penalty': 0
    }

    if action == 1:  # Buy
      shares_bought = (self.balance * np.random.uniform(0.001, 0.005)) // current_price
      self.balance -= shares_bought * current_price
      self.shares_held += shares_bought
      self.total_shares_traded += shares_bought
      trade_info['shares'] = shares_bought
      if shares_bought > 0:
        self.trades.append(trade_info)
        return trade_info
    elif action == 2:  # Sell
      shares_sold = min((self.balance * np.random.uniform(0.001, 0.005)) // current_price, self.shares_held)
      self.balance += shares_sold * current_price
      self.shares_held -= shares_sold
      # Traded volume accumulates for sells as well as buys; the net position
      # is tracked separately in shares_held.
      self.total_shares_traded += shares_sold
      trade_info['shares'] = shares_sold
      if shares_sold > 0:
        self.trades.append(trade_info)
        return trade_info
    return None

  def _calculate_reward(self, expected_price, actual_price, transaction_time, transaction_cost):
    """Return the negative of slippage + time penalty + transaction cost."""
    slippage = expected_price - actual_price
    time_penalty = 100 * transaction_time / 1e9  # Scaling the time penalty
    reward = - (slippage + time_penalty + transaction_cost)
    return reward

  def _calculate_transaction_cost(self, volume, volatility, daily_volume):
    """Return ``volatility * sqrt(|volume| / |daily_volume|)``."""
    volume = np.abs(volume)  # Ensure volume is positive
    # Floor the denominator so a zero reference volume cannot divide by zero.
    daily_volume = np.maximum(np.abs(daily_volume), 1e-8)
    volume_ratio = np.maximum(volume / daily_volume, 0)
    transaction_cost = volatility * np.sqrt(volume_ratio)

    return transaction_cost

  def get_state(self):
    """Return the normalised observation at the current cursor position."""
    return self._next_observation()

  def run(self, policy=None):
    """Roll out one pass over the data and return ``(cumulative_reward, trades)``.

    Args:
      policy: optional callable mapping an observation to an action. When
        omitted, actions are sampled uniformly from ``action_space``.
    """
    obs = self.reset()
    done = False
    while not done:
      action = self.action_space.sample() if policy is None else policy(obs)
      obs, _, done, _ = self.step(action)
    return self.cumulative_reward, self.trades

  def render(self, mode='human', close=False):
    """Print the account summary, then dump every recorded trade."""
    print(f'Step: {self.current_step}')
    print(f'Balance: {self.balance}')
    print(f'Shares held: {self.shares_held}')
    print(f'Total shares traded: {self.total_shares_traded}')
    print(f'Total portfolio value: {self.balance + self.shares_held * self.data.iloc[self.current_step]["Close"]}')
    print(f'Cumulative reward: {self.cumulative_reward}')
    self.print_trades()

  def print_trades(self):
    """Write all trades to 'trades_ppo.csv' and print one line per trade."""
    trades_df = pd.DataFrame(self.trades)
    # Save a csv
    trades_df.to_csv('trades_ppo.csv', index=False)
    for trade in self.trades:
      print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")

    # The trades are saved to the CSV file above and can be viewed from there

In [62]:
#The Training Class for training the hybrid model

import torch.optim as optim
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR

class PPOTrainer:
  """Policy-gradient trainer for the hybrid transformer + policy model.

  Despite the name, the loss implemented here is a plain policy-gradient
  objective (``-advantage * log_prob``) with an entropy bonus; no clipped
  probability ratio and no value function are computed.

  Args:
    hybrid_model: module whose forward pass returns ``(action, log_prob)``.
    environment: object exposing ``data``, ``reset()``, ``get_state()`` and
      ``step(action)``; only ``train_hybrid_model`` uses it.
    lr: Adam learning rate.
    batch_size: transitions collected between updates in ``train_hybrid_model``.
    reward_scale: multiplier applied to each reward in ``train_hybrid_model``.
    gradient_clip_value: max gradient norm passed to ``clip_grad_norm_``.
    entropy_coeff: weight of the entropy bonus in ``compute_loss``.
  """

  def __init__(self, hybrid_model, environment, lr=0.00013, batch_size=64, reward_scale=1.0, gradient_clip_value=0.5326, entropy_coeff=0.1):
    self.hybrid_model = hybrid_model
    self.environment = environment
    self.lr = lr
    self.batch_size = batch_size
    self.reward_scale = reward_scale
    self.gradient_clip_value = gradient_clip_value
    self.entropy_coeff = entropy_coeff
    # Per-episode buffers filled by update_model().
    self.rewards = []
    self.log_probs = []
    self.values = []
    self.actions = []
    self.mse_loss = torch.nn.MSELoss()

    self.optimizer = optim.Adam(self.hybrid_model.parameters(), lr=self.lr, weight_decay=1e-4)  # Adjusted weight decay
    self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=100, gamma=0.9)

  def init_weights(self, m):
    """Xavier-initialise ``nn.Linear`` weights and zero their biases (for ``Module.apply``)."""
    if isinstance(m, nn.Linear):
      nn.init.xavier_uniform_(m.weight)
      if m.bias is not None:
        nn.init.zeros_(m.bias)

  def train_hybrid_model(self, num_epochs):
    """Train for ``num_epochs`` passes over the environment's data.

    Each epoch starts from a freshly reset environment. Transitions are
    collected into mini-batches of ``batch_size``; a batch is also flushed on
    the last step of the pass and when the environment reports ``done``.
    """
    steps_per_epoch = len(self.environment.data) - 1
    for epoch in range(num_epochs):
      total_loss = 0
      rewards, log_probs = [], []
      # Start every epoch from the same account state and data position.
      self.environment.reset()

      for t in range(steps_per_epoch):
        state = self.environment.get_state()
        state_tensor = torch.FloatTensor(state).unsqueeze(0).unsqueeze(1)

        # One forward pass per step: it samples the action and yields the
        # log-probability that the loss later differentiates through.
        action, log_prob = self.hybrid_model(state_tensor)
        action = action.item()

        _, reward, done, _ = self.environment.step(action)
        reward *= self.reward_scale  # Scaling the reward

        rewards.append(reward)
        log_probs.append(log_prob)

        if len(rewards) == self.batch_size or t == steps_per_epoch - 1 or done:
          # Calculate advantage
          advantages = self.compute_advantages(rewards)
          loss = self.compute_loss(advantages, log_probs)

          # Backpropagation
          self.optimizer.zero_grad()
          loss.backward()
          nn.utils.clip_grad_norm_(self.hybrid_model.parameters(), self.gradient_clip_value)
          self.optimizer.step()

          # Learning rate adjustment
          self.scheduler.step()

          total_loss += loss.item()
          rewards, log_probs = [], []

        if done:
          break

      # Log the total loss for the epoch
      print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss}")

  def compute_advantages(self, rewards):
    """Return ``rewards`` standardised to zero mean / unit variance as a float32 tensor."""
    # Force a float array so integer rewards cannot break the division.
    rewards = np.asarray(rewards, dtype=np.float64)
    if rewards.size == 0:
      return torch.zeros(0, dtype=torch.float32)
    advantages = (rewards - rewards.mean()) / (rewards.std() + 1e-8)  # Normalize to reduce variance
    return torch.as_tensor(advantages, dtype=torch.float32)

  def compute_loss(self, advantages, log_probs):
    """Return the policy-gradient loss minus the weighted entropy bonus.

    Args:
      advantages: one value per transition (tensor or array-like).
      log_probs: a tensor, or a list of tensors holding the log-probability
        of each taken action.
    """
    advantages = torch.as_tensor(advantages, dtype=torch.float32)
    if not torch.is_tensor(log_probs):
      # reshape(-1) lets 0-dim log-probs be concatenated as well.
      log_probs = torch.cat([lp.reshape(-1) for lp in log_probs])
    policy_loss = -advantages * log_probs
    # Sample estimate of the policy entropy from the taken actions.
    entropy = -log_probs.mean()

    # The bonus is subtracted: minimising the loss must increase entropy for
    # the term to encourage exploration.
    loss = policy_loss.mean() - self.entropy_coeff * entropy
    print("Loss:", loss.item())  # Print the loss for debugging

    return loss

  def calculate_returns(self, rewards, gamma):
    """Return the discounted return for each step, in the same order as ``rewards``."""
    R = 0
    returns = []
    # Accumulate from the last reward backwards, then restore the order;
    # appending and reversing avoids the quadratic cost of insert(0, ...).
    for r in reversed(rewards):
      R = r + gamma * R
      returns.append(R)
    returns.reverse()
    return returns

  def calculate_loss(self, rewards, actions, log_probs, gamma=0.99):
    """Return the episode loss used by ``update_model``.

    Rewards are turned into discounted returns, standardised, and fed to
    ``compute_loss``. ``actions`` is accepted for the caller's convenience
    but not needed: ``log_probs`` already belong to the taken actions.
    """
    returns = self.calculate_returns(torch.as_tensor(rewards).tolist(), gamma)
    advantages = self.compute_advantages(returns)
    return self.compute_loss(advantages, log_probs)

  def update_model(self, hybrid_model, reward, action, log_prob, done):
    """Buffer one transition; on ``done`` run a single optimisation step.

    Returns the loss as a float when an update was performed, else None.
    ``hybrid_model`` is accepted for compatibility; the optimiser and the
    gradient clipping act on ``self.hybrid_model``.
    """
    # Store the reward, log probability of the action, and action itself
    self.rewards.append(reward)
    self.log_probs.append(log_prob)
    self.actions.append(action)

    if done:
      rewards_tensor = torch.tensor(self.rewards, dtype=torch.float32)  # Convert rewards to tensor
      actions_tensor = torch.tensor(self.actions, dtype=torch.int64)
      log_probs_tensor = torch.cat([lp.reshape(-1) for lp in self.log_probs])
      loss = self.calculate_loss(rewards_tensor, actions_tensor, log_probs_tensor)
      self.optimizer.zero_grad()
      loss.backward()  # Backpropagation
      nn.utils.clip_grad_norm_(self.hybrid_model.parameters(), self.gradient_clip_value)  # Gradient clipping
      self.optimizer.step()

      self.rewards = []
      self.actions = []
      self.log_probs = []

      return loss.item()

    return None


In [None]:
# Count the missing values in each numeric column of the feature frame.
numeric_columns = market_features_df.select_dtypes(include=[np.number]).columns
print(market_features_df[numeric_columns].isnull().sum())

In [None]:
# Fill missing CCI values with the column mean. `assign` builds a new frame
# instead of writing a column into what may be a slice of another DataFrame,
# which avoids pandas' SettingWithCopyWarning.
market_features_df = market_features_df.assign(
    CCI=market_features_df['CCI'].fillna(market_features_df['CCI'].mean())
)

In [None]:
data.head(10)  # Preview the first 10 rows of the dataset

In [64]:
# Build the model, environment and trainer, then train one episode per epoch.
input_dim = 17  # Number of input features; matches the 17 entries in TradingEnvironment.state_columns
transformer = TransformerFeatureExtractor(input_dim=input_dim, model_dim=64, num_heads=32, num_layers=24)
ppo_policy = PPOPolicyNetwork(input_dim=64, action_dim=3)  # input width equals the transformer's model_dim
hybrid_model = HybridModel(transformer, ppo_policy)
environment = TradingEnvironment(market_features_df, daily_trading_limit=100000)
ppo_trainer = PPOTrainer(hybrid_model=hybrid_model, environment=environment, lr=0.001329)
hybrid_model.apply(ppo_trainer.init_weights)  # re-initialise every nn.Linear submodule

num_epochs = 10 # Increase the number of epochs to train for longer

for epoch in range(num_epochs):
  # Each epoch is one episode that starts from a freshly reset environment.
  state = environment.reset()
  done = False
  total_reward = 0

  while not done:

    # Add batch and sequence dimensions of size 1 in front of the feature vector.
    state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0).unsqueeze(1)
    action, log_prob = hybrid_model(state_tensor)
    action = action.item()
    next_state, reward, done, info = environment.step(action)
    total_reward += reward
    # update_model buffers the transition and returns a loss only when done is True.
    loss = ppo_trainer.update_model(hybrid_model, reward, action, log_prob, done)
    if loss is not None:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss}, Total Reward: {total_reward}")

    state = next_state


In [27]:
#Preprocessing for evaluation. Taken from the provided ipynb notebook

from scipy.stats import norm

INITIAL_CASH = 10_000_000  # $10 million

# Data Preprocessing and RSI Calculation Functions
def preprocess_data(data):
    """Return a copy of ``data`` with an added 'liquidity' column.

    Liquidity is the top-of-book notional on both sides:
    ``bid_sz_00 * bid_px_00 + ask_sz_00 * ask_px_00``.

    The input frame is left untouched. Callers often pass a filtered slice of
    a larger frame, and adding a column to such a slice in place triggers
    pandas' SettingWithCopyWarning.
    """
    data = data.copy()
    data['liquidity'] = data['bid_sz_00'] * data['bid_px_00'] + data['ask_sz_00'] * data['ask_px_00']
    return data

def calculate_rsi(data, window=14):
  """Return the RSI of ``data`` based on simple rolling means of gains and losses.

  The first ``window - 1`` entries are NaN because the rolling window is not
  yet full.
  """
  change = data.diff()
  # Non-positive changes (and the leading NaN) count as a zero gain;
  # non-negative changes (and the leading NaN) count as a zero loss.
  upward = change.where(change > 0, 0)
  downward = -change.where(change < 0, 0)
  avg_gain = upward.rolling(window=window).mean()
  avg_loss = downward.rolling(window=window).mean()
  # The small epsilon keeps the ratio finite when there were no losses.
  relative_strength = avg_gain / (avg_loss + 1e-8)
  return 100 - (100 / (1 + relative_strength))

def calculate_vol_and_liquidity(price_df, volume_df, window_size):
  """Return rolling mean/std of price returns and of ``volume_df``.

  The result is the 4-tuple ``(mean_return, std_return, mean_liquidity,
  std_liquidity)``, each computed over a window of ``window_size`` rows.
  """
  return_window = price_df.pct_change().rolling(window=window_size)
  liquidity_window = volume_df.rolling(window=window_size)
  return (
      return_window.mean(),
      return_window.std(),
      liquidity_window.mean(),
      liquidity_window.std(),
  )

def get_percentile(current_value, mean, std):
  """Return the normal-CDF percentile of ``current_value`` given ``mean`` and ``std``.

  When ``std`` is not strictly positive (zero, negative or NaN) there is no
  usable spread, and the neutral value 0.5 is returned.
  """
  if not std > 0:
    return 0.5  # No variation
  # The epsilon mirrors the guard against division by zero.
  return norm.cdf((current_value - mean) / (std + 1e-8))

def get_trade_price(base_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction):
  """Return ``base_price`` shifted by a random percentage chosen by market regime.

  The regime comes from the percentiles of the current volatility and
  liquidity. Exactly one uniform random number is drawn per call. For 'BUY'
  the price is ``base_price * (1 - adjustment)``; for any other direction it
  is ``base_price * (1 + adjustment)``.
  """
  vol_pct = get_percentile(current_vol, mean_vol, std_vol)
  liq_pct = get_percentile(current_liq, mean_liq, std_liq)

  high_vol = vol_pct >= 0.9
  low_vol = vol_pct <= 0.1
  thin_book = liq_pct < 0.1
  deep_book = liq_pct >= 0.9

  # Choose the adjustment range first, then draw once from it.
  if high_vol and thin_book:
    low, high = -0.25, -0.15
  elif low_vol and thin_book:
    low, high = -0.10, -0.05
  elif high_vol and deep_book:
    low, high = -0.05, +0.10
  else:
    low, high = -0.05, +0.05  # Default for normal conditions
  adjustment = np.random.uniform(low, high)

  if trade_direction == 'BUY':
    return base_price * (1 - adjustment)
  return base_price * (1 + adjustment)  # SELL

In [59]:
#Trading environment for the blotter taken from the ipynb notebook

class TradingEnvironmentwithBlotter:
  """Rule-based back-test that records every trade in a blotter.

  Trading decisions come solely from an RSI rule: buy when RSI < 30, sell
  when RSI > 70, otherwise hold. ``hybrid_model``, ``daily_trading_limit``
  and ``state_columns`` are stored on the instance but not consulted by
  ``step``.

  Args:
    data: market-data frame; passed through ``preprocess_data``.
    daily_trading_limit: stored, not enforced.
    window_size: rolling window for the volatility/liquidity statistics.
    hybrid_model: stored, not used for decisions.
  """

  def __init__(self, data, daily_trading_limit, window_size, hybrid_model):
    self.data = preprocess_data(data)
    self.daily_trading_limit = daily_trading_limit
    self.window_size = window_size
    self.hybrid_model = hybrid_model
    self.state_columns = ['price', 'liquidity', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                            'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI']
    self.reset()

  def reset(self):
    """Restore the initial portfolio and recompute the derived data columns."""
    self.current_step = 0
    self.balance = INITIAL_CASH
    self.shares_held = 0
    self.total_shares_traded = 0
    self.cumulative_reward = 0
    self.trades = []
    self.portfolio = {'cash': self.balance, 'holdings': {ticker: 0 for ticker in self.data['symbol'].unique()}}
    # Overwrites any existing 'RSI' column with the rolling-mean RSI of price.
    self.data['RSI'] = calculate_rsi(self.data['price'])
    self.data['pct_change'] = self.data['price'].pct_change()
    self.data['rolling_mean_vol'], self.data['rolling_std_vol'], self.data['rolling_mean_liq'], self.data['rolling_std_liq'] = calculate_vol_and_liquidity(self.data['price'], self.data['liquidity'], self.window_size)
    # Reference volume for the transaction cost; computed once per reset
    # instead of once per trade.
    self._mean_volume = self.data['Volume'].mean()

  def step(self):
    """Process the row at ``current_step`` and advance the cursor.

    Returns:
      True when the row just processed was the last one (the cursor then
      wraps back to 0), otherwise False.
    """
    row = self.data.iloc[self.current_step]
    current_price = row['price']
    current_time = pd.to_datetime(row['ts_recv'])
    current_rsi = row['RSI']
    current_vol = row['pct_change']
    current_liq = row['liquidity']
    mean_vol = row['rolling_mean_vol']
    std_vol = row['rolling_std_vol']
    mean_liq = row['rolling_mean_liq']
    std_liq = row['rolling_std_liq']

    if current_rsi < 30:  # Entry signal based on RSI
        trade_direction = 'BUY'
        trade_price = get_trade_price(current_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction)
        trade_size = (self.portfolio['cash'] * np.random.uniform(0.001, 0.005)) / trade_price
        if self.portfolio['cash'] >= trade_size * trade_price:
            self.portfolio['cash'] -= trade_size * trade_price
            self.portfolio['holdings'][row['symbol']] += trade_size
            trade_status = 'filled'
        else:
            trade_status = 'cancelled'
    elif current_rsi > 70:  # Exit signal based on RSI
        trade_direction = 'SELL'
        if self.portfolio['holdings'][row['symbol']] > 0:
            trade_size = min(self.portfolio['holdings'][row['symbol']], self.portfolio['cash']*np.random.uniform(0.001, 0.005) / current_price)
            trade_price = get_trade_price(current_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction)
            self.portfolio['cash'] += trade_size * trade_price
            self.portfolio['holdings'][row['symbol']] -= trade_size
            trade_status = 'filled'
        else:
            trade_size = 0
            trade_status = 'cancelled'
    else:
        # Also reached when RSI is NaN (both comparisons above are False).
        trade_direction = 'HOLD'
        trade_size = 0
        trade_price = current_price
        trade_status = 'skipped'

    if trade_size > 0:
        expected_price = row['ask_px_00']
        actual_price = row['price']
        transaction_time = row['ts_in_delta']
        transaction_cost = self._calculate_transaction_cost(row['Volume'], 0.3, self._mean_volume)
        slippage = expected_price - actual_price
        time_penalty = 1000 * transaction_time / 1e9
        reward = - (slippage + time_penalty + transaction_cost)

        self.cumulative_reward += reward
        self.trades.append({
            'step': self.current_step,
            'timestamp': current_time,
            'action': trade_direction,
            'price': trade_price,
            'shares': trade_size,
            'symbol': row['symbol'],
            'reward': reward,
            'transaction_cost': transaction_cost,
            'slippage': slippage,
            'time_penalty': time_penalty
        })

    self.current_step += 1
    # Wrap only after the final row has been processed. Wrapping one row
    # earlier would skip the last row and replay row 0 during run().
    done = self.current_step >= len(self.data)
    if done:
        self.current_step = 0
    return done

  def _calculate_transaction_cost(self, volume, volatility, daily_volume):
    """Return ``volatility * sqrt(volume / daily_volume)`` with a floored denominator."""
    epsilon = 1e-8
    # Floor the reference volume so a zero value cannot divide by zero.
    safe_daily_volume = max(daily_volume, epsilon)
    transaction_cost = volatility * np.sqrt(volume / safe_daily_volume)

    return transaction_cost

  def run(self):
    """Reset, process every row exactly once, and return ``(cumulative_reward, trades)``."""
    self.reset()
    for _ in range(len(self.data)):
        self.step()
    return self.cumulative_reward, self.trades

  def render(self):
    """Print the summary, save the blotter to 'trades_blotter.csv' and list each trade."""
    print(f'Cumulative reward: {self.cumulative_reward}')
    # Value holdings at the most recently processed row. After a full run the
    # cursor has wrapped to 0, so index -1 selects the final row.
    row = self.data.iloc[self.current_step - 1]
    total_portfolio_value = self.portfolio["cash"] + self.portfolio["holdings"][row["symbol"]] * row["price"]
    print(f'Total portfolio value: {total_portfolio_value}')
    trades_df = pd.DataFrame(self.trades)

    # Save the trades to a CSV file
    trades_df.to_csv('trades_blotter.csv', index=False)

    # Print the details of each trade
    for trade in self.trades:
        print(f"Step: {trade['step']}, Action: {trade['action']}, "
              f"Price: {trade['price']}, Shares: {trade['shares']}, Symbol: {trade['symbol']}, "
              f"Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, "
              f"Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")


In [60]:
ticker = 'AAPL'
# Take an explicit copy of the filtered rows: the blotter environment adds
# columns to the frame it is given, and writing into a filtered slice of
# market_features_df triggers pandas' SettingWithCopyWarning.
ticker_data = market_features_df[market_features_df['symbol'] == ticker].copy()

window_size = 60
daily_trading_limit = 1000
#USE IF YOU DID NOT RUN THE PREVIOUS TRAINING PROGRAM

# input_dim = 17
# transformer = TransformerFeatureExtractor(input_dim=input_dim, model_dim=64, num_heads=32, num_layers=24)
# ppo_policy = PPOPolicyNetwork(input_dim=64, action_dim=3)

# hybrid_model = HybridModel(transformer, ppo_policy)

# Pass the configured limit instead of repeating the literal.
env = TradingEnvironmentwithBlotter(ticker_data, daily_trading_limit=daily_trading_limit, window_size=window_size, hybrid_model=hybrid_model)

cumulative_reward, trades = env.run()
env.render()



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step: 20919, Action: BUY, Price: 185.79329980734067, Shares: 3.2310369804328953, Symbol: AAPL, Reward: -0.3263784015986263, Transaction Cost: 0.15200440159863537, Slippage: 0.009999999999990905, Time Penalty: 0.164374
Step: 20929, Action: SELL, Price: 203.65262170085046, Shares: 4.32149518452912, Symbol: AAPL, Reward: -0.5937421903265601, Transaction Cost: 0.42452519032656, Slippage: 0.0, Time Penalty: 0.169217
Step: 20939, Action: SELL, Price: 196.02497105007538, Shares: 5.158908238115961, Symbol: AAPL, Reward: -0.3798236862811968, Transaction Cost: 0.21496668628119675, Slippage: 0.0, Time Penalty: 0.164857
Step: 20940, Action: SELL, Price: 186.6641213908406, Shares: 1.2584475083851778, Symbol: AAPL, Reward: -0.3812596325306775, Transaction Cost: 0.21171763253068204, Slippage: 0.0049999999999954525, Time Penalty: 0.164542
Step: 20941, Action: SELL, Price: 189.67810261702522, Shares: 3.399707835855961, Symbol: AAPL, Rewar

In [50]:
data.head(10) # Preview the first 10 rows, including the indicator columns

  and should_run_async(code)


Unnamed: 0_level_0,ts_recv,rtype,publisher_id,instrument_id,action,side,depth,price,size,flags,ts_in_delta,sequence,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol,Close,Volume,High,Low,Open,RSI,MACD,MACD_signal,MACD_hist,Stoch_k,Stoch_d,OBV,Upper_BB,Middle_BB,Lower_BB,ATR_1,ATR_2,ATR_5,ATR_10,ATR_20,ADX,+DI,-DI,CCI,DLR,TWAP,VWAP
ts_event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
2023-07-03 08:00:14.385893078,1688371214386057385,1,2,32,T,N,0,194.05,50,130,164307,326232,194.0,194.3,3101,19,4,10,AAPL,194.05,50,194.3,194.0,194.05,63.903099,0.006486,-0.001681,0.008167,52.525253,61.952862,-266.0,194.068104,194.015,193.961896,0.3,0.175078,0.098707,0.074095,0.068133,97.139167,34.06901,0.175986,166.666667,0.0,194.021176,194.01736
2023-07-03 08:00:14.385899379,1688371214386063777,1,2,32,T,N,0,194.05,50,130,164398,326233,194.0,194.3,3101,19,4,10,AAPL,194.05,50,194.3,194.0,194.05,63.903099,0.007202,9.5e-05,0.007107,38.383838,52.525253,-266.0,194.069951,194.019,193.968049,0.3,0.237539,0.138986,0.097331,0.082183,97.305096,25.003344,0.129157,83.333333,0.0,194.022,194.019916
2023-07-03 08:00:15.804687301,1688371215804852019,1,2,32,T,B,0,194.21,10,130,164718,328131,194.0,194.21,3101,29,4,1,AAPL,194.21,10,194.21,194.0,194.05,90.057,0.020445,0.004165,0.016279,40.40404,43.771044,-256.0,194.126125,194.03,193.933875,0.21,0.22377,0.153194,0.108888,0.089847,97.453177,20.825771,0.107577,79.268293,0.000824,194.027222,194.020531
2023-07-03 08:00:19.671312224,1688371219671476629,1,2,32,T,N,0,194.14,10,130,164405,331406,194.0,194.16,3101,400,4,1,AAPL,194.14,10,194.16,194.0,194.21,67.13792,0.025003,0.008333,0.01667,49.494949,42.760943,-266.0,194.142928,194.0375,193.932072,0.21,0.216885,0.164559,0.119232,0.09698,97.585797,17.649962,0.091172,-3.205128,-0.00036,194.03027,194.020936
2023-07-03 08:00:23.368671235,1688371223368835585,1,2,32,T,B,0,194.13,10,130,164350,334235,194.0,194.13,3101,400,4,1,AAPL,194.13,10,194.13,194.0,194.14,64.608304,0.027491,0.012165,0.015327,57.575758,49.158249,-276.0,194.155247,194.044,193.932753,0.14,0.178442,0.159646,0.121352,0.09951,97.704947,15.908276,0.082175,-113.095238,-5.2e-05,194.032895,194.021269
2023-07-03 08:00:29.849775570,1688371229849940201,1,2,32,T,B,0,194.13,100,130,164631,339055,194.01,194.13,101,390,2,1,AAPL,194.13,100,194.13,194.01,194.13,64.608304,0.029128,0.015557,0.013571,49.494949,52.188552,-276.0,194.165821,194.0505,193.935179,0.12,0.149221,0.151715,0.121214,0.100705,97.812299,14.580187,0.075315,-64.102564,0.0,194.035385,194.024584
2023-07-03 08:00:30.451005195,1688371230451172473,1,2,32,T,N,0,194.02,10,0,167278,339547,194.01,194.16,101,400,2,1,AAPL,194.02,10,194.16,194.01,194.13,43.634446,0.021303,0.016706,0.004597,34.545455,47.205387,-286.0,194.16539,194.0515,193.93761,0.15,0.149611,0.151372,0.124141,0.103555,97.920694,15.127743,0.067706,-113.425926,-0.000567,194.035,194.02477
2023-07-03 08:00:30.451829005,1688371230451995982,1,2,32,T,A,0,194.0,3075,0,166977,339553,194.0,194.16,3119,400,5,1,AAPL,194.0,3075,194.16,194.0,194.02,41.026625,0.013334,0.016032,-0.002698,18.383838,34.141414,-3361.0,194.16539,194.0515,193.93761,0.16,0.154805,0.153098,0.12778,0.106793,97.269584,13.554677,0.710575,-103.825137,-0.000103,194.034146,194.041799
2023-07-03 08:00:30.566381995,1688371230566546422,1,2,32,T,N,0,194.09,10,130,164427,339623,194.0,194.16,19,400,2,1,AAPL,194.09,10,194.16,194.0,194.0,54.271135,0.014119,0.015649,-0.001531,12.222222,21.717172,-3351.0,194.168499,194.056,193.943501,0.16,0.157403,0.154478,0.131046,0.109824,96.67885,12.189628,0.639015,39.215686,0.000464,194.035476,194.041879
2023-07-03 08:00:37.857944791,1688371237858109689,1,2,32,T,B,0,194.12,10,130,164898,346965,194.02,194.12,80,400,1,1,AAPL,194.12,10,194.12,194.02,194.09,57.682756,0.016965,0.015913,0.001053,23.333333,17.979798,-3341.0,194.174712,194.062,193.949288,0.1,0.128701,0.143582,0.127904,0.109268,96.141878,11.415825,0.59845,55.555556,0.000155,194.037442,194.041965
