### PPO AGENT:

#### Load the dataset and preprocess the dataframe in the required format.

Technical indicators created for the environment include:

- RSI
- MACD
- Stoch_k
- OBV
- Upper_BB
- ATR_1
- ATR_2
- ATR_5
- ATR_10
- ATR_20

In [1]:
import pandas as pd
import numpy as np
import talib as ta

class TechnicalIndicators:
    def __init__(self, data):
        self.data = data

    def add_momentum_indicators(self):
        self.data['RSI'] = ta.RSI(self.data['Close'], timeperiod=14)
        self.data['MACD'], self.data['MACD_signal'], self.data['MACD_hist'] = ta.MACD(self.data['Close'], fastperiod=12, slowperiod=26, signalperiod=9)
        self.data['Stoch_k'], self.data['Stoch_d'] = ta.STOCH(self.data['High'], self.data['Low'], self.data['Close'],
                                                              fastk_period=14, slowk_period=3, slowd_period=3)

    def add_volume_indicators(self):
        self.data['OBV'] = ta.OBV(self.data['Close'], self.data['Volume'])

    def add_volatility_indicators(self):
        self.data['Upper_BB'], self.data['Middle_BB'], self.data['Lower_BB'] = ta.BBANDS(self.data['Close'], timeperiod=20)
        self.data['ATR_1'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=1)
        self.data['ATR_2'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=2)
        self.data['ATR_5'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=5)
        self.data['ATR_10'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=10)
        self.data['ATR_20'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=20)

    def add_trend_indicators(self):
        self.data['ADX'] = ta.ADX(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['+DI'] = ta.PLUS_DI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['-DI'] = ta.MINUS_DI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['CCI'] = ta.CCI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=5)

    def add_other_indicators(self):
        self.data['DLR'] = np.log(self.data['Close'] / self.data['Close'].shift(1))
        self.data['TWAP'] = self.data['Close'].expanding().mean()
        self.data['VWAP'] = (self.data['Volume'] * (self.data['High'] + self.data['Low']) / 2).cumsum() / self.data['Volume'].cumsum()

    def add_all_indicators(self):
        self.add_momentum_indicators()
        self.add_volume_indicators()
        self.add_volatility_indicators()
        self.add_trend_indicators()
        self.add_other_indicators()
        return self.data

In [2]:
data = pd.read_csv('xnas-itch-20230703.tbbo.csv')

# Preprocessing to create necessary columns
data['price']=data['price']/1e9
data['bid_px_00']=data['bid_px_00']/1e9
data['ask_px_00']=data['ask_px_00']/1e9

data['Close'] = data['price']
data['Volume'] = data['size']
data['High'] = data[['bid_px_00', 'ask_px_00']].max(axis=1)
data['Low'] = data[['bid_px_00', 'ask_px_00']].min(axis=1)
data['Open'] = data['Close'].shift(1).fillna(data['Close'])


ti = TechnicalIndicators(data)
df_with_indicators = ti.add_all_indicators()
market_features_df = df_with_indicators

### Arrival cost

In [3]:
def arrival_cost_calculation(data):
    arrival_price = (data['bid_px_00'] + data['ask_px_00']) / 2
    execution_price = data['price']

    if data['side'] == 'B':
        return 100 * ((execution_price - arrival_price) / arrival_price)
    
    elif data['side'] == 'A':
        return 100 * ((arrival_price - execution_price) / arrival_price)
    
    else:
        return np.nan

data['arrival_cost (%)'] = data.apply(arrival_cost_calculation, axis=1)

### Half-spread cost

In [4]:
data['half-spread_cost (%)'] = ((data['ask_px_00'] - data['bid_px_00']) / 2) / 100

### Weighted mid-price

In [5]:
imbalance = data['bid_sz_00']/(data['bid_sz_00'] + data['ask_sz_00'])
data['weighted_mid_price'] = imbalance * data['ask_px_00'] + (1 - imbalance) * data['bid_px_00']

### Bid-ask bounce

This feature measures where the trade price falls within the bid-ask spread.
Values closer to 0 or 1 may indicate higher urgency and potentially higher slippage.

In [6]:
data['bid_ask_bounce'] = (data['price'] - data['bid_px_00']) / (data['ask_px_00'] - data['bid_px_00'])

### Momentum

In [7]:
data['price_momentum'] = (data['Close'] - data['Close'].shift(10)) / data['Close'].shift(10)

### Price impact ratio

In [8]:
previous_close = data['Close'].shift(1)
data['price_impact_ratio'] = abs(data['price'] - previous_close) / ((data['bid_px_00'] + data['ask_px_00']) / 2)

### Liquidity consumption rate

This measures how quickly available liquidity is being consumed by recent trades. High consumption rates may indicate potential for higher slippage due to depleting liquidity.

In [9]:
data['liquidity_consumption_rate'] = data['size'].rolling(window=10).sum() / (data['bid_sz_00'] + data['ask_sz_00'])

### Time between trades

Measures the time between consecutive trades, which can indicate trading intensity. Can help identify periods of high or low trading activity.

In [10]:
data['TTT'] = data['ts_event'].diff()

Checking the dataset:

In [11]:
# Show all columns in pandas
pd.set_option('display.max_columns', None)

market_features_df

Unnamed: 0,ts_recv,ts_event,rtype,publisher_id,instrument_id,action,side,depth,price,size,flags,ts_in_delta,sequence,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol,Close,Volume,High,Low,Open,RSI,MACD,MACD_signal,MACD_hist,Stoch_k,Stoch_d,OBV,Upper_BB,Middle_BB,Lower_BB,ATR_1,ATR_2,ATR_5,ATR_10,ATR_20,ADX,+DI,-DI,CCI,DLR,TWAP,VWAP,arrival_cost (%),half-spread_cost (%),weighted_mid_price,bid_ask_bounce,price_momentum,price_impact_ratio,liquidity_consumption_rate,TTT
0,1688371200660869841,1688371200660704717,1,2,32,T,B,0,194.12,1,130,165124,303634,193.63,194.12,27,27,1,1,AAPL,194.12,1,194.12,193.63,194.12,,,,,,,1.0,,,,,,,,,,,,,,194.120000,193.875000,0.126370,0.00245,193.875000,1.0,,,,
1,1688371201201402566,1688371201201237816,1,2,32,T,B,0,194.11,2,130,164750,304724,193.90,194.11,5,400,1,1,AAPL,194.11,2,194.11,193.90,194.12,,,,,,,-1.0,,,,0.22,,,,,,,,,-0.000052,194.115000,193.961667,0.054122,0.00105,193.902593,1.0,,0.000052,,5.405331e+08
2,1688371201233688992,1688371201233524761,1,2,32,T,B,0,194.11,8,130,164231,304850,193.90,194.11,5,398,1,1,AAPL,194.11,8,194.11,193.90,194.11,,,,,,,-1.0,,,,0.21,0.215000,,,,,,,,0.000000,194.113333,193.993182,0.054122,0.00105,193.902605,1.0,,0.000000,,3.228694e+07
3,1688371201317556361,1688371201317392163,1,2,32,T,B,0,194.11,2,130,164198,305101,193.90,194.11,5,390,1,1,AAPL,194.11,2,194.11,193.90,194.11,,,,,,,-1.0,,,,0.21,0.212500,,,,,,,,0.000000,194.112500,193.995000,0.054122,0.00105,193.902658,1.0,,0.000000,,8.386740e+07
4,1688371201478520666,1688371201478356044,1,2,32,T,B,0,194.00,7,130,164622,306430,193.90,194.00,5,200,1,1,AAPL,194.00,7,194.00,193.90,194.11,,,,,,,-8.0,,,,0.21,0.211250,,,,,,,-74.468085,-0.000567,194.090000,193.979250,0.025780,0.00050,193.902439,1.0,,0.000567,,1.609639e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59266,1688417954514485218,1688417954514320323,1,2,32,T,B,0,192.44,6,130,164895,252532002,192.40,192.44,40,7,1,1,AAPL,192.44,6,192.44,192.40,192.40,63.061829,0.005002,0.006368,-0.001366,33.333333,22.222222,913234.0,192.444977,192.4175,192.390023,0.04,0.034997,0.031258,0.028636,0.027592,15.474528,6.612534,3.765166,118.055556,0.000208,192.701135,192.722211,0.010394,0.00020,192.434043,1.0,0.000052,0.000208,1.063830,4.510372e+10
59267,1688417961020718430,1688417961020553920,1,2,32,T,B,0,192.44,1,130,164510,252532102,192.40,192.44,40,1,1,1,AAPL,192.44,1,192.44,192.40,192.44,63.061829,0.006355,0.006365,-0.000011,66.666667,33.333333,913234.0,192.447411,192.4195,192.391589,0.04,0.037498,0.033006,0.029772,0.028212,16.329018,5.953254,3.389772,83.333333,0.000000,192.701131,192.722211,0.010394,0.00020,192.439024,1.0,0.000052,0.000000,1.097561,6.506234e+09
59268,1688417973297905504,1688417973297741235,1,2,32,T,A,0,192.40,5,130,164269,252532347,192.40,192.46,40,6,1,1,AAPL,192.40,5,192.46,192.40,192.44,44.499081,0.004151,0.005922,-0.001771,66.666667,55.555556,913229.0,192.447411,192.4195,192.391589,0.06,0.048749,0.038405,0.032795,0.029802,19.013869,9.751295,2.919558,12.820513,-0.000208,192.701125,192.722211,0.015590,0.00030,192.452174,0.0,-0.000156,0.000208,1.065217,1.227719e+10
59269,1688417996889779362,1688417996889614660,1,2,32,T,B,0,192.45,3,130,164702,252532944,192.40,192.45,35,16,1,1,AAPL,192.45,3,192.45,192.40,192.40,60.250043,0.006366,0.006011,0.000355,61.111111,64.814815,913232.0,192.451394,192.4220,192.392606,0.05,0.049375,0.040724,0.034515,0.030812,21.506945,8.671762,2.596344,100.000000,0.000260,192.701121,192.722211,0.012992,0.00025,192.434314,1.0,0.000104,0.000260,0.843137,2.359187e+10


In [4]:
df_with_indicators.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59271 entries, 0 to 59270
Data columns (total 47 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   ts_recv        59271 non-null  int64  
 1   ts_event       59271 non-null  int64  
 2   rtype          59271 non-null  int64  
 3   publisher_id   59271 non-null  int64  
 4   instrument_id  59271 non-null  int64  
 5   action         59271 non-null  object 
 6   side           59271 non-null  object 
 7   depth          59271 non-null  int64  
 8   price          59271 non-null  float64
 9   size           59271 non-null  int64  
 10  flags          59271 non-null  int64  
 11  ts_in_delta    59271 non-null  int64  
 12  sequence       59271 non-null  int64  
 13  bid_px_00      59271 non-null  float64
 14  ask_px_00      59271 non-null  float64
 15  bid_sz_00      59271 non-null  int64  
 16  ask_sz_00      59271 non-null  int64  
 17  bid_ct_00      59271 non-null  int64  
 18  ask_ct

#### Create the Trading Environment class for the PPO Agent

In [6]:
import gymnasium as gym
import numpy as np
import pandas as pd

class TradingEnvironment(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, data, daily_trading_limit):
        super(TradingEnvironment, self).__init__()
        self.data = data
        self.daily_trading_limit = daily_trading_limit
        self.current_step = 0

        # Extract state columns
        self.state_columns = ['Close', 'Volume', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI']

        # Initialize balance, shares held, and total shares traded
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0

        # Define action space: [Hold, Buy, Sell]
        self.action_space = gym.spaces.Discrete(3)

        # Define observation space based on state columns
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(self.state_columns),), dtype=np.float32
        )

    def reset(self):
        self.current_step = 0
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        return self._next_observation()

    def _next_observation(self):
        return self.data[self.state_columns].iloc[self.current_step].values

    def step(self, action):
        expected_price = self.data.iloc[self.current_step]['ask_px_00']
        actual_price = self.data.iloc[self.current_step]['price']
        transaction_time = self.data.iloc[self.current_step]['ts_in_delta']
        self._take_action(action)
        reward = 0
        
        if self.current_step >= len(self.data) - 1:
            self.current_step = 0
        if action != 0:
            transaction_cost= self._calculate_transaction_cost(self.data.iloc[self.current_step]['Volume'], 0.3, self.data['Volume'].mean())
            reward = self._calculate_reward(expected_price, actual_price, transaction_time, transaction_cost)
            self.cumulative_reward += reward
            if self.trades:
                self.trades[-1]['reward'] = reward
                self.trades[-1]['transaction_cost'] = transaction_cost
                self.trades[-1]['slippage'] = expected_price - actual_price
                self.trades[-1]['time_penalty'] = 100*transaction_time/1e9
        done = self.current_step == len(self.data) - 1
        obs = self._next_observation()
        info = {
        'step': self.current_step,
        'action': action,
        'price': actual_price,
        'shares': self.trades[-1]['shares'] if self.trades else 0
    }
        self.current_step += 1

        return obs, reward, done, info

    def _take_action(self, action):
        current_price = self.data.iloc[self.current_step]['Close']
        current_time = pd.to_datetime(self.data.iloc[self.current_step]['ts_event'])
        trade_info = {'step': self.current_step, 'timestamp': current_time, 'action': action, 'price': current_price, 'shares': 0, 'reward': 0, 'transaction_cost': 0, 'slippage': 0, 'time_penalty': 0}

        if action == 1: # and self.total_shares_traded < self.daily_trading_limit:  # Buy
            shares_bought = (self.balance * np.random.uniform(0.001, 0.005)) // current_price
            self.balance -= shares_bought * current_price
            self.shares_held += shares_bought
            self.total_shares_traded += shares_bought
            trade_info['shares'] = shares_bought
            if(shares_bought>0):
                self.trades.append(trade_info)
        elif action == 2: # and self.total_shares_traded < self.daily_trading_limit:  # Sell
            shares_sold = min((self.balance * np.random.uniform(0.001, 0.005)) // current_price, self.shares_held)
            self.balance += shares_sold * current_price
            self.shares_held -= shares_sold
            self.total_shares_traded -= shares_sold
            trade_info['shares'] = shares_sold
            if(shares_sold>0):
                self.trades.append(trade_info)

    def _calculate_reward(self, expected_price, actual_price, transaction_time, transaction_cost):
        slippage = expected_price - actual_price
        time_penalty = 100*transaction_time/1e9
        reward = - (slippage + time_penalty + transaction_cost)
        return reward
    
    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        return volatility * np.sqrt(volume / daily_volume)
    
    def run(self):
        self.reset()
        for _ in range(len(self.data)):
            self.step()
        return self.cumulative_reward, self.trades

    def render(self, mode='human', close=False):
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares held: {self.shares_held}')
        print(f'Total shares traded: {self.total_shares_traded}')
        print(f'Total portfolio value: {self.balance + self.shares_held * self.data.iloc[self.current_step]["Close"]}')
        print(f'Cumulative reward: {self.cumulative_reward}')
        self.print_trades()

    def print_trades(self):
        # download all trades in a pandas dataframe using .csv
        trades_df = pd.DataFrame(self.trades)
        # Save a csv
        trades_df.to_csv('trades_ppo.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")

#### Train the PPO Agent with the environment and for different tickers.

In [7]:
# Define the daily trading limit (total number of shares to trade per day)
daily_trading_limit = 1000

ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

env = TradingEnvironment(ticker_data, daily_trading_limit)  # Adjust window_size if needed

In [None]:
import pandas as pd
from stable_baselines3 import PPO

# Define the daily trading limit (total number of shares to trade per day)
daily_trading_limit = 1000

ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

# Create the trading environment
env = TradingEnvironment(ticker_data, daily_trading_limit)  # Adjust window_size if needed

# Define the best hyperparameters
best_hyperparameters = {'learning_rate': 0.0009931989008886031,'n_steps': 512,'batch_size': 128, 'gamma': 0.9916829193042708,'clip_range': 0.21127653449387027,'n_epochs': 6} # type: ignore

# Create the RL model with the best hyperparameters
model = PPO('MlpPolicy', env, verbose=1, **best_hyperparameters)

# Train the model
model.learn(total_timesteps=10000)

# Save the model
model.save("trading_agent")

# Evaluate the model
obs = env.reset()
for _ in range(len(ticker_data)):
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    if done:
        break

# Render the final state
env.render()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


  from .autonotebook import tqdm as notebook_tqdm


----------------------------
| time/              |     |
|    fps             | 230 |
|    iterations      | 1   |
|    time_elapsed    | 2   |
|    total_timesteps | 512 |
----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 223         |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 1024        |
| train/                  |             |
|    approx_kl            | 0.017060015 |
|    clip_fraction        | 0.175       |
|    clip_range           | 0.211       |
|    entropy_loss         | -1.09       |
|    explained_variance   | -0.0433     |
|    learning_rate        | 0.000993    |
|    loss                 | 0.133       |
|    n_updates            | 6           |
|    policy_gradient_loss | -0.0185     |
|    value_loss           | 0.726       |
-----------------------------------------
-----------------------------------------

### TRADING BLOTTER:

#### Preprocess the data for the trading blotter:

In [10]:
import pandas as pd
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt

INITIAL_CASH = 10_000_000  # $10 million

def preprocess_data(df):
    df['liquidity'] = df['bid_sz_00'] * df['bid_px_00'] + df['ask_sz_00'] * df['ask_px_00']
    return df

def calculate_rsi(data, window=14):
    delta = data.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

def calculate_vol_and_liquidity(price_df, volume_df, window_size):
    # Calculate rolling statistics
    rolling_mean_vol = price_df.pct_change().rolling(window=window_size).mean()
    rolling_std_vol = price_df.pct_change().rolling(window=window_size).std()
    rolling_mean_liq = volume_df.rolling(window=window_size).mean()
    rolling_std_liq = volume_df.rolling(window=window_size).std()
    
    return rolling_mean_vol, rolling_std_vol, rolling_mean_liq, rolling_std_liq

def get_percentile(current_value, mean, std):
    if std > 0:
        z_score = (current_value - mean) / std
        percentile = norm.cdf(z_score)
    else:
        percentile = 0.5  # No variation
    return percentile

def get_trade_price(base_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction):
    vol_percentile = get_percentile(current_vol, mean_vol, std_vol)
    liq_percentile = get_percentile(current_liq, mean_liq, std_liq)

    # Define price adjustment scenarios based on market conditions
    if vol_percentile >= 0.9 and liq_percentile < 0.1:
        price_adjustment_percent = np.random.uniform(-0.25, -0.15)
    elif vol_percentile <= 0.1 and liq_percentile < 0.1:
        price_adjustment_percent = np.random.uniform(-0.10, -0.05)
    elif vol_percentile >= 0.9 and liq_percentile >= 0.9:
        price_adjustment_percent = np.random.uniform(-0.05, +0.10)
    else:
        price_adjustment_percent = np.random.uniform(-0.05, +0.05)  # Default for normal conditions

    # Adjust price based on trade direction
    if trade_direction == 'BUY':
        adjusted_price = base_price * (1 - price_adjustment_percent)
    else:  # SELL
        adjusted_price = base_price * (1 + price_adjustment_percent)
    
    return adjusted_price


Matplotlib is building the font cache; this may take a moment.


#### Create trading environment for the blotter

In [11]:
class TradingEnvironmentwithBlotter:
    def __init__(self, data, daily_trading_limit, window_size):
        self.data = preprocess_data(data)
        self.daily_trading_limit = daily_trading_limit
        self.window_size = window_size
        self.state_columns = ['price', 'liquidity', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI']
        self.reset()

    def reset(self):
        self.current_step = 0
        self.balance = INITIAL_CASH
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        self.portfolio = {'cash': self.balance, 'holdings': {ticker: 0 for ticker in self.data['symbol'].unique()}}
        self.data['RSI'] = calculate_rsi(self.data['price'])
        self.data['pct_change'] = self.data['price'].pct_change()
        self.data['rolling_mean_vol'], self.data['rolling_std_vol'], self.data['rolling_mean_liq'], self.data['rolling_std_liq'] = calculate_vol_and_liquidity(self.data['price'], self.data['liquidity'], self.window_size)

    def step(self):
        row = self.data.iloc[self.current_step]
        current_price = row['price']
        current_time = pd.to_datetime(row['ts_event'])
        current_rsi = row['RSI']
        current_vol = row['pct_change']
        current_liq = row['liquidity']
        mean_vol = row['rolling_mean_vol']
        std_vol = row['rolling_std_vol']
        mean_liq = row['rolling_mean_liq']
        std_liq = row['rolling_std_liq']

        if current_rsi < 30:  # Entry signal based on RSI
            trade_direction = 'BUY'
            trade_price = get_trade_price(current_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction)
            trade_size = (self.portfolio['cash'] * np.random.uniform(0.001, 0.005)) / trade_price
            if self.portfolio['cash'] >= trade_size * trade_price:
                self.portfolio['cash'] -= trade_size * trade_price
                self.portfolio['holdings'][row['symbol']] += trade_size
                trade_status = 'filled'
            else:
                trade_status = 'cancelled'
        elif current_rsi > 70:  # Exit signal based on RSI
            trade_direction = 'SELL'
            if self.portfolio['holdings'][row['symbol']] > 0:
                trade_size = min(self.portfolio['holdings'][row['symbol']], self.portfolio['cash']*np.random.uniform(0.001, 0.005) / current_price)
                trade_price = get_trade_price(current_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction)
                self.portfolio['cash'] += trade_size * trade_price
                self.portfolio['holdings'][row['symbol']] -= trade_size
                trade_status = 'filled'
            else:
                trade_size = 0
                trade_status = 'cancelled'
        else:
            trade_direction = 'HOLD'
            trade_size = 0
            trade_price = current_price
            trade_status = 'skipped'

        if trade_size > 0:
            expected_price = row['ask_px_00']
            actual_price = row['price']
            transaction_time = row['ts_in_delta']
            transaction_cost = self._calculate_transaction_cost(row['Volume'], 0.3, self.data['Volume'].mean())
            slippage = expected_price - actual_price
            time_penalty = 1000 * transaction_time / 1e9
            reward = - (slippage + time_penalty + transaction_cost)
        
            self.cumulative_reward += reward
            self.trades.append({
                'step': self.current_step,
                'timestamp': current_time,
                'action': trade_direction,
                'price': trade_price,
                'shares': trade_size,
                'symbol': row['symbol'],
                'reward': reward,
                'transaction_cost': transaction_cost,
                'slippage': slippage,
                'time_penalty': time_penalty
            })

            

        self.current_step += 1
        if self.current_step >= len(self.data) - 1:
            done=True
            self.current_step = 0

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        return volatility * np.sqrt(volume / daily_volume)

    def run(self):
        self.reset()
        for _ in range(len(self.data)):
            self.step()
        return self.cumulative_reward, self.trades

    def render(self):
        print(f'Cumulative reward: {self.cumulative_reward}')
        row = self.data.iloc[self.current_step]
        print(f'Total portfolio value: {self.portfolio["cash"] + self.portfolio["holdings"][row["symbol"]]*row["Close"]}')
        # get trades in a pandas dataframe
        trades_df = pd.DataFrame(self.trades)
        # Save a csv
        trades_df.to_csv('trades_blotter.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Symbol: {trade['symbol']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")

#### Run the trading blotter

In [None]:
# Filter data for the specified ticker
ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

window_size = 60
daily_trading_limit = 1000
# Create the trading environment
env = TradingEnvironmentwithBlotter(ticker_data, daily_trading_limit=1000, window_size=window_size)  # Daily trading limit of 1000 shares

# Run the environment
cumulative_reward, trades = env.run()

# Render the results
env.render()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['liquidity'] = df['bid_sz_00'] * df['bid_px_00'] + df['ask_sz_00'] * df['ask_px_00']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data['RSI'] = calculate_rsi(self.data['price'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data['pct_change'] = self.data['price'].pct_change()
A valu

Cumulative reward: -12231.547996131785
Total portfolio value: 10022412.45503103
Step: 106, Action: BUY, Price: 193.01260992217345, Shares: 229.8146152353405, Symbol: AAPL, Reward: -0.382250785758524, Transaction Cost: 0.08423678575852858, Slippage: 0.12999999999999545, Time Penalty: 0.168014
Step: 125, Action: SELL, Price: 203.57345254737228, Shares: 193.32637614428842, Symbol: AAPL, Reward: -0.27620137226887, Transaction Cost: 0.0718373722688495, Slippage: 0.040000000000020464, Time Penalty: 0.164364
Step: 135, Action: BUY, Price: 199.02027999805844, Shares: 170.5205584448564, Symbol: AAPL, Reward: -0.2850713465369663, Transaction Cost: 0.025398346536962962, Slippage: 0.09000000000000341, Time Penalty: 0.169673
Step: 136, Action: BUY, Price: 188.63514250526757, Shares: 173.0130124705765, Symbol: AAPL, Reward: -0.3676554532605354, Transaction Cost: 0.0879824532605218, Slippage: 0.11000000000001364, Time Penalty: 0.169673
Step: 153, Action: BUY, Price: 192.93745914631475, Shares: 52.617

In [None]:
df=market_features_df.copy()

In [None]:
df['timestamp']=pd.to_datetime(df['ts_recv'])

In [None]:
df.head()

Unnamed: 0,ts_recv,ts_event,rtype,publisher_id,instrument_id,action,side,depth,price,size,flags,ts_in_delta,sequence,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol,Close,Volume,High,Low,Open,RSI,MACD,MACD_signal,MACD_hist,Stoch_k,Stoch_d,OBV,Upper_BB,Middle_BB,Lower_BB,ATR_1,ATR_2,ATR_5,ATR_10,ATR_20,ADX,+DI,-DI,CCI,DLR,TWAP,VWAP,timestamp
35,1688371212400103305,1688371212399937688,1,2,32,T,B,0,194.05,56,130,165617,324353,194.0,194.05,3079,56,2,1,AAPL,194.05,56,194.05,194.0,194.05,51.852848,-2.561087,-3.619556,1.058469,99.974582,99.974582,-282.0,254.713931,186.1505,117.587069,0.05,7.422578,23.240495,19.656509,12.550193,69.565924,51.717387,47.989962,0.0,0.0,189.649722,192.24824,2023-07-03 08:00:12.400103305
36,1688371214386057385,1688371214385893078,1,2,32,T,N,0,194.05,50,130,164307,326232,194.0,194.3,3101,19,4,10,AAPL,194.05,50,194.3,194.0,194.05,51.852848,-1.532555,-3.202156,1.669601,99.930172,99.959779,-282.0,254.718308,186.1535,117.588692,0.3,3.861289,18.652396,17.720858,11.937684,64.872152,51.762467,47.921535,166.666667,0.0,189.768649,192.297868,2023-07-03 08:00:14.386057385
37,1688371214386063777,1688371214385899379,1,2,32,T,N,0,194.05,50,130,164398,326233,194.0,194.3,3101,19,4,10,AAPL,194.05,50,194.3,194.0,194.05,51.852848,-0.70926,-2.703577,1.994317,99.885761,99.930172,-282.0,254.721956,186.156,117.590044,0.3,2.080645,14.981917,15.978772,11.3558,60.513649,51.683105,47.848062,83.333333,0.0,189.881316,192.344972,2023-07-03 08:00:14.386063777
38,1688371215804852019,1688371215804687301,1,2,32,T,B,0,194.21,10,130,164718,328131,194.0,194.21,3101,29,4,1,AAPL,194.21,10,194.21,194.0,194.05,51.895447,-0.043381,-2.171538,2.128156,99.875196,99.897043,-272.0,254.737322,186.1665,117.595678,0.21,1.145322,12.027533,14.401895,10.79851,56.466467,51.623439,47.792823,79.268293,0.000824,189.992308,192.353879,2023-07-03 08:00:15.804852019
39,1688371216978631317,1688371216978466819,1,2,470,T,A,0,114.57,43,130,164498,329439,114.57,114.76,43,27,1,1,AMD,114.57,43,114.76,114.57,194.21,35.20102,-5.874236,-2.912077,-2.962159,83.062571,94.274509,-315.0,257.37349,182.195,107.01651,79.64,40.392661,25.550027,20.925705,14.240584,54.540006,35.082367,64.436445,-166.666667,-0.527754,188.10675,190.699287,2023-07-03 08:00:16.978631317
