In [1]:
import gymnasium as gym
from gymnasium import spaces
import pandas as pd
import numpy as np
from stable_baselines3.common.env_checker import check_env
from execution_env import ExecutionEnv

## 1. Preparing the market data

### a. Importing the data

In [2]:
# Load the 1-minute BTC/USD bars for May 2023 from the local CSV.
# The 'timestamp' column becomes the index and is parsed into datetimes.
local_file_path = 'data/btc_us_1min_bars_2023-05-01_to_2023-05-31.csv'
df = pd.read_csv(
    local_file_path,
    index_col='timestamp',
    parse_dates=True,
)
df.head()

timestamp,close,high,low,trade_count,open,volume,vwap,symbol
2023-05-01 00:00:00+00:00,29286.99965,29290.19,29222.08,286,29260.73,19.62345,29263.335504,BTC/USD
2023-05-01 00:01:00+00:00,29292.210905,29326.33362,29282.474943,163,29285.21,14.78774,29302.23499,BTC/USD
2023-05-01 00:02:00+00:00,29255.232775,29298.44,29243.0,240,29298.44,13.31829,29267.462058,BTC/USD
2023-05-01 00:03:00+00:00,29290.061,29294.24,29243.22,76,29245.4,3.84511,29270.874708,BTC/USD
2023-05-01 00:04:00+00:00,29320.109138,29327.485,29288.14585,155,29296.85,10.03054,29308.483835,BTC/USD


## 2. Instantiating and checking the environment

In [3]:
# Build the execution environment on top of the loaded bars.
# NOTE(review): presumably `initial_inventory` is the quantity to liquidate and
# `trade_horizon` the number of steps allowed — confirm against ExecutionEnv.
env = ExecutionEnv(
    df=df,
    initial_inventory=50,
    trade_horizon=60,
)

In [4]:
# Validate `env` with the stable-baselines3 environment checker.
# check_env raises if the environment does not follow the Gymnasium API,
# so reaching the second print means the check passed.
print("--- Checking environment with Stable Baselines checker ---")
check_env(env)
print("--- Environment check passed! ---")

--- Checking environment with Stable Baselines checker ---
--- Environment check passed! ---




## 3. Running a simple test loop with a random agent

In [5]:
print("--- Running a test episode with a random agent ---")
episodes = 1
RANDOM_AGENT_SEED = 42  # fixed so that Restart & Run All reproduces the same episode(s)

# `action_space.sample()` draws from the space's own RNG, which is separate from
# the RNG seeded through `env.reset(seed=...)`, so it has to be seeded explicitly.
env.action_space.seed(RANDOM_AGENT_SEED)

for ep in range(episodes):
    # A distinct seed per episode keeps episodes different from one another
    # while staying reproducible across runs.
    obs, info = env.reset(seed=RANDOM_AGENT_SEED + ep)
    done = False
    step = 0
    while not done:
        env.render()
        random_action = env.action_space.sample() # The agent picks a random action
        obs, reward, terminated, truncated, info = env.step(random_action)
        done = terminated or truncated
        step += 1
    env.render() # Render final state

    # Calculate final performance.
    # Benchmark price: close of the bar at the episode's starting tick
    # (column-first lookup avoids materialising a mixed-dtype row).
    arrival_price = df['close'].iloc[env.start_tick]
    initial_value = env.initial_inventory * arrival_price
    total_cash_received = info['total_cash_received']
    implementation_shortfall = initial_value - total_cash_received

    # NOTE(review): shortfall and average price both assume the whole initial
    # inventory was sold by the end of the episode — confirm that ExecutionEnv
    # force-liquidates any remainder at the horizon, otherwise these are biased.
    print("\n--- Episode Finished ---")
    print(f"Steps Taken: {step}")
    print(f"Initial Value: ${initial_value:,.2f}")
    print(f"Final Cash Received: ${total_cash_received:,.2f}")
    print(f"Implementation Shortfall: ${implementation_shortfall:,.2f}")
    print(f"Average Execution Price: ${total_cash_received / env.initial_inventory:,.2f}")
    print(f"Benchmark Arrival Price: ${arrival_price:,.2f}")

--- Running a test episode with a random agent ---
Step: 0, Inv: 50.00, Cash: $0.00
Step: 1, Inv: 18.06, Cash: $405,924.69
Step: 2, Inv: 3.43, Cash: $737,356.44
Step: 3, Inv: 1.04, Cash: $799,513.38
Step: 4, Inv: 0.48, Cash: $814,270.22
Step: 5, Inv: 0.05, Cash: $824,847.28
Step: 6, Inv: 0.02, Cash: $825,646.40
Step: 7, Inv: 0.02, Cash: $825,713.66
Step: 8, Inv: 0.02, Cash: $825,735.76
Step: 9, Inv: 0.01, Cash: $826,025.49
Step: 10, Inv: 0.00, Cash: $826,158.32
Step: 11, Inv: 0.00, Cash: $826,159.22
Step: 12, Inv: 0.00, Cash: $826,169.02
Step: 13, Inv: 0.00, Cash: $826,174.31
Step: 14, Inv: 0.00, Cash: $826,175.36
Step: 15, Inv: 0.00, Cash: $826,176.06
Step: 16, Inv: 0.00, Cash: $826,176.08
Step: 17, Inv: 0.00, Cash: $826,176.08
Step: 18, Inv: 0.00, Cash: $826,176.08
Step: 19, Inv: 0.00, Cash: $826,176.08
Step: 20, Inv: 0.00, Cash: $826,176.08
Step: 21, Inv: 0.00, Cash: $826,176.08
Step: 22, Inv: 0.00, Cash: $826,176.08
Step: 23, Inv: 0.00, Cash: $826,176.08
Step: 24, Inv: 0.00, Cash: 