In [1]:
# Import necessary libraries for data handling, the DRL agent,
# our custom environment, and path management.
import pandas as pd
from stable_baselines3 import PPO
from qmind_quant.ml_models.environments.trading_env import TradingEnv
from qmind_quant.config.paths import FEATURES_DATA_DIR, MODELS_DIR

In [2]:
# Load the engineered feature table and build the training environment.
# We train the agent on a single stock to keep the process fast and simple;
# change TICKER to train on a different symbol.
TICKER = 'AAPL'

print("Loading data and creating the environment...")
feature_file = FEATURES_DATA_DIR / "ml_feature_data.parquet"
df = pd.read_parquet(feature_file)

# Filter for a single ticker for training; reset_index(drop=True) gives the
# filtered rows a fresh 0..n-1 index.
aapl_df = df[df['ticker'] == TICKER].reset_index(drop=True)

# Fail fast if the ticker is absent from the feature file. Without this check
# an empty frame would be handed to the environment and the problem would only
# surface later (how TradingEnv reacts to an empty frame is not visible here).
if aapl_df.empty:
    raise ValueError(f"No rows found for ticker {TICKER!r} in {feature_file}")

# Create an instance of our custom trading environment
env = TradingEnv(df=aapl_df)
print("Environment created successfully.")

Loading data and creating the environment...
Environment created successfully.


In [3]:
# Training configuration.
# SEED fixes the random number generators that PPO seeds, so that
# Restart & Run All gives repeatable training runs on the same setup
# (NOTE(review): Stable-Baselines3 does not promise bit-identical results
# across platforms or on GPU; confirm if exact reproducibility matters).
SEED = 42
# 20,000 timesteps is a small number for a quick initial run.
# A production model would train for millions of timesteps.
TOTAL_TIMESTEPS = 20_000

# Initialize the PPO model. 'MlpPolicy' uses a standard neural network.
# We also set up TensorBoard logging to visualize the training process later.
model = PPO(
    'MlpPolicy',
    env,
    verbose=1,
    seed=SEED,
    tensorboard_log="./drl_tensorboard_logs/ppo/"
)

# Train the agent.
print("\n--- Starting DRL Agent Training ---")
model.learn(total_timesteps=TOTAL_TIMESTEPS)
print("--- Training Complete ---")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.

--- Starting DRL Agent Training ---
Logging to ./drl_tensorboard_logs/ppo/PPO_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 81       |
|    ep_rew_mean     | 2e+04    |
| time/              |          |
|    fps             | 2688     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 81         |
|    ep_rew_mean          | 1.88e+04   |
| time/                   |            |
|    fps                  | 1908       |
|    iterations           | 2          |
|    time_elapsed         | 2          |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.00530415 |
|    clip_fraction        | 0     

In [4]:
# Persist the trained agent (learned policy and model weights) as a zip file
# under the project's models directory, then report the destination.
model_path = MODELS_DIR.joinpath("drl_ppo_v1.zip")
model.save(model_path)
print(f"\nDRL agent saved to: {model_path}")


DRL agent saved to: /Users/enisyasaroglu/qmind_quant_platform/qmind_quant/ml_models/models/drl_ppo_v1.zip


In [5]:
# Replay one complete episode with the trained policy, using deterministic
# action selection, on the same data the agent was trained on.
print("\n--- Evaluating Trained Agent ---")

obs, info = env.reset()
while True:
    # Ask the model for an action in the current state and apply it.
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)

    # The episode ends when the environment either terminates or truncates it.
    done = terminated or truncated

    # Print the state after this step (the final step is printed as well).
    env.render()

    if done:
        break

print("--- Evaluation Complete ---")


--- Evaluating Trained Agent ---
Step: 31, Total Value: 100000.00, Position: 592.84, Cash: 0.00
Step: 32, Total Value: 99652.71, Position: 592.84, Cash: 0.00
Step: 33, Total Value: 102124.89, Position: 592.84, Cash: 0.00
Step: 34, Total Value: 100258.99, Position: 592.84, Cash: 0.00
Step: 35, Total Value: 99652.71, Position: 592.84, Cash: 0.00
Step: 36, Total Value: 101848.26, Position: 592.84, Cash: 0.00
Step: 37, Total Value: 107940.43, Position: 592.84, Cash: 0.00
Step: 38, Total Value: 106957.45, Position: 592.84, Cash: 0.00
Step: 39, Total Value: 107363.58, Position: 592.84, Cash: 0.00
Step: 40, Total Value: 107563.72, Position: 592.84, Cash: 0.00
Step: 41, Total Value: 108640.89, Position: 592.84, Cash: 0.00
Step: 42, Total Value: 107892.33, Position: 592.84, Cash: 0.00
Step: 43, Total Value: 109796.13, Position: 592.84, Cash: 0.00
Step: 44, Total Value: 110473.96, Position: 592.84, Cash: 0.00
Step: 45, Total Value: 111823.73, Position: 592.84, Cash: 0.00
Step: 46, Total Value: 