# Trade Manager

# Buy agent

In [None]:
from config.loader import load_trading_system_config
config = load_trading_system_config("config/data_config.yaml")
print(config)

In [None]:
from agents.buy_agent_trainer import BuyAgentTrainer
from agents.sell_agent_trainer import SellAgentTrainer

buy_trainer = BuyAgentTrainer(
    ticker="AAPL",
    window_size=30,
    horizon=20,
    transaction_cost=0.001,
    lambda_dd=0.05,
    lambda_vol=0.01,
    hold_penalty_long=0.0,
    device="cpu",
)

buy_trend_history = buy_trainer.train_trend_filtered(
    n_episodes=200,
    verbose=True,
)


# Sell Agent

In [None]:
# 1) Build SellTrainer
sell_trainer = SellAgentTrainer(
    ticker="AAPL",
    window_size=30,
    horizon=20,
    transaction_cost=0.001,
    min_steps_before_sell=1,
    lambda_dd=0.05,
    lambda_vol=0.01,
    hold_penalty_long=0.0,
    device="cpu",
)

# 2) Train SellAgent on BuyAgent entries
sell_history = sell_trainer.train_on_buy_entries(
    buy_agent=buy_trainer.agent,
    n_episodes=300,
    verbose=True,
)


In [None]:
from scripts.trade_manager import TradeManager
# from agents.buy_agent_trainer import BuyAgentTrainer
# from agents.sell_agent_trainer import SellAgentTrainer
import numpy as np

tm = TradeManager(
    buy_trainer=buy_trainer,
    sell_trainer=sell_trainer,
    cfg_path="config/data_config.yaml",
)

print("Trend filter True count:", np.sum(tm.sma_short > tm.sma_long))

# Inspect BuyAgent confidence over entire dataset
import torch
confs = []
for i in range(len(tm.state_df)):
    state = tm.state_df.iloc[i].values.astype(np.float32)
    with torch.no_grad():
        s = torch.from_numpy(state).unsqueeze(0).to(tm.buy_agent.device)
        q = tm.buy_agent.q_net(s)[0].cpu().numpy()
        p = np.exp(q - q.max()) / np.exp(q - q.max()).sum()
        confs.append(p[1])

print("Average BUY confidence:", np.mean(confs))
print("Max BUY confidence:", np.max(confs))
print("Min BUY confidence:", np.min(confs))

results = tm.run_backtest(greedy=True)



equity = results["equity_curve"]
trades = results["trades"]

print("Trades executed:", len(trades))
print("First 5 trades:")
for t in trades[:5]:
    print(t)

print("Final equity:", equity[-1])


In [None]:
equity = results["equity_curve"]
trades = results["trades"]

print("Trades executed:", len(trades))
print("First 5 trades:")
for t in trades[:5]:
    print(t)

print("Final equity:", equity[-1])


In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(12,5))
plt.plot(equity)
plt.title("Equity Curve (After MP Training)")
plt.xlabel("Time")
plt.ylabel("Equity")
plt.grid(True)
plt.show()


# After refactoring

In [2]:
import numpy as np

from config.loader import load_config
from agents.buy_agent_trainer import BuyAgentTrainer
from agents.sell_agent_trainer import SellAgentTrainer
from scripts.trade_manager import TradeManager


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


In [3]:
config = load_config("config/config.yaml")
print(config.trade_manager)


TradeManagerConfig(cooldown_steps=5, sell_horizon=20, buy_min_confidence=0.5, use_trend_filter=True, ma_short=10, ma_long=30)


In [None]:
buy_trainer = BuyAgentTrainer(ticker="AAPL", config=config, device="cpu")
buy_hist = buy_trainer.train(n_episodes=20, warmup_dynamic=True, verbose=True)

buy_entry_indices = buy_trainer.collect_buy_entry_indices(
    buy_min_confidence=0.48,   # tune here
    use_trend_filter=False
)

print("Collected BUY entries:", len(buy_entry_indices))
print("First 10:", buy_entry_indices[:10])

sell_trainer = SellAgentTrainer(ticker="AAPL", config=config, device="cpu")
sell_hist = sell_trainer.train(
    n_episodes=50,
    warmup_dynamic=True,
    verbose=True,
    buy_entry_indices=buy_entry_indices,
)
print("Sell final reward:", sell_hist["episode_rewards"][-1])


[*********************100%***********************]  1 of 1 completed


[BuyTrainer] Raw dataset: (1224, 10)
[BuyTrainer] After dropna: (1224, 10)
[BuyTrainer] Rolling state_df shape: (1194, 270)
[BuyTrainer] state_dim=270, actions=2
[BuyTrainer] Warmup set to: 238
[Buy Ep 1/20] Reward=0.9083 | Eps=0.773 | Steps=1193 | Buffer=1193 | Avg10=0.9083
[Buy Ep 10/20] Reward=0.4525 | Eps=0.050 | Steps=1193 | Buffer=11930 | Avg10=0.9781


[*********************100%***********************]  1 of 1 completed

[Buy Ep 20/20] Reward=0.7477 | Eps=0.050 | Steps=1193 | Buffer=23860 | Avg10=1.0023
Collected BUY entries: 0
First 10: []
[SellTrainer] Raw dataset: (1224, 10)
[SellTrainer] After dropna: (1224, 10)
[SellTrainer] state_df shape: (1194, 270)
[SellTrainer] prices shape: (1194,)





ValueError: SellAgentTrainer: buy_entry_indices must be a non-empty 1D array.

In [None]:
sell_trainer = SellAgentTrainer(
    ticker="AAPL",
    config=config,
    device="cpu",
)

sell_rewards = sell_trainer.train(
    n_episodes=50,
    warmup_dynamic=True,
    verbose=True,
    buy_entry_indices=buy_entry_indices,
)

print("\n[SellAgent] Training complete")
print("Final reward:", sell_rewards["episode_rewards"][-1])