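# Multi-Process Buy Agent Training

Trains the buy agent with `MultiProcessTrainer`, then evaluates the resulting greedy policy.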

In [1]:
import multiprocessing as mp
import numpy as np
import matplotlib.pyplot as plt
import torch

from config.loader import load_config
from agents.buy_agent_trainer import BuyAgentTrainer
from agents.multi_process.multi_process_trainer import MultiProcessTrainer
from agents.multi_process.handler import EnvHandler

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


In [None]:
def train_buy_agent_mp(
    ticker="AAPL",
    config_path="config/config.yaml",
    device="cpu",
    n_workers=4,
    steps_per_batch=300,
    worker_epsilon=0.05,
    sync_every=10,
    log_every=10,
    n_batches=600,
    updates_per_batch=50,
):
    """Build a BuyAgentTrainer and train its agent with MultiProcessTrainer.

    Every argument defaults to the value that used to be hard-coded, so a
    bare ``train_buy_agent_mp()`` call runs exactly the same experiment.

    Args:
        ticker: Symbol passed to ``BuyAgentTrainer``.
        config_path: Path of the YAML file handed to ``load_config``.
        device: Device string passed to ``BuyAgentTrainer``.
        n_workers: Forwarded to ``MultiProcessTrainer(n_workers=...)``.
        steps_per_batch: Forwarded to ``MultiProcessTrainer``.
        worker_epsilon: Forwarded to ``MultiProcessTrainer``.
        sync_every: Forwarded to ``MultiProcessTrainer``.
        log_every: Forwarded both to the ``MultiProcessTrainer`` constructor
            and to its ``train`` call.
        n_batches: Forwarded to ``MultiProcessTrainer.train``.
        updates_per_batch: Forwarded to ``MultiProcessTrainer.train``.

    Returns:
        The ``BuyAgentTrainer`` instance. Its ``agent`` is the same object
        that was handed to ``MultiProcessTrainer``.

    Raises:
        AssertionError: If the trainer did not populate ``env``, ``agent``,
            ``state_df`` or ``prices``.
    """
    config = load_config(config_path)

    buy_trainer = BuyAgentTrainer(
        ticker=ticker,
        config=config,
        device=device,
    )

    # Fail fast if the trainer did not finish building its components.
    assert buy_trainer.env is not None
    assert buy_trainer.agent is not None
    assert buy_trainer.state_df is not None
    assert buy_trainer.prices is not None

    print(f"[BuyTrainer-MP] state_df shape: {buy_trainer.state_df.shape}")
    print(f"[BuyTrainer-MP] prices shape: {buy_trainer.prices.shape}")

    # Features and prices are handed over as plain float32 NumPy arrays.
    # NOTE(review): presumably EnvHandler is called inside each worker to
    # build that worker's environment -- confirm in agents.multi_process.handler.
    env_fn = EnvHandler(
        env_type="buy",
        features=buy_trainer.state_df.values.astype(np.float32),
        prices=np.asarray(buy_trainer.prices, dtype=np.float32),
        config=config,
    )

    # The agent instance is passed directly: no AgentHandler and no agent_fn.
    mp_trainer = MultiProcessTrainer(
        agent=buy_trainer.agent,
        env_fn=env_fn,
        n_workers=n_workers,
        steps_per_batch=steps_per_batch,
        worker_epsilon=worker_epsilon,
        sync_every=sync_every,
        log_every=log_every,
    )

    print("[BuyTrainer-MP] Starting MP training...")
    mp_trainer.train(
        n_batches=n_batches,
        updates_per_batch=updates_per_batch,
        log_every=log_every,
    )

    return buy_trainer


def evaluate_greedy_buy(buy_trainer):
    """Play one episode with the greedy policy and summarise it.

    The trainer's environment is reset, then stepped with
    ``agent.select_action(state, greedy=True)`` until the environment
    reports ``done``.

    Args:
        buy_trainer: Object exposing ``env`` and ``agent`` attributes.

    Returns:
        A ``(total_reward, steps)`` tuple: the sum of per-step rewards as a
        float and the number of steps taken.

    Raises:
        AssertionError: If ``env`` or ``agent`` is ``None``.
    """
    env, agent = buy_trainer.env, buy_trainer.agent
    assert env is not None and agent is not None

    observation = env.reset()
    episode_return, n_steps = 0.0, 0
    finished = False

    while not finished:
        chosen = agent.select_action(observation, greedy=True)
        # The env follows the 4-tuple step protocol; the info dict is unused.
        observation, step_reward, finished, _info = env.step(chosen)
        episode_return += float(step_reward)
        n_steps += 1

    return episode_return, n_steps


def inspect_buy_confidence(buy_trainer):
    """Summarise the softmax "confidence" the Q-network assigns to BUY.

    For every row of ``buy_trainer.state_df`` the Q-values are computed with
    ``agent.q_net``, turned into probabilities with a numerically stable
    softmax, and the probability at index ``env.BUY`` is collected.

    The network is switched to eval mode for the duration of the scan and
    put back into training mode afterwards if it was training before, so
    calling this probe does not silently change the model's mode.

    Args:
        buy_trainer: Object exposing ``agent`` (with ``q_net`` and
            ``device``), ``state_df`` and ``env`` (with a ``BUY`` index).

    Returns:
        A ``(mean, max, min)`` tuple of floats over all rows.

    Raises:
        ValueError: If ``state_df`` has no rows.
    """
    agent = buy_trainer.agent
    state_df = buy_trainer.state_df
    env = buy_trainer.env

    buy_index = env.BUY

    if len(state_df) == 0:
        # Previously this surfaced as an obscure NumPy reduction error.
        raise ValueError("state_df is empty; cannot compute BUY confidence")

    # Convert once instead of rebuilding a Series for every row.
    states = state_df.values.astype(np.float32)

    q_net = agent.q_net
    was_training = getattr(q_net, "training", False)
    q_net.eval()

    confs = []
    try:
        with torch.no_grad():
            for state in states:
                s = torch.from_numpy(state).unsqueeze(0).to(agent.device)
                q = q_net(s)[0].detach().cpu().numpy()
                # Subtract the max before exponentiating for stability.
                exps = np.exp(q - np.max(q))
                probs = exps / (np.sum(exps) + 1e-12)
                confs.append(float(probs[buy_index]))
    finally:
        # Restore the mode the caller left the network in.
        if was_training:
            q_net.train()

    confs = np.asarray(confs, dtype=np.float32)
    return float(confs.mean()), float(confs.max()), float(confs.min())



def plot_loss_history(agent):
    """Draw the agent's recorded loss values as a line chart.

    Prints a notice and returns without plotting when ``agent.loss_history``
    is empty.

    Args:
        agent: Object exposing a ``loss_history`` sequence.
    """
    if agent.loss_history:
        fig, ax = plt.subplots(figsize=(8, 4))
        ax.plot(agent.loss_history)
        ax.set_xlabel("Update step")
        ax.set_ylabel("Loss")
        ax.set_title("BuyAgent DDQN Loss (MP training)")
        ax.grid(True)
        fig.tight_layout()
        plt.show()
    else:
        print("[BuyTrainer-MP] No loss history to plot.")

def greedy_buy_rate(buy_trainer):
    """Return the fraction of states on which the greedy policy picks BUY.

    Each row of ``buy_trainer.state_df`` is cast to float32 and passed to
    ``agent.select_action(..., greedy=True)``; the result is compared with
    ``env.BUY``.

    Args:
        buy_trainer: Object exposing ``agent``, ``state_df`` and ``env``.

    Returns:
        Number of BUY decisions divided by the number of rows.

    Raises:
        AssertionError: If ``agent`` or ``state_df`` is ``None``, or ``env``
            has no ``BUY`` attribute.
    """
    env = buy_trainer.env
    state_df = buy_trainer.state_df
    agent = buy_trainer.agent

    assert agent is not None and state_df is not None
    assert hasattr(env, "BUY"), "BuyEnv must define BUY action index"

    target = env.BUY
    n_rows = len(state_df)

    n_buys = sum(
        int(
            agent.select_action(
                state_df.iloc[row].values.astype(np.float32), greedy=True
            )
            == target
        )
        for row in range(n_rows)
    )

    return n_buys / n_rows




In [3]:
# Driver cell: train the buy agent, then report three post-training metrics.
if __name__ == "__main__":
    # Select the "spawn" start method before training starts; force=True
    # allows overriding a start method already set in this process.
    mp.set_start_method("spawn", force=True)
    # buy_trainer stays at module level; a later cell reads buy_trainer.env.
    buy_trainer = train_buy_agent_mp()

    # Total reward and step count of one greedy episode.
    reward, steps = evaluate_greedy_buy(buy_trainer)
    # Mean / max / min softmax probability of BUY over all rows of state_df.
    avg_buy, max_buy, min_buy = inspect_buy_confidence(buy_trainer)
    # Fraction of rows of state_df where the greedy action equals env.BUY.
    buy_rate = greedy_buy_rate(buy_trainer)

    print(f"Greedy eval reward={reward:.4f} steps={steps}")
    print(f"BUY confidence mean={avg_buy:.6f} max={max_buy:.6f} min={min_buy:.6f}")
    print(f"Greedy BUY rate={buy_rate:.4f}")


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


[BuyTrainer] Raw dataset: (1224, 10)
[BuyTrainer] After dropna: (1224, 10)
[BuyTrainer] Rolling state_df shape: (1194, 270)
[BuyTrainer] state_dim=270, actions=2
[BuyTrainer-MP] state_df shape: (1194, 270)
[BuyTrainer-MP] prices shape: (1194,)
[BuyTrainer-MP] Starting MP training...
[MP Trainer] Starting workers...


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


[Batch 10/600] Buffer size=3000 | eps=1.000
[Batch 20/600] Buffer size=6000 | eps=1.000
[Batch 30/600] Buffer size=9000 | eps=1.000
[Batch 40/600] Buffer size=12000 | eps=1.000
[Batch 50/600] Buffer size=15000 | eps=1.000
[Probe @ batch 50] BUY_conf mean=0.5005 max=0.5054 min=0.4943 | greedy_buy_rate=0.656
[Batch 60/600] Buffer size=18000 | eps=1.000
[Batch 70/600] Buffer size=21000 | eps=1.000
[Batch 80/600] Buffer size=24000 | eps=1.000
[Batch 90/600] Buffer size=27000 | eps=1.000
[Batch 100/600] Buffer size=30000 | eps=1.000
[Probe @ batch 100] BUY_conf mean=0.5008 max=0.5033 min=0.4979 | greedy_buy_rate=0.906
[Batch 110/600] Buffer size=33000 | eps=1.000
[Batch 120/600] Buffer size=36000 | eps=1.000
[Batch 130/600] Buffer size=39000 | eps=1.000
[Batch 140/600] Buffer size=42000 | eps=1.000
[Batch 150/600] Buffer size=45000 | eps=1.000
[Probe @ batch 150] BUY_conf mean=0.5006 max=0.5027 min=0.4993 | greedy_buy_rate=0.922
[Batch 160/600] Buffer size=48000 | eps=1.000
[Batch 170/600] 

In [4]:
# Show which action indices the environment assigns to BUY and HOLD.
# Relies on buy_trainer having been created by the training cell.
print("BUY index:", buy_trainer.env.BUY)
print("HOLD index:", buy_trainer.env.HOLD)


BUY index: 1
HOLD index: 0
