In [1]:
import torch
from src.Trainer import Trainer

In [None]:
def train(
    *,
    n_episodes: int = 50000,
    max_t: int = 100,
    eps_start: float = 1.0,
    eps_end: float = 0.05,
    eps_decay: float = 0.995,
    model_save_path: str = "models/",
    seed: int = 42,
) -> None:
    """Train the DQN agent on the Blackjack environment.

    Builds a ``Trainer`` from the given settings, runs ``trainer.train()``,
    saves the result as ``dqn_blackjack_final.pth`` and plots the scores.
    All arguments are keyword-only and default to the values this notebook
    originally hard-coded, so a bare ``train()`` behaves as before while a
    quick smoke run is possible with e.g. ``train(n_episodes=500)``.

    Args:
        n_episodes: Total number of episodes.
        max_t: Max steps per episode.
        eps_start: Initial epsilon (exploration).
        eps_end: Final epsilon (exploitation).
        eps_decay: Epsilon decay factor.
        model_save_path: Where to save trained models.
        seed: Seed applied to Python's ``random`` and to torch, and also
            forwarded to ``Trainer``.

    Raises:
        RuntimeError: If any ``Exception`` is raised during setup, training,
            saving or plotting; the original exception is chained as
            ``__cause__``. ``KeyboardInterrupt`` is not an ``Exception`` and
            therefore propagates unwrapped.
    """
    # Function-local import keeps this cell self-contained without touching
    # the notebook's import cell.
    import random

    try:
        # Seed both RNGs before the Trainer is built so nothing created
        # afterwards depends on an unseeded generator.
        # NOTE(review): whether Trainer itself draws from `random` is not
        # visible here; seeding it is harmless either way.
        random.seed(seed)
        torch.manual_seed(seed)

        trainer = Trainer(
            n_episodes=n_episodes,
            max_t=max_t,
            eps_start=eps_start,
            eps_end=eps_end,
            eps_decay=eps_decay,
            model_save_path=model_save_path,
            # Use the GPU when one is available, otherwise fall back to CPU.
            device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
            seed=seed,
        )

        # Start training
        print("Starting training...")
        trainer.train()

        # After training, save final model
        trainer.save_model("dqn_blackjack_final.pth")
        print("Training completed. Final model saved.")

        # Plot scores
        trainer.plot_scores()

    except Exception as e:
        # Re-raise with context while keeping the original traceback chained.
        raise RuntimeError(f"Error during training: {e}") from e

In [3]:
# Launch the training run defined by train(). This is a long-running cell:
# the captured output below ends in a KeyboardInterrupt, i.e. this particular
# run was stopped manually before all episodes had finished.
train()

Starting training...
Episode 100/50000, Moving Average Score: -0.29
Episode 200/50000, Moving Average Score: -0.30
Episode 300/50000, Moving Average Score: 0.02
Episode 400/50000, Moving Average Score: 0.15
Episode 500/50000, Moving Average Score: 0.33
Episode 600/50000, Moving Average Score: 0.12
Episode 700/50000, Moving Average Score: 0.16
Episode 800/50000, Moving Average Score: -0.02
Episode 900/50000, Moving Average Score: 0.08
Episode 1000/50000, Moving Average Score: 0.25
Episode 1100/50000, Moving Average Score: 0.21
Episode 1200/50000, Moving Average Score: 0.04
Episode 1300/50000, Moving Average Score: 0.17
Episode 1400/50000, Moving Average Score: 0.21
Episode 1500/50000, Moving Average Score: 0.22
Episode 1600/50000, Moving Average Score: 0.25
Episode 1700/50000, Moving Average Score: 0.23
Episode 1800/50000, Moving Average Score: 0.10
Episode 1900/50000, Moving Average Score: 0.34
Episode 2000/50000, Moving Average Score: 0.34
Episode 2100/50000, Moving Average Score: 0.1

KeyboardInterrupt: 