## IMPORTS

In [16]:
import os, sys, random, time
import warnings

import numpy as np
import torch
import matplotlib.pyplot as plt

import gymnasium as gym
from stable_baselines3 import DQN, PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback

import imageio
from IPython.display import Image

## SETUP

In [17]:
# Seed every random number generator used in this notebook so that runs are
# reproducible.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Seed the CUDA generators only when a GPU is present; the call is guarded
# once here instead of being repeated unconditionally afterwards.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Ensure deterministic behavior in CuDNN (NVIDIA backend for deep learning ops).
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Device: use the GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Report the runtime configuration so results can be tied to an environment.
print("Python:", sys.version.split()[0])
print("PyTorch:", torch.__version__)
print("Device:", device)
print("CUDA:", torch.version.cuda if torch.cuda.is_available() else "None")

Python: 3.11.14
PyTorch: 2.8.0
Device: cpu
CUDA: None


## PATHS

In [18]:
# Root folder for everything this notebook writes to disk.
OUT_DIR = './outputs_DQN/'

# Artifact locations derived from the root folder.
TENSORBOARD_LOGS_DIR = f"{OUT_DIR}tensorboard/"
SAVE_MODEL_PATH = f"{OUT_DIR}model_dqn.zip"

# Create the root folder up front; an existing folder is not an error.
os.makedirs(OUT_DIR, exist_ok=True)

## ENVIRONMENT

In [19]:
def make_env(env_id: str = "LunarLander-v3", render_mode: str = "rgb_array") -> "gym.Env":
    """Factory function that creates a fresh Gymnasium environment.

    Called with no arguments it builds a LunarLander-v3 environment that
    renders to RGB arrays, so it can be passed directly as a zero-argument
    factory (e.g. to ``DummyVecEnv``).

    Args:
        env_id: Gymnasium environment id passed to ``gym.make``.
        render_mode: Render mode passed to ``gym.make``.

    Returns:
        The environment instance returned by ``gym.make``.
    """
    return gym.make(env_id, render_mode=render_mode)

# Wrap the factory in a DummyVecEnv holding a single environment; the factory
# itself (not an instance) is passed so the wrapper creates the environment.
env = DummyVecEnv([make_env])

  from pkg_resources import resource_stream, resource_exists
