# Working DQN Example on CartPole-v1 Environment

In [None]:
import gym
from stable_baselines3 import DQN, PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder
import wandb
from wandb.integration.sb3 import WandbCallback


# Hyperparameters for the CartPole DQN baseline. The same mapping is passed to
# wandb.init below, so every value here is recorded with the run.
config = dict(
    n_timesteps=5e4,  # total environment steps given to model.learn
    policy="MlpPolicy",
    learning_rate=2.3e-3,
    batch_size=64,
    buffer_size=100000,
    learning_starts=1000,
    gamma=0.99,
    target_update_interval=10,
    train_freq=256,
    gradient_steps=128,
    exploration_fraction=0.16,
    exploration_final_eps=0.04,
    policy_kwargs={"net_arch": [256, 256]},
    seed=42,
    env_name="CartPole-v1",
)

# Open the Weights & Biases run that the rest of this cell logs to.
run = wandb.init(
    config=config,
    project="sb3",
    # Optional: snapshot the code with the run.
    save_code=True,
    # Auto-upload the videos of agents playing the game.
    monitor_gym=True,
    # Auto-upload sb3's tensorboard metrics.
    sync_tensorboard=True,
)


def make_env():
    """Create the env named by ``config["env_name"]``, wrapped in a ``Monitor``."""
    # Monitor records per-episode statistics such as returns.
    return Monitor(gym.make(config["env_name"]))


# Vectorise the single environment and record a 200-step video whenever the
# step counter is a multiple of 2000.
env = DummyVecEnv([make_env])
env = VecVideoRecorder(
    env,
    f"videos/{run.id}",
    record_video_trigger=lambda x: x % 2000 == 0,
    video_length=200,
)


# Build the DQN agent from the hyperparameters in `config`.
model = DQN(
    config["policy"],
    env,
    learning_rate=config["learning_rate"],
    batch_size=config["batch_size"],
    buffer_size=config["buffer_size"],
    learning_starts=config["learning_starts"],
    gamma=config["gamma"],
    target_update_interval=config["target_update_interval"],
    train_freq=config["train_freq"],
    gradient_steps=config["gradient_steps"],
    exploration_fraction=config["exploration_fraction"],
    exploration_final_eps=config["exploration_final_eps"],
    policy_kwargs=config["policy_kwargs"],
    tensorboard_log=f"runs/{run.id}",
    verbose=1,
    seed=config["seed"],
)

try:
    model.learn(
        # config stores the step budget as a float (5e4); learn() is
        # documented to take an int, so convert explicitly.
        total_timesteps=int(config["n_timesteps"]),
        callback=WandbCallback(
            gradient_save_freq=100,
            model_save_path=f"models/{run.id}",
            verbose=0,
        ),
    )
finally:
    # Always release the recorder and close the W&B run, even when training
    # is interrupted or raises.
    env.close()
    run.finish()

# Now for the Self-Driving Cars DQN

In [1]:
import gym
import numpy as np
from gym import spaces
from simulation import Simulator
import random


class CustomDuckieTownSim(gym.Env):
    """Gym environment wrapping ``Simulator`` with a discrete steering action.

    Each action is an index into ``action_angles``; the selected value is sent
    to the simulator as the ``steer`` command at a fixed ``speed`` of 1.0.
    Observations are the camera images returned by the simulator after
    ``preprocess_img`` (currently the identity).

    Args:
        camera_settings: Camera configuration forwarded to ``Simulator``. Its
            ``"resolution"`` entry is unpacked as ``(HEIGHT, WIDTH)`` to size
            the observation space.
        map_parameters: Forwarded to ``Simulator.start`` as ``mapParameters``.
        car_parameters: Forwarded to ``Simulator.start`` as ``carParameters``.
        action_angles: Steering values, one per discrete action (presumably
            degrees -- confirm against ``Simulator.step``). Defaults to
            ``[-30, 0, 30]``.
        display: Forwarded to ``Simulator.step`` as ``display``.
    """

    metadata = {"render.modes": ["human"]}

    # Candidate start cells; reset() picks one uniformly at random per episode.
    # NOTE(review): (1, 4) is listed twice, so it is drawn twice as often as
    # the other cells -- confirm whether that is intended.
    _START_LOCATIONS = (
        (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7),
        (1, 0), (1, 1), (1, 4), (1, 7),
        (2, 2), (2, 3), (1, 4), (2, 5), (2, 6),
        (2, 0), (5, 1), (3, 2), (2, 4), (5, 5), (5, 6), (2, 7),
        (3, 0), (7, 1), (4, 2), (7, 3), (5, 4), (6, 5), (3, 7),
        (4, 0), (5, 2), (7, 4), (7, 5), (4, 7),
        (5, 0), (7, 2), (5, 7),
        (6, 0),
    )

    def __init__(self, camera_settings, map_parameters, car_parameters, action_angles: list = None, display=False):
        super().__init__()

        self.camera_settings = camera_settings
        self.map_parameters = map_parameters
        self.car_parameters = car_parameters
        self.display = display

        # The simulator is created in reset(); None marks "not started yet".
        self.sim = None
        self.done = False
        self.info = {}

        # Build the default per instance instead of sharing one mutable
        # default list across every environment.
        if action_angles is None:
            action_angles = [-30, 0, 30]
        self.action_angles = list(action_angles)
        self.action_space = spaces.Discrete(len(self.action_angles))

        # Image observation, channel-last.
        N_CHANNELS = 3
        (HEIGHT, WIDTH) = self.camera_settings["resolution"]
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(HEIGHT, WIDTH, N_CHANNELS), dtype=np.uint8
        )
        # NOTE: the observation space should describe what the agent actually
        # sees, i.e. the *preprocessed* image. Keep it in sync with
        # preprocess_img if that ever stops being the identity.

    def preprocess_img(self, raw_img):
        """Return the observation derived from a raw camera image.

        Currently returns ``raw_img`` unchanged. Ideas not yet implemented:
        colour-based feature extraction (red/white/yellow), horizon cropping,
        stacking a short sequence of frames. SB3's CnnPolicy normalizes
        images by default, so no scaling is done here.
        """
        return raw_img

    def step(self, action):
        """Apply one discrete action and advance the simulator.

        Args:
            action: Index into ``action_angles``.

        Returns:
            ``(observation, reward, done, info)`` where ``info`` is always an
            empty dict.

        Raises:
            RuntimeError: If called before ``reset()``.
        """
        if self.sim is None:
            raise RuntimeError("reset() must be called before step()")

        # int() lets NumPy integer scalars and single-element arrays be used
        # as a list index.
        steer = self.action_angles[int(action)]
        raw_img, reward, self.done = self.sim.step(
            steer=steer, speed=1.0, display=self.display
        )
        self.info = {}

        observation = self.preprocess_img(raw_img)
        return observation, reward, self.done, self.info

    def reset(self):
        """Start a new episode and return its first observation.

        A fresh ``Simulator`` is created on every call and started on the
        ``"real"`` map at a start location drawn at random from
        ``_START_LOCATIONS``.

        Returns:
            The preprocessed initial camera image. Reward, done and info are
            not part of the reset return value.
        """
        self.done = False

        self.sim = Simulator(cameraSettings=self.camera_settings)

        # Uses the global `random` module, so seeding it makes start positions
        # reproducible.
        start_row, start_col = random.choice(self._START_LOCATIONS)

        self.sim.start(
            mapSeed="real",
            mapParameters=self.map_parameters,
            carParameters=self.car_parameters,
            # The two trailing zeros are passed through unchanged; their
            # meaning is defined by Simulator.start (TODO confirm).
            startPoint=(start_row, start_col, 0, 0),
        )

        where, facing = self.sim.RealSense.parent.ackermann.pose()
        initial_img = self.sim.RealSense.camera.getImage(where, facing)

        return self.preprocess_img(initial_img)



### Check the environment to make sure it's good

In [None]:
from stable_baselines3.common.env_checker import check_env
# Camera image size used for observations.
img_size = (64, 64)

# Camera model handed to the simulator.
cameraSettings = dict(
    resolution=img_size,  # a full-size alternative would be (1920, 1080)
    fov=dict(diagonal=77),  # realsense diagonal fov is 77 degrees IIRC
    # Don't go too crazy with these: the code should be good up to roughly
    # 45 degrees, but the math gets unstable.
    # A tilted alternative: {"roll": 13, "pitch": 30, "yaw": 30}
    angle=dict(roll=0, pitch=0, yaw=0),
    height=66,  # 8 pixels/inch - how high the camera is relative to the road
)

# Map generation settings.
mapParameters = dict(
    loops=1,
    size=(6, 6),
    expansions=5,
    complications=4,
)

# Car parameters (max/min speed, steering limits, etc.).
carParameters = dict(
    wheelbase=6.5,  # inches; influences how quickly steering turns the car. Larger = slower
    maxSteering=30.0,  # degrees, extreme (+ and -) values of steering
    steeringOffset=0.0,  # degrees, since the car is rarely perfectly aligned
    minVelocity=0.0,  # pixels/second; slower than this doesn't move at all
    maxVelocity=480.0,  # pixels/second at 8 pixels/inch: 5 ft/s gives 480 pixels/s top speed
)


# Build the environment with the default action set and display off, then let
# SB3 validate it; check_env outputs additional warnings if needed.
env = CustomDuckieTownSim(
    camera_settings=cameraSettings,
    map_parameters=mapParameters,
    car_parameters=carParameters,
)
check_env(env)

## Training loop

In [2]:
from stable_baselines3.common.monitor import Monitor

def make_env(display, config):
    """Build a ``Monitor``-wrapped ``CustomDuckieTownSim`` from a config dict.

    Args:
        display: Passed to the environment as its ``display`` flag.
        config: Mapping providing ``"camera_settings"``, ``"map_parameters"``,
            ``"car_parameters"`` and ``"actions"``.

    Returns:
        The environment wrapped in ``Monitor``, which records stats such as
        returns.
    """
    simulator_env = CustomDuckieTownSim(
        camera_settings=config["camera_settings"],
        map_parameters=config["map_parameters"],
        car_parameters=config["car_parameters"],
        action_angles=config["actions"],
        display=display,
    )
    return Monitor(simulator_env)

In [3]:
from stable_baselines3 import DQN
from stable_baselines3.dqn import CnnPolicy
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder
from torch import tensor
import wandb
from wandb.integration.sb3 import WandbCallback


def train(random_seed, return_dict):
    """Train a DQN agent on ``CustomDuckieTownSim`` and log the run to W&B.

    Args:
        random_seed: Seed recorded in the run config and passed to ``DQN``;
            also the key under which the result is stored.
        return_dict: Mutable mapping (a ``multiprocessing.Manager`` dict when
            this runs in a worker process) that receives
            ``{random_seed: path_of_saved_model}`` on success.

    The saved-model path is stored rather than the model object: putting a
    value into a manager dict pickles it, and a trained DQN holds the replay
    buffer and schedule closures, which makes that impractical. Load the
    result with ``DQN.load(path)``.
    """
    img_size = (64, 64)
    cameraSettings = {
        "resolution": img_size,
        "fov": {"diagonal": 77},  # realsense diagonal fov is 77 degrees IIRC
        # Keep these modest: the code should be good up to roughly 45 degrees,
        # but the math gets unstable.
        "angle": {"roll": 0, "pitch": 0, "yaw": 0},
        "height": 66,  # 8 pixels/inch - how high the camera is relative to the road
    }

    mapParameters = {
        "loops": 1,
        "size": (6, 6),
        "expansions": 5,
        "complications": 4,
    }

    # Can also pass car parameters for max/min speed, etc.
    carParameters = {
        "wheelbase": 6.5,  # inches; influences how quickly steering turns the car. Larger = slower
        "maxSteering": 30.0,  # degrees, extreme (+ and -) values of steering
        "steeringOffset": 0.0,  # degrees, since the car is rarely perfectly aligned
        "minVelocity": 0.0,  # pixels/second; slower than this doesn't move at all
        "maxVelocity": 480.0,  # pixels/second at 8 pixels/inch: 5 ft/s gives 480 pixels/s top speed
    }

    config = {
        "n_timesteps": 50000,  # sb3 dqn runs go up to 1e7 at most
        "policy": 'CnnPolicy',
        "env": "CustomDuckieTown",
        "actions": [-30, 0, 30],
        "camera_settings": cameraSettings,
        "map_parameters": mapParameters,
        "car_parameters": carParameters,
        "learning_rate": 2.3e-3,
        "batch_size": 64,
        "buffer_size": 100000,
        "learning_starts": 1000,
        "gamma": 0.99,
        "target_update_interval": 10,
        "train_freq": 256,
        "gradient_steps": 128,
        "exploration_fraction": 0.16,
        "exploration_final_eps": 0.04,
        "seed": random_seed,
    }

    run = wandb.init(
        project="self_driving_cars",
        config=config,
        sync_tensorboard=True,  # auto-upload sb3's tensorboard metrics
        monitor_gym=True,  # auto-upload the videos of agents playing the game
        save_code=True,  # optional
    )

    model_path = f"models/{run.id}/final_model"
    try:
        # DQN wraps the single env in a DummyVecEnv itself. Video recording
        # (VecVideoRecorder) is not enabled for this environment.
        env = make_env(False, config)

        model = DQN(
            config["policy"],
            env,
            learning_rate=config["learning_rate"],
            batch_size=config["batch_size"],
            buffer_size=config["buffer_size"],
            learning_starts=config["learning_starts"],
            gamma=config["gamma"],
            target_update_interval=config["target_update_interval"],
            train_freq=config["train_freq"],
            gradient_steps=config["gradient_steps"],
            exploration_fraction=config["exploration_fraction"],
            exploration_final_eps=config["exploration_final_eps"],
            # No policy_kwargs: the config above defines none, so the
            # CnnPolicy defaults apply.
            tensorboard_log=f"runs/{run.id}",
            verbose=1,
            # Apply the seed that is logged in the run config; otherwise the
            # recorded seed would not describe the run.
            seed=config["seed"],
        )

        model.learn(
            total_timesteps=config["n_timesteps"],
            callback=WandbCallback(
                gradient_save_freq=100,
                model_save_path=f"models/{run.id}",
                verbose=0,
            ),
        )
        model.save(model_path)
    finally:
        # Close the W&B run even if training is interrupted or fails.
        run.finish()

    return_dict[random_seed] = model_path

In [4]:
from multiprocessing import Manager, Process

# Run `train` in worker processes and collect each worker's result in a
# manager-backed dict keyed by the worker's seed.
manager = Manager()
model_dict = manager.dict()
jobs = []
numWorkers = 1

# Draw distinct seeds: the seed is also the result key, so a repeated seed
# would make one worker overwrite another's entry. Requires numWorkers <= 101.
seeds = random.sample(range(101), numWorkers)

for seed in seeds:
    p = Process(target=train, args=(seed, model_dict))
    jobs.append(p)
    p.start()

for proc in jobs:
    proc.join()
    # A worker that crashed or was interrupted leaves no entry in model_dict;
    # report it instead of failing silently.
    if proc.exitcode != 0:
        print(f"{proc.name} exited with code {proc.exitcode}")

print(model_dict.keys())

[34m[1mwandb[0m: Currently logged in as: [33mcurtiscjohnson[0m. Use [1m`wandb login --relogin`[0m to force relogin


Using cuda device
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
Logging to runs/x1jr0vlm/DQN_1
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 59.8     |
|    ep_rew_mean      | 6.01     |
|    exploration_rate | 1        |
| time/               |          |
|    episodes         | 4        |
|    fps              | 159      |
|    time_elapsed     | 1        |
|    total_timesteps  | 239      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 58.9     |
|    ep_rew_mean      | 5.85     |
|    exploration_rate | 1        |
| time/               |          |
|    episodes         | 8        |
|    fps              | 160      |
|    time_elapsed     | 2        |
|    total_timesteps  | 471      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 52.9     |


Process Process-2:
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_158793/1870075194.py", line 94, in train
    trained_model = model.learn(
  File "/home/curtis/.local/lib/python3.8/site-packages/stable_baselines3/dqn/dqn.py", line 265, in learn
    return super().learn(
  File "/home/curtis/.local/lib/python3.8/site-packages/stable_baselines3/common/off_policy_algorithm.py", line 334, in learn
    rollout = self.collect_rollouts(
  File "/home/curtis/.local/lib/python3.8/site-packages/stable_baselines3/common/off_policy_algorithm.py", line 567, in collect_rollouts
    new_obs, rewards, dones, infos = env.step(actions)
  File "/home/curtis/.local/lib/python3.8/site-packages/stable_baselines3/common/vec_env/base_vec_env.py", line 163, in step
    return self.step_wa

KeyboardInterrupt: 

wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wand

# Validation

In [None]:
# Roll out a trained driving agent with the simulator display enabled.
# make_env needs the same settings the agent was trained with; the action list
# must match the "actions" entry used in train().
validation_config = {
    "camera_settings": cameraSettings,
    "map_parameters": mapParameters,
    "car_parameters": carParameters,
    "actions": [-30, 0, 30],
}
env = make_env(display=True, config=validation_config)

# Use a result produced by the training workers, not the CartPole model from
# the first section.
if not model_dict:
    raise RuntimeError("model_dict is empty: no training worker finished successfully")
trained = next(iter(model_dict.values()))
# An entry may be a saved-model path or an already loaded model.
model = DQN.load(trained) if isinstance(trained, str) else trained

obs = env.reset()
try:
    # Runs until interrupted; each finished episode starts a new one.
    while True:
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        if done:
            obs = env.reset()
finally:
    env.close()