# SWEN 711 - Spring 24 - Term Project

### Code written by: Hemanth Chebiyam, Sanjeev Vijayakumar

### DQN Algorithm on OpenAI Gym Car Racing Environment

# 1. Import Dependencies

In [1]:
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import os

# 2. Test Environment

In [2]:
# Build the CarRacing environment with on-screen ("human") rendering so the
# spaces and reset output can be inspected in the cells below.
environment_name = "CarRacing-v2"
env = gym.make(environment_name, render_mode="human")

In [3]:
# Reset the environment; the cell output shows the (observation, info) pair
# it returns: a uint8 image array and an empty info dict.
env.reset()

(array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        ...,
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]], dtype=uint8),
 {})

In [4]:
# Display the action space; with the default settings used above it is a
# 3-element float32 Box (see the cell output).
env.action_space

Box([-1.  0.  0.], 1.0, (3,), float32)

In [5]:
# Display the observation space: 96x96x3 uint8 values in [0, 255]
# (see the cell output).
env.observation_space

Box(0, 255, (96, 96, 3), uint8)

In [6]:
# Close the inspection environment before the simulations below create
# their own.
env.close()

# 3. Simulating without Training

In [None]:
# Baseline rollout: drive the car with uniformly random actions so there is a
# reference score to compare the trained agents against.
environment_name = "CarRacing-v2"
env = gym.make(environment_name, render_mode="human")

episodes = 5
try:
    for episode in range(episodes):
        # The same seed is passed on every reset, so each episode restarts
        # from the same seeded environment state.
        observation, info = env.reset(seed=42)
        score = 0
        done = False

        while not done:
            # Untrained baseline: sample an action at random.
            action = env.action_space.sample()
            observation, reward, terminated, truncated, info = env.step(action)
            score += reward

            # The episode is over once the env reports either end condition.
            done = terminated or truncated

        print(f'Episode:{episode + 1} Score:{score}')
finally:
    # Release the render window even if the run is interrupted or fails.
    env.close()

# 4. Simulating after basic training (~500k timesteps)

In [8]:
# Environment handed to DQN.load() in the cell below.
# Stable-Baselines3's DQN supports only Discrete action spaces, while
# CarRacing defaults to a continuous Box, so request the discrete action set.
# NOTE(review): assumes the checkpoint was trained with continuous=False
# rather than a custom discretizing wrapper -- confirm against the training
# notebook.
base_env = gym.make(environment_name, render_mode="human", continuous=False)

# Bind the raw env under its own name so the factory lambda does not close
# over `env`, which is rebound to the vectorized wrapper on this line.
env = DummyVecEnv([lambda: base_env])

In [9]:
# Relative path to the saved model named DQN_500k_model.
dqn_path = os.path.join('Training', 'Saved Models', 'DQN_500k_model')

In [None]:
# Load the saved DQN agent from dqn_path, passing it the vectorized env
# created above.
model = DQN.load(dqn_path, env)

In [None]:
# Roll out the loaded DQN agent (`model`, from the cell above) for a few
# rendered episodes and print each episode's total reward.
environment_name = "CarRacing-v2"
# DQN predicts a discrete action index, so the env must expose the discrete
# action set; the default continuous Box space would not accept it.
# NOTE(review): assumes the model was trained with continuous=False -- confirm.
env = gym.make(environment_name, render_mode="human", continuous=False)

episodes = 5
try:
    for episode in range(episodes):
        # The same seed is passed on every reset, so each episode restarts
        # from the same seeded environment state.
        observation, info = env.reset(seed=42)
        score = 0
        done = False

        while not done:
            # Ask the trained agent for the next action.
            action, _ = model.predict(observation)
            observation, reward, terminated, truncated, info = env.step(action)
            score += reward

            # The episode is over once the env reports either end condition.
            done = terminated or truncated

        print(f'Episode:{episode + 1} Score:{score}')
finally:
    # Release the render window even if the run is interrupted or fails.
    env.close()

# 5. Simulating after more training (~2 million timesteps)

In [12]:
# Environment handed to DQN.load() in the cell below.
# Stable-Baselines3's DQN supports only Discrete action spaces, while
# CarRacing defaults to a continuous Box, so request the discrete action set.
# NOTE(review): assumes the checkpoint was trained with continuous=False
# rather than a custom discretizing wrapper -- confirm against the training
# notebook.
base_env = gym.make(environment_name, render_mode="human", continuous=False)

# Bind the raw env under its own name so the factory lambda does not close
# over `env`, which is rebound to the vectorized wrapper on this line.
env = DummyVecEnv([lambda: base_env])

In [13]:
# Relative path to the saved model named DQN_2mil_model.
dqn_path = os.path.join('Training', 'Saved Models', 'DQN_2mil_model')

In [None]:
# Load the saved DQN agent from dqn_path, passing it the vectorized env
# created above.
model = DQN.load(dqn_path, env)

In [None]:
# Roll out the loaded DQN agent (`model`, from the cell above) for a few
# rendered episodes and print each episode's total reward.
environment_name = "CarRacing-v2"
# DQN predicts a discrete action index, so the env must expose the discrete
# action set; the default continuous Box space would not accept it.
# NOTE(review): assumes the model was trained with continuous=False -- confirm.
env = gym.make(environment_name, render_mode="human", continuous=False)

episodes = 5
try:
    for episode in range(episodes):
        # The same seed is passed on every reset, so each episode restarts
        # from the same seeded environment state.
        observation, info = env.reset(seed=42)
        score = 0
        done = False

        while not done:
            # Ask the trained agent for the next action.
            action, _ = model.predict(observation)
            observation, reward, terminated, truncated, info = env.step(action)
            score += reward

            # The episode is over once the env reports either end condition.
            done = terminated or truncated

        print(f'Episode:{episode + 1} Score:{score}')
finally:
    # Release the render window even if the run is interrupted or fails.
    env.close()