Julien Gauthier

# Reinforcement Learning (DDPG - HER) Project : Self Parking Car

##### Install the correct dependencies :

In [None]:
%pip install stable-baselines3[extra] highway-env

In [1]:
import stable_baselines3 as sb3
import gymnasium as gym
from stable_baselines3 import HerReplayBuffer, DDPG
from tqdm.notebook import trange

In [None]:
# ONLY FOR VIDEO RECORDING : (OPTIONNAL IMPORTS)
import sys
%pip install tensorboardx gym pyvirtualdisplay
%pip install ffmpeg-python
sys.path.insert(0, 'HighwayEnv/scripts')
from highway_env.utils import record_videos, show_videos

### We'll be using the "parking-v0" (third-party) environment with a DDPG-HerReplayBuffer agent.

In [None]:
env = gym.make("parking-v0", render_mode='human')

model = DDPG(
    "MultiInputPolicy",
    env,
    replay_buffer_class=HerReplayBuffer,
    replay_buffer_kwargs=dict(n_sampled_goal=4, goal_selection_strategy='future'),
    verbose=1,
    device='cuda'
)


### Insert the parameters for the training :

In [None]:
NUM_EPISODES = 10
NUM_STEPS = 30000

## Training the agent in the Parking environment :

In [None]:
model.learn(NUM_STEPS)

env = gym.make('parking-v0', render_mode='rgb_array')

#env = record_videos(env) # ONLY FOR VIDEO RECORDING

for episode in trange ((NUM_EPISODES), desc="Test episodes"):
    obs, info = env.reset()
    done = truncated = False
    while not (done or truncated):
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env.step(action)
env.close()

#show_videos() # ONLY FOR VIDEO RECORDING


#### Save your agent for future use (execute this before restarting the kernel if the training crashes):

In [19]:
model.save('pre-trained-model.h5f')

#### Reload and test a saved agent from the directory :

In [1]:
# Reload the model :
env = gym.make("parking-v0", render_mode='human')

# Load the trained agent (MODIFY THIS PART) :
loaded_model = DDPG.load('pre-trained-DDPG-HER.h5f', env=env)
NUM_TESTS = 200


# Test the loaded agent :
obs, _ = env.reset()
for episode in range(NUM_TESTS):
    obs, info = env.reset()
    done = truncated = False
    while not (done or truncated):
        action, _ = loaded_model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env.step(action)
env.close()


NameError: name 'gym' is not defined

##### -> Pre-trained model is available for demonstration in the directory : 'pre-trained-DDPG-HER.h5f'