# Atari CrazyClimber

In the Atari game CrazyClimber, the goal is to reach the top of four buildings while avoiding obstacles.

## 1. Import Dependencies

In [10]:
# Uncomment to install required packages
# !pip install 'stable-baselines3[extra]'
# !pip install 'pyglet==1.5.27'

In [1]:
# Import Gymnasium and Stable Baseline modules
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
import os

## 2. Get a Feel for the Environment

In [2]:
# Load environment
environment_name = "ALE/CrazyClimber-v5"
env = gym.make(environment_name)

A.L.E: Arcade Learning Environment (version 0.7.4+069f8bd)
[Powered by Stella]


## 3. Vectorise and Train Model 

In [3]:
# load environment
env = make_atari_env(environment_name, n_envs=4, seed=0)
env = VecFrameStack(env, n_stack=4)

In [4]:
# establish log math and load model
log_path = os.path.join("Training", "Logs")
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=log_path)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [5]:
# train model, experiment with timesteps
model.learn(total_timesteps=500000)

Logging to Training/Logs/PPO_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3.03e+03 |
|    ep_rew_mean     | 1.07e+04 |
| time/              |          |
|    fps             | 621      |
|    iterations      | 1        |
|    time_elapsed    | 13       |
|    total_timesteps | 8192     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.16e+03    |
|    ep_rew_mean          | 1.2e+04     |
| time/                   |             |
|    fps                  | 350         |
|    iterations           | 2           |
|    time_elapsed         | 46          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.017487943 |
|    clip_fraction        | 0.22        |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.18       |
|    explained_variance   | -0.0153     |

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.8e+03     |
|    ep_rew_mean          | 1.28e+04    |
| time/                   |             |
|    fps                  | 249         |
|    iterations           | 11          |
|    time_elapsed         | 361         |
|    total_timesteps      | 90112       |
| train/                  |             |
|    approx_kl            | 0.013630414 |
|    clip_fraction        | 0.177       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.93       |
|    explained_variance   | 0.664       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.836       |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.0163     |
|    value_loss           | 2.4         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.79e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.87e+03    |
|    ep_rew_mean          | 1.72e+04    |
| time/                   |             |
|    fps                  | 240         |
|    iterations           | 21          |
|    time_elapsed         | 716         |
|    total_timesteps      | 172032      |
| train/                  |             |
|    approx_kl            | 0.016395358 |
|    clip_fraction        | 0.204       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.72       |
|    explained_variance   | 0.747       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.5         |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.0149     |
|    value_loss           | 4.49        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.92e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.74e+03    |
|    ep_rew_mean          | 2.46e+04    |
| time/                   |             |
|    fps                  | 240         |
|    iterations           | 31          |
|    time_elapsed         | 1054        |
|    total_timesteps      | 253952      |
| train/                  |             |
|    approx_kl            | 0.021955542 |
|    clip_fraction        | 0.257       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.61       |
|    explained_variance   | 0.803       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.65        |
|    n_updates            | 300         |
|    policy_gradient_loss | -0.0154     |
|    value_loss           | 5.7         |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.8e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.9e+03     |
|    ep_rew_mean          | 2.95e+04    |
| time/                   |             |
|    fps                  | 237         |
|    iterations           | 41          |
|    time_elapsed         | 1412        |
|    total_timesteps      | 335872      |
| train/                  |             |
|    approx_kl            | 0.023220083 |
|    clip_fraction        | 0.237       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0.813       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.68        |
|    n_updates            | 400         |
|    policy_gradient_loss | -0.0138     |
|    value_loss           | 7.58        |
-----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.85e+03  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.87e+03    |
|    ep_rew_mean          | 2.92e+04    |
| time/                   |             |
|    fps                  | 237         |
|    iterations           | 51          |
|    time_elapsed         | 1756        |
|    total_timesteps      | 417792      |
| train/                  |             |
|    approx_kl            | 0.024007382 |
|    clip_fraction        | 0.251       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.31       |
|    explained_variance   | 0.796       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.09        |
|    n_updates            | 500         |
|    policy_gradient_loss | -0.0106     |
|    value_loss           | 8.19        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.89e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.98e+03    |
|    ep_rew_mean          | 3.43e+04    |
| time/                   |             |
|    fps                  | 237         |
|    iterations           | 61          |
|    time_elapsed         | 2107        |
|    total_timesteps      | 499712      |
| train/                  |             |
|    approx_kl            | 0.026486693 |
|    clip_fraction        | 0.239       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.21       |
|    explained_variance   | 0.826       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.99        |
|    n_updates            | 600         |
|    policy_gradient_loss | -0.0102     |
|    value_loss           | 7.9         |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3.01e+03

<stable_baselines3.ppo.ppo.PPO at 0x28e6bd060>

## 4. Save and Reload Model

In [6]:
model_path = os.path.join("Training", "Saved Models", "PPO_CrazyClimber_500k_Model")

In [7]:
model.save(model_path)

In [8]:
del model

## 5. Evaluate and Test

In [9]:
model_path = os.path.join("Training", "Saved Models", "PPO_CrazyClimber_500k_Model")

In [10]:
model = PPO.load(model_path)

In [11]:
env = make_atari_env(environment_name, n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=4)

In [12]:
evaluate_policy(model, env, n_eval_episodes=10, render=True)

  logger.warn(


(24970.0, 4771.1738597540125)

In [18]:
env.close()

In [None]:
# Manually test the model using "model.predict"
EPISODES = 5

for episode in range(1, EPISODES + 1):
    # Setup
    done = False
    score = 0
    
    # Reset environment and get initial observation
    obs = env.reset()
    
    # RL Loop
    while not done:
        # Graphical view
        env.render()
        
        # Get one random available action
        action, _ = model.predict(obs)
        
        # Take that action
        obs, reward, done, info = env.step(action)
        
        # Update score
        score += reward
        
    # Print statistics
    print(f"Episode #{episode} - Score: {score}")

In [None]:
env.close()