# Atari CrazyClimber

In the Atari game CrazyClimber, the goal is to reach the top of four buildings while avoiding obstacles.

## 1. Import Dependencies

In [10]:
# Uncomment to install required packages
# !pip install 'stable-baselines3[extra]'
# !pip install 'pyglet==1.5.27'

In [1]:
# Import Gymnasium and Stable Baseline modules
import gym
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
import os

## 2. Get a Feel for the Environment

In [6]:
# Load environment
environment_name = "ALE/CrazyClimber-v5"
env = gym.make(environment_name)

A.L.E: Arcade Learning Environment (version 0.7.4+069f8bd)
[Powered by Stella]


In [25]:
# Action space
print(env.action_space)
print(env.action_space.sample())

Discrete(9)
7


In [27]:
# Observation space
print(env.observation_space)
print(env.observation_space.sample())

Box([[[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 ...

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]], [[[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 ...

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
 

In [None]:
# Reset and view environment
env.reset()
env.render()

In [None]:
# Test the environment
EPISODES = 5

for episode in range(1, EPISODES + 1):
    # Setup
    done = False
    score = 0
    
    # Reset environment and get initial observation
    obs = env.reset()
    
    # RL Loop
    while not done:
        # Graphical view
        env.render()
        
        # Get one random available action
        action = env.action_space.sample()
        
        # Take that action
        obs, reward, done, info = env.step(action)
        
        # Update score
        score += reward
        
    # Print statistics
    print(f"Episode #{episode} - Score: {score}")

In [None]:
# Close environment
env.close()

## 3. Vectorise and Train Model 

In [7]:
# load environment
env = make_atari_env(environment_name, n_envs=4, seed=0)
env = VecFrameStack(env, n_stack=4)

In [8]:
# establish log math and load model
log_path = os.path.join("Training", "Logs")
model = A2C("CnnPolicy", env, verbose=1, tensorboard_log=log_path)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [9]:
# train model, experiment with timesteps
model.learn(total_timesteps=500000)

Logging to Training/Logs/A2C_4
------------------------------------
| time/                 |          |
|    fps                | 322      |
|    iterations         | 100      |
|    time_elapsed       | 6        |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -0.72    |
|    explained_variance | 0.355    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.204    |
|    value_loss         | 0.51     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.71e+03 |
|    ep_rew_mean        | 1.05e+04 |
| time/                 |          |
|    fps                | 321      |
|    iterations         | 200      |
|    time_elapsed       | 12       |
|    total_timesteps    | 4000     |
| train/                |          |
|    entropy_loss       | -0.707   |
|    explained_variance | 0.887    |
|    le

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.74e+03 |
|    ep_rew_mean        | 1.33e+04 |
| time/                 |          |
|    fps                | 320      |
|    iterations         | 1500     |
|    time_elapsed       | 93       |
|    total_timesteps    | 30000    |
| train/                |          |
|    entropy_loss       | -1.62    |
|    explained_variance | 0.997    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1499     |
|    policy_loss        | -0.0362  |
|    value_loss         | 0.0992   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.72e+03 |
|    ep_rew_mean        | 1.32e+04 |
| time/                 |          |
|    fps                | 320      |
|    iterations         | 1600     |
|    time_elapsed       | 99       |
|    total_timesteps    | 32000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.7e+03  |
|    ep_rew_mean        | 1.33e+04 |
| time/                 |          |
|    fps                | 320      |
|    iterations         | 2900     |
|    time_elapsed       | 181      |
|    total_timesteps    | 58000    |
| train/                |          |
|    entropy_loss       | -0.988   |
|    explained_variance | 0.993    |
|    learning_rate      | 0.0007   |
|    n_updates          | 2899     |
|    policy_loss        | -0.164   |
|    value_loss         | 0.112    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.69e+03 |
|    ep_rew_mean        | 1.33e+04 |
| time/                 |          |
|    fps                | 320      |
|    iterations         | 3000     |
|    time_elapsed       | 187      |
|    total_timesteps    | 60000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.62e+03 |
|    ep_rew_mean        | 1.38e+04 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 4300     |
|    time_elapsed       | 269      |
|    total_timesteps    | 86000    |
| train/                |          |
|    entropy_loss       | -0.795   |
|    explained_variance | 0.985    |
|    learning_rate      | 0.0007   |
|    n_updates          | 4299     |
|    policy_loss        | 0.0817   |
|    value_loss         | 1.62     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.62e+03 |
|    ep_rew_mean        | 1.41e+04 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 4400     |
|    time_elapsed       | 275      |
|    total_timesteps    | 88000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.63e+03 |
|    ep_rew_mean        | 1.8e+04  |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 5600     |
|    time_elapsed       | 350      |
|    total_timesteps    | 112000   |
| train/                |          |
|    entropy_loss       | -0.932   |
|    explained_variance | 0.956    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5599     |
|    policy_loss        | 0.504    |
|    value_loss         | 4.38     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.63e+03 |
|    ep_rew_mean        | 1.84e+04 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 5700     |
|    time_elapsed       | 356      |
|    total_timesteps    | 114000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.68e+03 |
|    ep_rew_mean        | 2.51e+04 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 7000     |
|    time_elapsed       | 437      |
|    total_timesteps    | 140000   |
| train/                |          |
|    entropy_loss       | -0.573   |
|    explained_variance | 0.999    |
|    learning_rate      | 0.0007   |
|    n_updates          | 6999     |
|    policy_loss        | -0.0821  |
|    value_loss         | 0.342    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.68e+03 |
|    ep_rew_mean        | 2.54e+04 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 7100     |
|    time_elapsed       | 443      |
|    total_timesteps    | 142000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.8e+03  |
|    ep_rew_mean        | 3.24e+04 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 8400     |
|    time_elapsed       | 525      |
|    total_timesteps    | 168000   |
| train/                |          |
|    entropy_loss       | -0.212   |
|    explained_variance | 0.999    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8399     |
|    policy_loss        | -0.0543  |
|    value_loss         | 0.216    |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 2.81e+03  |
|    ep_rew_mean        | 3.29e+04  |
| time/                 |           |
|    fps                | 319       |
|    iterations         | 8500      |
|    time_elapsed       | 531       |
|    total_timesteps    | 170000    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.79e+03 |
|    ep_rew_mean        | 3.69e+04 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 9700     |
|    time_elapsed       | 607      |
|    total_timesteps    | 194000   |
| train/                |          |
|    entropy_loss       | -0.172   |
|    explained_variance | 0.98     |
|    learning_rate      | 0.0007   |
|    n_updates          | 9699     |
|    policy_loss        | -0.0644  |
|    value_loss         | 0.972    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.81e+03 |
|    ep_rew_mean        | 3.67e+04 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 9800     |
|    time_elapsed       | 613      |
|    total_timesteps    | 196000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.78e+03 |
|    ep_rew_mean        | 3.76e+04 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 11000    |
|    time_elapsed       | 688      |
|    total_timesteps    | 220000   |
| train/                |          |
|    entropy_loss       | -0.0747  |
|    explained_variance | 0.617    |
|    learning_rate      | 0.0007   |
|    n_updates          | 10999    |
|    policy_loss        | -0.825   |
|    value_loss         | 93.3     |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 2.77e+03  |
|    ep_rew_mean        | 3.75e+04  |
| time/                 |           |
|    fps                | 319       |
|    iterations         | 11100     |
|    time_elapsed       | 694       |
|    total_timesteps    | 222000    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.79e+03 |
|    ep_rew_mean        | 3.73e+04 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 12300    |
|    time_elapsed       | 770      |
|    total_timesteps    | 246000   |
| train/                |          |
|    entropy_loss       | -0.13    |
|    explained_variance | 0.997    |
|    learning_rate      | 0.0007   |
|    n_updates          | 12299    |
|    policy_loss        | 0.0813   |
|    value_loss         | 0.523    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.79e+03 |
|    ep_rew_mean        | 3.71e+04 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 12400    |
|    time_elapsed       | 777      |
|    total_timesteps    | 248000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.76e+03 |
|    ep_rew_mean        | 3.73e+04 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 13700    |
|    time_elapsed       | 857      |
|    total_timesteps    | 274000   |
| train/                |          |
|    entropy_loss       | -0.657   |
|    explained_variance | 0.891    |
|    learning_rate      | 0.0007   |
|    n_updates          | 13699    |
|    policy_loss        | 0.0398   |
|    value_loss         | 37.5     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.76e+03 |
|    ep_rew_mean        | 3.74e+04 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 13800    |
|    time_elapsed       | 863      |
|    total_timesteps    | 276000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.8e+03  |
|    ep_rew_mean        | 3.79e+04 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 15000    |
|    time_elapsed       | 940      |
|    total_timesteps    | 300000   |
| train/                |          |
|    entropy_loss       | -0.328   |
|    explained_variance | 0.9      |
|    learning_rate      | 0.0007   |
|    n_updates          | 14999    |
|    policy_loss        | 1.43     |
|    value_loss         | 7.89     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.79e+03 |
|    ep_rew_mean        | 3.78e+04 |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 15100    |
|    time_elapsed       | 946      |
|    total_timesteps    | 302000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.82e+03 |
|    ep_rew_mean        | 3.95e+04 |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 16300    |
|    time_elapsed       | 1022     |
|    total_timesteps    | 326000   |
| train/                |          |
|    entropy_loss       | -0.0186  |
|    explained_variance | 0.389    |
|    learning_rate      | 0.0007   |
|    n_updates          | 16299    |
|    policy_loss        | -0.00146 |
|    value_loss         | 170      |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.83e+03 |
|    ep_rew_mean        | 3.96e+04 |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 16400    |
|    time_elapsed       | 1028     |
|    total_timesteps    | 328000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.74e+03 |
|    ep_rew_mean        | 3.89e+04 |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 17600    |
|    time_elapsed       | 1104     |
|    total_timesteps    | 352000   |
| train/                |          |
|    entropy_loss       | -0.035   |
|    explained_variance | 0.228    |
|    learning_rate      | 0.0007   |
|    n_updates          | 17599    |
|    policy_loss        | 0.0172   |
|    value_loss         | 1.43     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.75e+03 |
|    ep_rew_mean        | 3.89e+04 |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 17700    |
|    time_elapsed       | 1110     |
|    total_timesteps    | 354000   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 2.81e+03  |
|    ep_rew_mean        | 3.97e+04  |
| time/                 |           |
|    fps                | 318       |
|    iterations         | 18900     |
|    time_elapsed       | 1186      |
|    total_timesteps    | 378000    |
| train/                |           |
|    entropy_loss       | -0.00403  |
|    explained_variance | 0.955     |
|    learning_rate      | 0.0007    |
|    n_updates          | 18899     |
|    policy_loss        | -0.000447 |
|    value_loss         | 0.672     |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.81e+03 |
|    ep_rew_mean        | 3.98e+04 |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 19000    |
|    time_elapsed       | 1192     |
|    total_timesteps    | 380000   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.78e+03 |
|    ep_rew_mean        | 3.85e+04 |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 20200    |
|    time_elapsed       | 1266     |
|    total_timesteps    | 404000   |
| train/                |          |
|    entropy_loss       | -0.105   |
|    explained_variance | 0.973    |
|    learning_rate      | 0.0007   |
|    n_updates          | 20199    |
|    policy_loss        | -0.00952 |
|    value_loss         | 1.53     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.79e+03 |
|    ep_rew_mean        | 3.86e+04 |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 20300    |
|    time_elapsed       | 1273     |
|    total_timesteps    | 406000   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 2.8e+03   |
|    ep_rew_mean        | 3.89e+04  |
| time/                 |           |
|    fps                | 318       |
|    iterations         | 21500     |
|    time_elapsed       | 1348      |
|    total_timesteps    | 430000    |
| train/                |           |
|    entropy_loss       | -0.172    |
|    explained_variance | 0.548     |
|    learning_rate      | 0.0007    |
|    n_updates          | 21499     |
|    policy_loss        | -0.000324 |
|    value_loss         | 146       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 2.79e+03  |
|    ep_rew_mean        | 3.88e+04  |
| time/                 |           |
|    fps                | 318       |
|    iterations         | 21600     |
|    time_elapsed       | 1354      |
|    total_timesteps    | 432000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 2.78e+03  |
|    ep_rew_mean        | 3.94e+04  |
| time/                 |           |
|    fps                | 318       |
|    iterations         | 22800     |
|    time_elapsed       | 1430      |
|    total_timesteps    | 456000    |
| train/                |           |
|    entropy_loss       | -0.00751  |
|    explained_variance | 0.102     |
|    learning_rate      | 0.0007    |
|    n_updates          | 22799     |
|    policy_loss        | -0.000414 |
|    value_loss         | 98.7      |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.77e+03 |
|    ep_rew_mean        | 3.94e+04 |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 22900    |
|    time_elapsed       | 1436     |
|    total_timesteps    | 458000   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.76e+03 |
|    ep_rew_mean        | 3.9e+04  |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 24100    |
|    time_elapsed       | 1511     |
|    total_timesteps    | 482000   |
| train/                |          |
|    entropy_loss       | -0.213   |
|    explained_variance | 0.996    |
|    learning_rate      | 0.0007   |
|    n_updates          | 24099    |
|    policy_loss        | -0.104   |
|    value_loss         | 0.646    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 2.76e+03 |
|    ep_rew_mean        | 3.91e+04 |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 24200    |
|    time_elapsed       | 1517     |
|    total_timesteps    | 484000   |
| train/                |          |
|

<stable_baselines3.a2c.a2c.A2C at 0x17fb19a50>

## 4. Save and Reload Model

In [11]:
model_path = os.path.join("Training", "Saved Models", "A2C_CrazyClimber_500k_Model")

In [12]:
model.save(model_path)

In [13]:
del model

## 5. Evaluate and Test

In [10]:
model_path = os.path.join("Training", "Saved Models", "A2C_CrazyClimber_500k_Model")

In [11]:
model = A2C.load(model_path)

In [12]:
env = make_atari_env(environment_name, n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=4)

In [14]:
evaluate_policy(model, env, n_eval_episodes=10, render=True)

(35360.0, 5237.785791725355)

In [18]:
env.close()

In [None]:
# Manually test the model using "model.predict"
EPISODES = 5

for episode in range(1, EPISODES + 1):
    # Setup
    done = False
    score = 0
    
    # Reset environment and get initial observation
    obs = env.reset()
    
    # RL Loop
    while not done:
        # Graphical view
        env.render()
        
        # Get one random available action
        action, _ = model.predict(obs)
        
        # Take that action
        obs, reward, done, info = env.step(action)
        
        # Update score
        score += reward
        
    # Print statistics
    print(f"Episode #{episode} - Score: {score}")

In [None]:
env.close()