In [None]:
#############################
This code was written for the purpose of creating a dataset of 6 different algorithms effects on different environments in the OpenAI gym documentation
It is written in a way where the code can be easily interchanged to run experiments on each environment, quickly changing the algorithm used and the amount
of episodes of training for each environment. Logging for this information has been done manually by copying the results of each experiment into a 
separate spreadsheet.

This code was heavily inspired by
https://www.youtube.com/watch?v=Mut_u40Sqz4&ab_channel=NicholasRenotte
    
which takes a lot of code from the stable baselines documentation

###############################

In [1]:
import os
import gym
from stable_baselines3 import PPO, TD3, DDPG, A2C , DQN, SAC
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import numpy as np
import time
algorithm_name = "PPO"

environment_name = 'CarRacing-v0'
#environment_name = 'Humanoid-v2'
#environment_names = ['CartPole-v1', 'Acrobot-v1',
#'MountainCar-v0', 'MountainCarContinuous-v0', 'LunarLander-v2', 
#'BipedalWalker-v3', 'CarRacing-v0', Pendulum-v1]
#"BipedalWalker-v3", hardcore=True to make bipedal hardcore environment
#Mujoco environment is InvertedDoublePendulum-v2


env = gym.make(environment_name)



In [18]:

episodes = 5
scores = []

for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0
    
    while not done:
        env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
    
    scores.append(score)
    print('Episode:{} Score:{}'.format(episode, score))

env.close()


#below will calculate and print mean score and standard deviation, copy and paste this into a csv file for later
#data analysis
mean_score = np.mean(scores)
std_score = np.std(scores)


print('Mean Average Score: {:.2f}'.format(mean_score))
print('Standard Deviation: {:.2f}'.format(std_score))

Episode:1 Score:14.0
Episode:2 Score:13.0
Episode:3 Score:10.0
Episode:4 Score:14.0
Episode:5 Score:24.0
Mean Average Score: 15.00
Standard Deviation: 4.73


In [2]:
log_path = os.path.join('Training', 'Logs')

if algorithm_name == 'PPO':
    model = PPO('CnnPolicy', env, verbose=1, tensorboard_log=log_path)
elif algorithm_name == 'TD3':
    model = TD3('MlpPolicy', env, verbose=1, tensorboard_log=log_path)
elif algorithm_name == 'DDPG':
    model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=log_path)
elif algorithm_name == 'A2C':
    model = A2C('MlpPolicy', env, verbose=1, tensorboard_log=log_path)
elif algorithm_name == 'DQN':
    model = DQN('MlpPolicy', env, verbose=1, tensorboard_log=log_path)
elif algorithm_name == 'SAC':
    model = SAC('MlpPolicy', env, verbose=1, tensorboard_log=log_path)
else:
    raise ValueError(f"Invalid algorithm name: {algorithm_name}")

env = gym.make(environment_name)
env = DummyVecEnv([lambda: env])

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [3]:
start_time = time.time()
number_of_episodes = 500000


model.learn(total_timesteps=number_of_episodes)

end_time = time.time()
elapsed_time_ms = (end_time - start_time) * 1000
elapsed_time_sec = elapsed_time_ms / 1000

print("Training time: {:.3f} sec".format(elapsed_time_sec))

Track generation: 1155..1448 -> 293-tiles track
Logging to Training/Logs/PPO_113
Track generation: 992..1244 -> 252-tiles track
Track generation: 1124..1409 -> 285-tiles track
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -51.8    |
| time/              |          |
|    fps             | 236      |
|    iterations      | 1        |
|    time_elapsed    | 8        |
|    total_timesteps | 2048     |
---------------------------------
Track generation: 1152..1444 -> 292-tiles track
Track generation: 1231..1543 -> 312-tiles track
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | -51.6        |
| time/                   |              |
|    fps                  | 150          |
|    iterations           | 2            |
|    time_elapsed         | 27           |
|    total_timesteps      | 4096         |
|

Track generation: 1055..1331 -> 276-tiles track
Track generation: 1312..1644 -> 332-tiles track
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | -46.9       |
| time/                   |             |
|    fps                  | 116         |
|    iterations           | 10          |
|    time_elapsed         | 175         |
|    total_timesteps      | 20480       |
| train/                  |             |
|    approx_kl            | 0.017565988 |
|    clip_fraction        | 0.175       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.01       |
|    explained_variance   | 0.295       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0105     |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.0239     |
|    std                  | 0.915       |
|    value_loss           | 0.2         |
----------------------

Track generation: 1111..1396 -> 285-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1142..1432 -> 290-tiles track
Track generation: 1003..1263 -> 260-tiles track
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | -36         |
| time/                   |             |
|    fps                  | 112         |
|    iterations           | 18          |
|    time_elapsed         | 326         |
|    total_timesteps      | 36864       |
| train/                  |             |
|    approx_kl            | 0.039719738 |
|    clip_fraction        | 0.276       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.66       |
|    explained_variance   | 0.936       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00461    |
|    n_updates            | 170         |
|    policy_gradient

Track generation: 1028..1296 -> 268-tiles track
Track generation: 1098..1383 -> 285-tiles track
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | -15.4       |
| time/                   |             |
|    fps                  | 111         |
|    iterations           | 26          |
|    time_elapsed         | 479         |
|    total_timesteps      | 53248       |
| train/                  |             |
|    approx_kl            | 0.055273205 |
|    clip_fraction        | 0.361       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.35       |
|    explained_variance   | 0.978       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.133       |
|    n_updates            | 250         |
|    policy_gradient_loss | -0.031      |
|    std                  | 0.739       |
|    value_loss           | 0.766       |
----------------------

Track generation: 1049..1320 -> 271-tiles track
Track generation: 1254..1572 -> 318-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 27.3       |
| time/                   |            |
|    fps                  | 110        |
|    iterations           | 34         |
|    time_elapsed         | 630        |
|    total_timesteps      | 69632      |
| train/                  |            |
|    approx_kl            | 0.04865086 |
|    clip_fraction        | 0.41       |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.21      |
|    explained_variance   | 0.986      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.199      |
|    n_updates            | 330        |
|    policy_gradient_loss | -0.00516   |
|    std                  | 0.707      |
|    value_loss           | 1.05       |
----------------------------------------
Tr

Track generation: 1002..1257 -> 255-tiles track
Track generation: 1010..1267 -> 257-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 80.8       |
| time/                   |            |
|    fps                  | 109        |
|    iterations           | 42         |
|    time_elapsed         | 782        |
|    total_timesteps      | 86016      |
| train/                  |            |
|    approx_kl            | 0.08482377 |
|    clip_fraction        | 0.407      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.07      |
|    explained_variance   | 0.865      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.593      |
|    n_updates            | 410        |
|    policy_gradient_loss | -0.00925   |
|    std                  | 0.678      |
|    value_loss           | 4.14       |
----------------------------------------
Tr

Track generation: 1228..1539 -> 311-tiles track
Track generation: 1148..1439 -> 291-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 978        |
|    ep_rew_mean          | 106        |
| time/                   |            |
|    fps                  | 109        |
|    iterations           | 51         |
|    time_elapsed         | 952        |
|    total_timesteps      | 104448     |
| train/                  |            |
|    approx_kl            | 0.10540496 |
|    clip_fraction        | 0.498      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.04      |
|    explained_variance   | 0.881      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.52       |
|    n_updates            | 500        |
|    policy_gradient_loss | 0.00895    |
|    std                  | 0.671      |
|    value_loss           | 11.9       |
----------------------------------------
Tr

Track generation: 1077..1350 -> 273-tiles track
Track generation: 1074..1347 -> 273-tiles track
Track generation: 1163..1458 -> 295-tiles track
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 969         |
|    ep_rew_mean          | 174         |
| time/                   |             |
|    fps                  | 109         |
|    iterations           | 60          |
|    time_elapsed         | 1121        |
|    total_timesteps      | 122880      |
| train/                  |             |
|    approx_kl            | 0.094212025 |
|    clip_fraction        | 0.509       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.03       |
|    explained_variance   | 0.893       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.21        |
|    n_updates            | 590         |
|    policy_gradient_loss | 0.00736     |
|    std                  | 0.665       |
|    value_loss 

Track generation: 1308..1639 -> 331-tiles track
Track generation: 1098..1377 -> 279-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 969        |
|    ep_rew_mean          | 251        |
| time/                   |            |
|    fps                  | 109        |
|    iterations           | 68         |
|    time_elapsed         | 1274       |
|    total_timesteps      | 139264     |
| train/                  |            |
|    approx_kl            | 0.05218497 |
|    clip_fraction        | 0.456      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.94      |
|    explained_variance   | 0.807      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.826      |
|    n_updates            | 670        |
|    policy_gradient_loss | 0.00633    |
|    std                  | 0.646      |
|    value_loss           | 4.53       |
----------------------------------------
Tr

Track generation: 1106..1386 -> 280-tiles track
Track generation: 1038..1305 -> 267-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1343..1682 -> 339-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 969        |
|    ep_rew_mean          | 287        |
| time/                   |            |
|    fps                  | 109        |
|    iterations           | 76         |
|    time_elapsed         | 1426       |
|    total_timesteps      | 155648     |
| train/                  |            |
|    approx_kl            | 0.05292443 |
|    clip_fraction        | 0.354      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.79      |
|    explained_variance   | 0.97       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.354      |
|    n_updates            | 750        |
|    policy_gradient_loss | 0.0103    

Track generation: 1234..1547 -> 313-tiles track
Track generation: 1218..1526 -> 308-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 969        |
|    ep_rew_mean          | 311        |
| time/                   |            |
|    fps                  | 108        |
|    iterations           | 84         |
|    time_elapsed         | 1579       |
|    total_timesteps      | 172032     |
| train/                  |            |
|    approx_kl            | 0.08181356 |
|    clip_fraction        | 0.429      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.79      |
|    explained_variance   | 0.963      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.2        |
|    n_updates            | 830        |
|    policy_gradient_loss | 0.00477    |
|    std                  | 0.616      |
|    value_loss           | 7.82       |
----------------------------------------
Tr

Track generation: 1238..1552 -> 314-tiles track
Track generation: 1192..1494 -> 302-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 972        |
|    ep_rew_mean          | 360        |
| time/                   |            |
|    fps                  | 108        |
|    iterations           | 93         |
|    time_elapsed         | 1750       |
|    total_timesteps      | 190464     |
| train/                  |            |
|    approx_kl            | 0.06984711 |
|    clip_fraction        | 0.5        |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.75      |
|    explained_variance   | 0.789      |
|    learning_rate        | 0.0003     |
|    loss                 | 3.5        |
|    n_updates            | 920        |
|    policy_gradient_loss | 0.0222     |
|    std                  | 0.608      |
|    value_loss           | 13.2       |
----------------------------------------
Tr

Track generation: 1151..1443 -> 292-tiles track
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 988         |
|    ep_rew_mean          | 413         |
| time/                   |             |
|    fps                  | 108         |
|    iterations           | 101         |
|    time_elapsed         | 1905        |
|    total_timesteps      | 206848      |
| train/                  |             |
|    approx_kl            | 0.069574155 |
|    clip_fraction        | 0.434       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.67       |
|    explained_variance   | 0.96        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.65        |
|    n_updates            | 1000        |
|    policy_gradient_loss | 0.00981     |
|    std                  | 0.597       |
|    value_loss           | 5.14        |
-----------------------------------------
Track generation: 1067..1345

Track generation: 1059..1328 -> 269-tiles track
Track generation: 1248..1564 -> 316-tiles track
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 998         |
|    ep_rew_mean          | 446         |
| time/                   |             |
|    fps                  | 108         |
|    iterations           | 109         |
|    time_elapsed         | 2060        |
|    total_timesteps      | 223232      |
| train/                  |             |
|    approx_kl            | 0.099306926 |
|    clip_fraction        | 0.531       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.66       |
|    explained_variance   | 0.78        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.395       |
|    n_updates            | 1080        |
|    policy_gradient_loss | 0.0129      |
|    std                  | 0.593       |
|    value_loss           | 4.55        |
----------------------

Track generation: 1207..1513 -> 306-tiles track
Track generation: 1167..1463 -> 296-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 996        |
|    ep_rew_mean          | 464        |
| time/                   |            |
|    fps                  | 108        |
|    iterations           | 117        |
|    time_elapsed         | 2214       |
|    total_timesteps      | 239616     |
| train/                  |            |
|    approx_kl            | 0.09440869 |
|    clip_fraction        | 0.491      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.65      |
|    explained_variance   | 0.95       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.881      |
|    n_updates            | 1160       |
|    policy_gradient_loss | 0.00694    |
|    std                  | 0.592      |
|    value_loss           | 5          |
----------------------------------------
Tr

Track generation: 1252..1569 -> 317-tiles track
Track generation: 1320..1654 -> 334-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 991        |
|    ep_rew_mean          | 522        |
| time/                   |            |
|    fps                  | 108        |
|    iterations           | 126        |
|    time_elapsed         | 2387       |
|    total_timesteps      | 258048     |
| train/                  |            |
|    approx_kl            | 0.09330474 |
|    clip_fraction        | 0.525      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.7       |
|    explained_variance   | 0.913      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.4        |
|    n_updates            | 1250       |
|    policy_gradient_loss | 0.0229     |
|    std                  | 0.606      |
|    value_loss           | 9.85       |
----------------------------------------
Tr

Track generation: 1027..1293 -> 266-tiles track
Track generation: 1116..1399 -> 283-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 991        |
|    ep_rew_mean          | 538        |
| time/                   |            |
|    fps                  | 107        |
|    iterations           | 134        |
|    time_elapsed         | 2542       |
|    total_timesteps      | 274432     |
| train/                  |            |
|    approx_kl            | 0.08432781 |
|    clip_fraction        | 0.453      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.74      |
|    explained_variance   | 0.929      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.79       |
|    n_updates            | 1330       |
|    policy_gradient_loss | 0.00448    |
|    std                  | 0.612      |
|    value_loss           | 8.06       |
----------------------------------------
Tr

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 991        |
|    ep_rew_mean          | 549        |
| time/                   |            |
|    fps                  | 107        |
|    iterations           | 142        |
|    time_elapsed         | 2697       |
|    total_timesteps      | 290816     |
| train/                  |            |
|    approx_kl            | 0.07964842 |
|    clip_fraction        | 0.486      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.66      |
|    explained_variance   | 0.859      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.914      |
|    n_updates            | 1410       |
|    policy_gradient_loss | 0.0191     |
|    std                  | 0.6        |
|    value_loss           | 6.44       |
----------------------------------------
Track generation: 1039..1307 -> 268-tiles track
Track generation: 1295..1623 -> 328-tiles track
--

Track generation: 1240..1554 -> 314-tiles track
Track generation: 1134..1426 -> 292-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 991        |
|    ep_rew_mean          | 553        |
| time/                   |            |
|    fps                  | 107        |
|    iterations           | 151        |
|    time_elapsed         | 2869       |
|    total_timesteps      | 309248     |
| train/                  |            |
|    approx_kl            | 0.18892604 |
|    clip_fraction        | 0.547      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.71      |
|    explained_variance   | 0.83       |
|    learning_rate        | 0.0003     |
|    loss                 | 1.59       |
|    n_updates            | 1500       |
|    policy_gradient_loss | 0.00894    |
|    std                  | 0.61       |
|    value_loss           | 11.7       |
----------------------------------------
Tr

Track generation: 1374..1721 -> 347-tiles track
Track generation: 1103..1383 -> 280-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 984        |
|    ep_rew_mean          | 529        |
| time/                   |            |
|    fps                  | 107        |
|    iterations           | 159        |
|    time_elapsed         | 3021       |
|    total_timesteps      | 325632     |
| train/                  |            |
|    approx_kl            | 0.19327639 |
|    clip_fraction        | 0.591      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.77      |
|    explained_variance   | 0.907      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.842      |
|    n_updates            | 1580       |
|    policy_gradient_loss | 0.0326     |
|    std                  | 0.622      |
|    value_loss           | 11.8       |
----------------------------------------
Tr

Track generation: 1168..1469 -> 301-tiles track
Track generation: 1301..1637 -> 336-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 988        |
|    ep_rew_mean          | 498        |
| time/                   |            |
|    fps                  | 107        |
|    iterations           | 168        |
|    time_elapsed         | 3194       |
|    total_timesteps      | 344064     |
| train/                  |            |
|    approx_kl            | 0.12772447 |
|    clip_fraction        | 0.59       |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.83      |
|    explained_variance   | 0.922      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.613      |
|    n_updates            | 1670       |
|    policy_gradient_loss | 0.0352     |
|    std                  | 0.632      |
|    value_loss           | 6.44       |
----------------------------------------
Tr

Track generation: 1071..1348 -> 277-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 990        |
|    ep_rew_mean          | 445        |
| time/                   |            |
|    fps                  | 107        |
|    iterations           | 176        |
|    time_elapsed         | 3347       |
|    total_timesteps      | 360448     |
| train/                  |            |
|    approx_kl            | 0.13481624 |
|    clip_fraction        | 0.502      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.82      |
|    explained_variance   | 0.885      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.276      |
|    n_updates            | 1750       |
|    policy_gradient_loss | -0.00468   |
|    std                  | 0.631      |
|    value_loss           | 8.38       |
----------------------------------------
Track generation: 1123..1407 -> 284-tiles track
Tr

Track generation: 1176..1474 -> 298-tiles track
Track generation: 1156..1449 -> 293-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 987        |
|    ep_rew_mean          | 433        |
| time/                   |            |
|    fps                  | 107        |
|    iterations           | 184        |
|    time_elapsed         | 3501       |
|    total_timesteps      | 376832     |
| train/                  |            |
|    approx_kl            | 0.11930655 |
|    clip_fraction        | 0.515      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.71      |
|    explained_variance   | 0.869      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.589      |
|    n_updates            | 1830       |
|    policy_gradient_loss | 0.0182     |
|    std                  | 0.611      |
|    value_loss           | 3.41       |
----------------------------------------
Tr

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 980        |
|    ep_rew_mean          | 440        |
| time/                   |            |
|    fps                  | 107        |
|    iterations           | 192        |
|    time_elapsed         | 3655       |
|    total_timesteps      | 393216     |
| train/                  |            |
|    approx_kl            | 0.39961296 |
|    clip_fraction        | 0.609      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.76      |
|    explained_variance   | 0.929      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.462      |
|    n_updates            | 1910       |
|    policy_gradient_loss | 0.0105     |
|    std                  | 0.619      |
|    value_loss           | 7.3        |
----------------------------------------
Track generation: 1035..1298 -> 263-tiles track
Track generation: 1015..1273 -> 258-tiles track
--

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 983         |
|    ep_rew_mean          | 431         |
| time/                   |             |
|    fps                  | 107         |
|    iterations           | 200         |
|    time_elapsed         | 3810        |
|    total_timesteps      | 409600      |
| train/                  |             |
|    approx_kl            | 0.122294694 |
|    clip_fraction        | 0.513       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.78       |
|    explained_variance   | 0.959       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.306       |
|    n_updates            | 1990        |
|    policy_gradient_loss | -0.00799    |
|    std                  | 0.619       |
|    value_loss           | 3.78        |
-----------------------------------------
Track generation: 1122..1407 -> 285-tiles track
Track generation: 990..1245 

Track generation: 1123..1408 -> 285-tiles track
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 990       |
|    ep_rew_mean          | 475       |
| time/                   |           |
|    fps                  | 107       |
|    iterations           | 208       |
|    time_elapsed         | 3965      |
|    total_timesteps      | 425984    |
| train/                  |           |
|    approx_kl            | 0.1130427 |
|    clip_fraction        | 0.523     |
|    clip_range           | 0.2       |
|    entropy_loss         | -2.76     |
|    explained_variance   | 0.922     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.422     |
|    n_updates            | 2070      |
|    policy_gradient_loss | 0.0188    |
|    std                  | 0.619     |
|    value_loss           | 7.4       |
---------------------------------------
Track generation: 1215..1523 -> 308-tiles track
Track generation: 1104..

Track generation: 1200..1504 -> 304-tiles track
Track generation: 1241..1555 -> 314-tiles track
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 990       |
|    ep_rew_mean          | 496       |
| time/                   |           |
|    fps                  | 107       |
|    iterations           | 217       |
|    time_elapsed         | 4140      |
|    total_timesteps      | 444416    |
| train/                  |           |
|    approx_kl            | 0.2711899 |
|    clip_fraction        | 0.531     |
|    clip_range           | 0.2       |
|    entropy_loss         | -2.74     |
|    explained_variance   | 0.97      |
|    learning_rate        | 0.0003    |
|    loss                 | 0.388     |
|    n_updates            | 2160      |
|    policy_gradient_loss | 0.0107    |
|    std                  | 0.611     |
|    value_loss           | 4.33      |
---------------------------------------
Track generation: 1149..

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 989        |
|    ep_rew_mean          | 528        |
| time/                   |            |
|    fps                  | 107        |
|    iterations           | 225        |
|    time_elapsed         | 4295       |
|    total_timesteps      | 460800     |
| train/                  |            |
|    approx_kl            | 0.17645034 |
|    clip_fraction        | 0.496      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.77      |
|    explained_variance   | 0.98       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.135      |
|    n_updates            | 2240       |
|    policy_gradient_loss | 0.0165     |
|    std                  | 0.618      |
|    value_loss           | 2.72       |
----------------------------------------
Track generation: 1071..1347 -> 276-tiles track
retry to generate track (normal if there are not m

Track generation: 987..1238 -> 251-tiles track
Track generation: 1060..1327 -> 267-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1124..1409 -> 285-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 993        |
|    ep_rew_mean          | 513        |
| time/                   |            |
|    fps                  | 107        |
|    iterations           | 233        |
|    time_elapsed         | 4449       |
|    total_timesteps      | 477184     |
| train/                  |            |
|    approx_kl            | 0.16695535 |
|    clip_fraction        | 0.593      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.76      |
|    explained_variance   | 0.966      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.282      |
|    n_updates            | 2320       |
|    policy_gradient_loss | 0.0149     

Track generation: 953..1202 -> 249-tiles track
Track generation: 1065..1338 -> 273-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1295..1623 -> 328-tiles track
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 999        |
|    ep_rew_mean          | 489        |
| time/                   |            |
|    fps                  | 107        |
|    iterations           | 241        |
|    time_elapsed         | 4603       |
|    total_timesteps      | 493568     |
| train/                  |            |
|    approx_kl            | 0.20764066 |
|    clip_fraction        | 0.569      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.79      |
|    explained_variance   | 0.906      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.437      |
|    n_updates            | 2400       |
|    policy_gradient_loss | 0.0356     

In [4]:
model_name = f"{algorithm_name}_{environment_name}_{number_of_episodes}"
Algorithm_Path = os.path.join('Training', 'Saved Models', model_name)

model.save(Algorithm_Path)


#model_path = os.path.join('Training', 'Saved Models', 'DQN_LunarLander-v2_10000')
#model = DQN.load(model_path)


In [6]:


episodes = 5
scores = []
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    
    while not done:
        
        env.render()
        action, _ = model.predict(obs) # WE ARE NOW USING OUR MODEL
        obs, reward, done, info = env.step(action)
        score += reward
    
    print('Episode:{} Score:{}'.format(episode, score))
    scores.append(score)
    
env.close()


mean_score = sum(scores) / len(scores)
std_dev = (sum((score - mean_score)**2 for score in scores) / len(scores))**0.5

print(f"Mean Score: {mean_score}")
print(f"Standard Deviation: {std_dev}")



#del model


Track generation: 1126..1412 -> 286-tiles track
Track generation: 1144..1434 -> 290-tiles track
Episode:1 Score:[482.4544]
Track generation: 1296..1624 -> 328-tiles track
Track generation: 1140..1429 -> 289-tiles track
Episode:2 Score:[340.36218]
Track generation: 1218..1526 -> 308-tiles track
Track generation: 1122..1405 -> 283-tiles track
Episode:3 Score:[-5.5367365]
Track generation: 1107..1395 -> 288-tiles track
Track generation: 1160..1454 -> 294-tiles track
Episode:4 Score:[175.26248]
Track generation: 1047..1313 -> 266-tiles track
Track generation: 1270..1591 -> 321-tiles track
Episode:5 Score:[481.12967]
Mean Score: [294.7344]
Standard Deviation: [187.88304]


In [71]:
del model