In [1]:
import base64
import IPython
import imageio

def embed_mp4(filename):
    """Embed an mp4 file in the notebook as an inline HTML5 video.

    The whole file is read into memory and base64-encoded into a ``data:``
    URI, so the resulting markup is roughly 4/3 the size of the file.

    Args:
        filename: Path of the mp4 file to embed.

    Returns:
        The object produced by ``IPython.display.HTML`` for a ``<video>`` tag
        whose source is the base64-encoded file content.
    """
    # Read inside a context manager so the handle is closed deterministically
    # instead of lingering until garbage collection.
    with open(filename, 'rb') as video_file:
        video = video_file.read()
    b64 = base64.b64encode(video)
    tag = '''
    <video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4">
    Your browser does not support the video tag.
    </video>'''.format(b64.decode())

    return IPython.display.HTML(tag)

def record_game(model, env, num_episodes=5, video_filename='video.mp4', fps=60):
    """Play episodes with ``model`` in ``env`` and record every frame to a video.

    For each episode the environment is reset, the initial frame is captured,
    and then the model's predicted action is applied step by step until the
    environment reports ``done``. One frame is appended after every step and
    the episode's summed reward is printed as ``score: <total>``.

    Args:
        model: Object exposing ``predict(obs)`` that returns a pair whose
            first element is the action to take.
        env: Environment exposing ``reset()``, ``step(action)`` (returning
            ``obs, reward, done, info``) and ``render('rgb_array')``.
            The environment is not closed by this function.
        num_episodes: Number of episodes to play and record.
        video_filename: Path of the video file to write.
        fps: Frame rate handed to ``imageio.get_writer`` (defaults to 60).

    Returns:
        None. The video is written to ``video_filename``.
    """
    # The writer is a context manager, so the file is finalised even if an
    # episode raises part-way through.
    with imageio.get_writer(video_filename, fps=fps) as video:
        for _ in range(num_episodes):
            obs = env.reset()
            done = False
            total_reward = 0
            # Capture the starting frame before any action is taken.
            video.append_data(env.render('rgb_array'))

            while not done:
                # The second element returned by predict() is not needed here.
                action, _state = model.predict(obs)
                obs, reward, done, _info = env.step(action)
                total_reward += reward
                video.append_data(env.render('rgb_array'))

            print("score:", total_reward)

In [2]:
import os
import gym
import slimevolleygym
from slimevolleygym import SurvivalRewardEnv

from stable_baselines.common.policies import MlpPolicy
from stable_baselines import logger
from stable_baselines.common.callbacks import EvalCallback

from stable_baselines.ppo1 import PPO1
from stable_baselines import A2C, ACER, ACKTR, DQN, HER, GAIL, TRPO

# Registry mapping the short algorithm name accepted by experiment() to the
# stable-baselines model class that experiment() instantiates.
# NOTE(review): experiment() constructs every entry as `cls(MlpPolicy, env, verbose=2)`;
# confirm that call signature is valid for each class listed here (HER, GAIL
# and DQN in particular) before relying on the corresponding key.
algo = {
    'a2c': A2C,
    'acer': ACER,
    'acktr': ACKTR,
    'dqn': DQN,
    'her': HER,
    'gail': GAIL,
    'ppo1': PPO1,  # imported from stable_baselines.ppo1 above; registered so it is selectable too
    'trpo': TRPO,
}
# Models created by experiment(), keyed by the same algorithm name, so later
# cells can reach a model even when training is interrupted before it returns.
trained_model = {}

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [3]:
def experiment(model_str, timesteps=15_000_000, seed=721, eval_freq=25000, eval_episodes=10):
    """Train one algorithm from ``algo`` on SlimeVolley-v0 with periodic evaluation.

    Logs and checkpoints go to the directory ``"sb-" + model_str``. The model
    is stored in ``trained_model[model_str]`` before training starts, so it
    remains reachable from later cells even if training is interrupted.

    Args:
        model_str: Key into the module-level ``algo`` registry.
        timesteps: Total number of training timesteps (converted with ``int``).
        seed: Seed applied to the training and evaluation environments.
        eval_freq: Value passed to ``EvalCallback`` as ``eval_freq``.
        eval_episodes: Value passed to ``EvalCallback`` as ``n_eval_episodes``.

    Raises:
        KeyError: If ``model_str`` is not a key of ``algo``.
    """
    num_timesteps = int(timesteps)
    logdir = "sb-" + model_str

    logger.configure(folder=logdir)
    env = gym.make("SlimeVolley-v0")
    eval_env = None
    try:
        env.seed(seed)

        # Evaluate on a dedicated environment: sharing the training env with
        # the callback would let every evaluation reset and step it in the
        # middle of a training rollout.
        eval_env = gym.make("SlimeVolley-v0")
        eval_env.seed(seed)

        model = algo[model_str](MlpPolicy, env, verbose=2)
        # Register before learn() so an interrupted run still leaves the model accessible.
        trained_model[model_str] = model
        eval_callback = EvalCallback(
            eval_env,
            best_model_save_path=logdir,
            log_path=logdir,
            eval_freq=eval_freq,
            n_eval_episodes=eval_episodes,
        )
        model.learn(total_timesteps=num_timesteps, callback=eval_callback)
        # Only reached when the full timestep budget completes.
        model.save(os.path.join(logdir, "final_model"))
    finally:
        # Release the environments even when training is interrupted or fails.
        env.close()
        if eval_env is not None:
            eval_env.close()

In [None]:
# Train ACKTR with experiment()'s default timestep budget; the model is stored
# in trained_model['acktr'], which the recording cell below reads.
experiment('acktr')

Logging to sb-acktr
Wrapping the env in a DummyVecEnv.
updating 34 eigenvalue/vectors
projecting 12 gradient matrices
----------------------------------
| explained_variance | -1.54e+03 |
| fps                | 29        |
| nupdates           | 1         |
| policy_entropy     | 2.08      |
| policy_loss        | 0.291     |
| total_timesteps    | 20        |
| value_loss         | 0.0336    |
----------------------------------
---------------------------------
| explained_variance | -0.908   |
| fps                | 712      |
| nupdates           | 100      |
| policy_entropy     | 2.07     |
| policy_loss        | 0.118    |
| total_timesteps    | 2000     |
| value_loss         | 0.00883  |
---------------------------------
---------------------------------
| explained_variance | 0.75     |
| fps                | 795      |
| nupdates           | 200      |
| policy_entropy     | 2        |
| policy_loss        | 0.19     |
| total_timesteps    | 4000     |
| value_loss         | 

---------------------------------
| explained_variance | 0.292    |
| fps                | 785      |
| nupdates           | 2600     |
| policy_entropy     | 1.88     |
| policy_loss        | -0.86    |
| total_timesteps    | 52000    |
| value_loss         | 0.329    |
---------------------------------
---------------------------------
| explained_variance | -1.06    |
| fps                | 790      |
| nupdates           | 2700     |
| policy_entropy     | 2.06     |
| policy_loss        | -0.111   |
| total_timesteps    | 54000    |
| value_loss         | 0.00266  |
---------------------------------
---------------------------------
| explained_variance | -0.234   |
| fps                | 793      |
| nupdates           | 2800     |
| policy_entropy     | 2.06     |
| policy_loss        | 0.0586   |
| total_timesteps    | 56000    |
| value_loss         | 0.00235  |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps         

---------------------------------
| explained_variance | 0.932    |
| fps                | 811      |
| nupdates           | 5300     |
| policy_entropy     | 2.06     |
| policy_loss        | -0.084   |
| total_timesteps    | 106000   |
| value_loss         | 0.00107  |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 813      |
| nupdates           | 5400     |
| policy_entropy     | 2.04     |
| policy_loss        | 6.21e-05 |
| total_timesteps    | 108000   |
| value_loss         | 0.000158 |
---------------------------------
---------------------------------
| explained_variance | -287     |
| fps                | 816      |
| nupdates           | 5500     |
| policy_entropy     | 2.06     |
| policy_loss        | 0.326    |
| total_timesteps    | 110000   |
| value_loss         | 0.0907   |
---------------------------------
---------------------------------
| explained_variance | 0.499    |
| fps         

---------------------------------
| explained_variance | -1.08    |
| fps                | 816      |
| nupdates           | 8000     |
| policy_entropy     | 2.05     |
| policy_loss        | -0.141   |
| total_timesteps    | 160000   |
| value_loss         | 0.00744  |
---------------------------------
---------------------------------
| explained_variance | -1       |
| fps                | 817      |
| nupdates           | 8100     |
| policy_entropy     | 2.05     |
| policy_loss        | 0.139    |
| total_timesteps    | 162000   |
| value_loss         | 0.00824  |
---------------------------------
---------------------------------
| explained_variance | -2.84    |
| fps                | 819      |
| nupdates           | 8200     |
| policy_entropy     | 1.97     |
| policy_loss        | -0.174   |
| total_timesteps    | 164000   |
| value_loss         | 0.00813  |
---------------------------------
---------------------------------
| explained_variance | -1.4     |
| fps         

---------------------------------
| explained_variance | 0.862    |
| fps                | 821      |
| nupdates           | 10700    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0781  |
| total_timesteps    | 214000   |
| value_loss         | 0.00108  |
---------------------------------
---------------------------------
| explained_variance | 0.955    |
| fps                | 821      |
| nupdates           | 10800    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.00759 |
| total_timesteps    | 216000   |
| value_loss         | 0.000703 |
---------------------------------
---------------------------------
| explained_variance | -4.2e+03 |
| fps                | 822      |
| nupdates           | 10900    |
| policy_entropy     | 2.06     |
| policy_loss        | 0.831    |
| total_timesteps    | 218000   |
| value_loss         | 0.173    |
---------------------------------
---------------------------------
| explained_variance | 0.836    |
| fps         

---------------------------------
| explained_variance | 0.66     |
| fps                | 826      |
| nupdates           | 13400    |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0369   |
| total_timesteps    | 268000   |
| value_loss         | 0.0417   |
---------------------------------
---------------------------------
| explained_variance | 0.219    |
| fps                | 826      |
| nupdates           | 13500    |
| policy_entropy     | 2.05     |
| policy_loss        | -0.0516  |
| total_timesteps    | 270000   |
| value_loss         | 0.00307  |
---------------------------------
---------------------------------
| explained_variance | 0.76     |
| fps                | 826      |
| nupdates           | 13600    |
| policy_entropy     | 2.05     |
| policy_loss        | -0.0621  |
| total_timesteps    | 272000   |
| value_loss         | 0.000718 |
---------------------------------
---------------------------------
| explained_variance | 0.693    |
| fps         

---------------------------------
| explained_variance | 0.996    |
| fps                | 816      |
| nupdates           | 16100    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.00894 |
| total_timesteps    | 322000   |
| value_loss         | 0.000435 |
---------------------------------
---------------------------------
| explained_variance | 0.546    |
| fps                | 815      |
| nupdates           | 16200    |
| policy_entropy     | 2.03     |
| policy_loss        | -0.607   |
| total_timesteps    | 324000   |
| value_loss         | 0.164    |
---------------------------------
Eval num_timesteps=325000, episode_reward=-5.00 +/- 0.00
Episode length: 541.20 +/- 71.57
---------------------------------
| explained_variance | 0.623    |
| fps                | 809      |
| nupdates           | 16300    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.225   |
| total_timesteps    | 326000   |
| value_loss         | 0.0854   |
--------------------------

Eval num_timesteps=375000, episode_reward=-4.90 +/- 0.30
Episode length: 589.30 +/- 91.35
---------------------------------
| explained_variance | 0.374    |
| fps                | 803      |
| nupdates           | 18800    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0763  |
| total_timesteps    | 376000   |
| value_loss         | 0.00127  |
---------------------------------
---------------------------------
| explained_variance | -0.941   |
| fps                | 804      |
| nupdates           | 18900    |
| policy_entropy     | 2.04     |
| policy_loss        | 0.107    |
| total_timesteps    | 378000   |
| value_loss         | 0.00967  |
---------------------------------
---------------------------------
| explained_variance | 0.824    |
| fps                | 804      |
| nupdates           | 19000    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0319  |
| total_timesteps    | 380000   |
| value_loss         | 0.000162 |
--------------------------

---------------------------------
| explained_variance | 0.477    |
| fps                | 805      |
| nupdates           | 21400    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.713   |
| total_timesteps    | 428000   |
| value_loss         | 0.182    |
---------------------------------
---------------------------------
| explained_variance | 0.43     |
| fps                | 806      |
| nupdates           | 21500    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.258   |
| total_timesteps    | 430000   |
| value_loss         | 0.014    |
---------------------------------
---------------------------------
| explained_variance | 0.92     |
| fps                | 807      |
| nupdates           | 21600    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.066    |
| total_timesteps    | 432000   |
| value_loss         | 0.00203  |
---------------------------------
---------------------------------
| explained_variance | -0.94    |
| fps         

---------------------------------
| explained_variance | -1.89    |
| fps                | 809      |
| nupdates           | 24100    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0817  |
| total_timesteps    | 482000   |
| value_loss         | 0.00358  |
---------------------------------
---------------------------------
| explained_variance | 0.926    |
| fps                | 809      |
| nupdates           | 24200    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0592  |
| total_timesteps    | 484000   |
| value_loss         | 0.000527 |
---------------------------------
---------------------------------
| explained_variance | -0.139   |
| fps                | 810      |
| nupdates           | 24300    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.112   |
| total_timesteps    | 486000   |
| value_loss         | 0.00456  |
---------------------------------
---------------------------------
| explained_variance | 0.624    |
| fps         

---------------------------------
| explained_variance | 0.425    |
| fps                | 811      |
| nupdates           | 26800    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.00366  |
| total_timesteps    | 536000   |
| value_loss         | 0.00166  |
---------------------------------
---------------------------------
| explained_variance | 0.697    |
| fps                | 812      |
| nupdates           | 26900    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.094   |
| total_timesteps    | 538000   |
| value_loss         | 0.00166  |
---------------------------------
---------------------------------
| explained_variance | 0.896    |
| fps                | 812      |
| nupdates           | 27000    |
| policy_entropy     | 1.97     |
| policy_loss        | 0.0889   |
| total_timesteps    | 540000   |
| value_loss         | 0.00324  |
---------------------------------
---------------------------------
| explained_variance | 0.701    |
| fps         

---------------------------------
| explained_variance | 0.75     |
| fps                | 815      |
| nupdates           | 29500    |
| policy_entropy     | 2.04     |
| policy_loss        | -0.326   |
| total_timesteps    | 590000   |
| value_loss         | 0.0731   |
---------------------------------
---------------------------------
| explained_variance | 0.641    |
| fps                | 816      |
| nupdates           | 29600    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0322   |
| total_timesteps    | 592000   |
| value_loss         | 0.0405   |
---------------------------------
---------------------------------
| explained_variance | -24.8    |
| fps                | 816      |
| nupdates           | 29700    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0337  |
| total_timesteps    | 594000   |
| value_loss         | 0.0118   |
---------------------------------
---------------------------------
| explained_variance | 0.931    |
| fps         

---------------------------------
| explained_variance | 0.962    |
| fps                | 816      |
| nupdates           | 32200    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0109   |
| total_timesteps    | 644000   |
| value_loss         | 0.000371 |
---------------------------------
---------------------------------
| explained_variance | -4.32    |
| fps                | 816      |
| nupdates           | 32300    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.223    |
| total_timesteps    | 646000   |
| value_loss         | 0.016    |
---------------------------------
---------------------------------
| explained_variance | 0.92     |
| fps                | 817      |
| nupdates           | 32400    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0132  |
| total_timesteps    | 648000   |
| value_loss         | 3.19e-05 |
---------------------------------
Eval num_timesteps=650000, episode_reward=-4.90 +/- 0.30
Episode length: 605.60 +/

---------------------------------
| explained_variance | 0.504    |
| fps                | 818      |
| nupdates           | 34900    |
| policy_entropy     | 2.02     |
| policy_loss        | -0.0167  |
| total_timesteps    | 698000   |
| value_loss         | 0.0399   |
---------------------------------
Eval num_timesteps=700000, episode_reward=-5.00 +/- 0.00
Episode length: 607.10 +/- 82.20
---------------------------------
| explained_variance | -0.768   |
| fps                | 814      |
| nupdates           | 35000    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.102   |
| total_timesteps    | 700000   |
| value_loss         | 0.00405  |
---------------------------------
---------------------------------
| explained_variance | 0.844    |
| fps                | 815      |
| nupdates           | 35100    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0615  |
| total_timesteps    | 702000   |
| value_loss         | 0.000609 |
--------------------------

---------------------------------
| explained_variance | 0.0235   |
| fps                | 815      |
| nupdates           | 37600    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.707   |
| total_timesteps    | 752000   |
| value_loss         | 0.144    |
---------------------------------
---------------------------------
| explained_variance | -85.2    |
| fps                | 815      |
| nupdates           | 37700    |
| policy_entropy     | 2.05     |
| policy_loss        | 1.31     |
| total_timesteps    | 754000   |
| value_loss         | 0.438    |
---------------------------------
---------------------------------
| explained_variance | -0.255   |
| fps                | 816      |
| nupdates           | 37800    |
| policy_entropy     | 2.05     |
| policy_loss        | -0.18    |
| total_timesteps    | 756000   |
| value_loss         | 0.00952  |
---------------------------------
----------------------------------
| explained_variance | 0.848     |
| fps       

---------------------------------
| explained_variance | 0.981    |
| fps                | 815      |
| nupdates           | 40300    |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0161   |
| total_timesteps    | 806000   |
| value_loss         | 0.000361 |
---------------------------------
---------------------------------
| explained_variance | 0.721    |
| fps                | 815      |
| nupdates           | 40400    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.08    |
| total_timesteps    | 808000   |
| value_loss         | 0.00126  |
---------------------------------
---------------------------------
| explained_variance | 0.495    |
| fps                | 816      |
| nupdates           | 40500    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.473   |
| total_timesteps    | 810000   |
| value_loss         | 0.0995   |
---------------------------------
---------------------------------
| explained_variance | -0.617   |
| fps         

---------------------------------
| explained_variance | 0.846    |
| fps                | 810      |
| nupdates           | 43000    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0735  |
| total_timesteps    | 860000   |
| value_loss         | 0.000946 |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 810      |
| nupdates           | 43100    |
| policy_entropy     | 2.06     |
| policy_loss        | 0.0594   |
| total_timesteps    | 862000   |
| value_loss         | 0.00145  |
---------------------------------
---------------------------------
| explained_variance | 0.785    |
| fps                | 810      |
| nupdates           | 43200    |
| policy_entropy     | 1.84     |
| policy_loss        | -0.0239  |
| total_timesteps    | 864000   |
| value_loss         | 0.0406   |
---------------------------------
---------------------------------
| explained_variance | 0.564    |
| fps         

---------------------------------
| explained_variance | 0.923    |
| fps                | 804      |
| nupdates           | 45700    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0123  |
| total_timesteps    | 914000   |
| value_loss         | 7.97e-05 |
---------------------------------
---------------------------------
| explained_variance | 0.82     |
| fps                | 804      |
| nupdates           | 45800    |
| policy_entropy     | 2.08     |
| policy_loss        | 0.0269   |
| total_timesteps    | 916000   |
| value_loss         | 0.000591 |
---------------------------------
---------------------------------
| explained_variance | 0.722    |
| fps                | 804      |
| nupdates           | 45900    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.141   |
| total_timesteps    | 918000   |
| value_loss         | 0.0042   |
---------------------------------
---------------------------------
| explained_variance | 0.135    |
| fps         

---------------------------------
| explained_variance | 0.864    |
| fps                | 800      |
| nupdates           | 48400    |
| policy_entropy     | 2.05     |
| policy_loss        | 0.127    |
| total_timesteps    | 968000   |
| value_loss         | 0.00545  |
---------------------------------
---------------------------------
| explained_variance | -0.0574  |
| fps                | 800      |
| nupdates           | 48500    |
| policy_entropy     | 2.05     |
| policy_loss        | -0.0588  |
| total_timesteps    | 970000   |
| value_loss         | 0.000966 |
---------------------------------
---------------------------------
| explained_variance | -0.128   |
| fps                | 800      |
| nupdates           | 48600    |
| policy_entropy     | 2.03     |
| policy_loss        | -0.0643  |
| total_timesteps    | 972000   |
| value_loss         | 0.00307  |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps         

---------------------------------
| explained_variance | 0.961    |
| fps                | 796      |
| nupdates           | 51100    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0435  |
| total_timesteps    | 1022000  |
| value_loss         | 0.0002   |
---------------------------------
---------------------------------
| explained_variance | -0.728   |
| fps                | 796      |
| nupdates           | 51200    |
| policy_entropy     | 2.05     |
| policy_loss        | -0.139   |
| total_timesteps    | 1024000  |
| value_loss         | 0.0104   |
---------------------------------
Eval num_timesteps=1025000, episode_reward=-5.00 +/- 0.00
Episode length: 626.90 +/- 114.17
---------------------------------
| explained_variance | 0.885    |
| fps                | 794      |
| nupdates           | 51300    |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0246   |
| total_timesteps    | 1026000  |
| value_loss         | 0.000599 |
------------------------

Eval num_timesteps=1075000, episode_reward=-4.80 +/- 0.40
Episode length: 588.20 +/- 90.15
---------------------------------
| explained_variance | 0.994    |
| fps                | 791      |
| nupdates           | 53800    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.00215  |
| total_timesteps    | 1076000  |
| value_loss         | 0.000133 |
---------------------------------
---------------------------------
| explained_variance | -9.4     |
| fps                | 791      |
| nupdates           | 53900    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.126    |
| total_timesteps    | 1078000  |
| value_loss         | 0.022    |
---------------------------------
---------------------------------
| explained_variance | 0.039    |
| fps                | 791      |
| nupdates           | 54000    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.162   |
| total_timesteps    | 1080000  |
| value_loss         | 0.00665  |
-------------------------

---------------------------------
| explained_variance | 0.632    |
| fps                | 788      |
| nupdates           | 56400    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0891   |
| total_timesteps    | 1128000  |
| value_loss         | 0.00346  |
---------------------------------
---------------------------------
| explained_variance | 0.254    |
| fps                | 788      |
| nupdates           | 56500    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0751  |
| total_timesteps    | 1130000  |
| value_loss         | 0.000979 |
---------------------------------
---------------------------------
| explained_variance | -0.11    |
| fps                | 788      |
| nupdates           | 56600    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0774   |
| total_timesteps    | 1132000  |
| value_loss         | 0.00341  |
---------------------------------
---------------------------------
| explained_variance | 0.821    |
| fps         

In [None]:
# Record a few games played by the ACKTR model and show the video inline.
video_filename = 'acktr.mp4'
# Bind the environment to a name so it can be closed once recording is done;
# creating it inline in the call left it open for the rest of the session.
video_env = gym.make("SlimeVolley-v0")
try:
    record_game(
        model=trained_model['acktr'],
        env=video_env,
        num_episodes=5,
        video_filename=video_filename
    )
finally:
    video_env.close()
# Keep this as the last expression so the notebook renders the video.
embed_mp4(video_filename)