# MuZero CartPole VPython

This notebook runs the MuZero reinforcement-learning algorithm from Google DeepMind in the CartPole environment and visualizes it with VPython.


In [None]:
from helper import SharedStorage, MuZeroConfig, ReplayBuffer
from self_play import run_selfplay, run_eval
from training import train_network
from cartpolevpython import make_CartPoleVPython_config, create_scene

# MuZero training is split into two independent parts: Network training and
# self-play data generation.
# These two parts only communicate by transferring the latest network checkpoint
# from the training to the self-play, and the finished games from the self-play
# to the training.
# Per-loop score history: muzero() appends one entry to each list per training
# loop, and the plotting cell further down reads them.
train_scores, eval_scores = [], []
def muzero(config: MuZeroConfig, nb_eval_episodes: int = 20):
    """Alternate self-play, network training and evaluation, then return the final network.

    For each of ``config.nb_training_loop`` iterations this:

    1. calls ``run_selfplay`` for ``config.nb_episodes`` episodes, filling the replay buffer,
    2. calls ``train_network`` for ``config.nb_epochs`` epochs,
    3. calls ``run_eval`` for ``nb_eval_episodes`` episodes.

    Side effects: prints progress for every loop and appends the values returned
    by ``run_selfplay`` / ``run_eval`` to the module-level ``train_scores`` and
    ``eval_scores`` lists.

    Args:
        config: MuZero configuration; supplies the network/optimizer factories
            and the loop, episode and epoch counts.
        nb_eval_episodes: Number of episodes passed to ``run_eval`` after each
            training phase. Defaults to 20, the value that was previously
            hard-coded.

    Returns:
        Whatever ``storage.latest_network()`` returns after the last loop.
    """
    storage = SharedStorage(config.new_network(), config.uniform_network(), config.new_optimizer())
    replay_buffer = ReplayBuffer(config)

    for loop in range(config.nb_training_loop):
        print("Training loop", loop)
        score_train = run_selfplay(config, storage, replay_buffer, config.nb_episodes)
        train_network(config, storage, replay_buffer, config.nb_epochs)
        score_eval = run_eval(config, storage, nb_eval_episodes)

        print("Train score:", score_train)
        print("Eval score:", score_eval)
        # Cumulative totals: every loop plays nb_episodes and trains nb_epochs.
        print(f"MuZero played {config.nb_episodes * (loop + 1)} "
              f"episodes and trained for {config.nb_epochs * (loop + 1)} epochs.\n")
        train_scores.append(score_train)
        eval_scores.append(score_eval)

    return storage.latest_network()

# Show the object returned by create_scene() in the cell output. This happens
# before the config is built and before any self-play starts.
# NOTE(review): presumably the environment renders into this scene — confirm in cartpolevpython.
display(create_scene())
# Configuration object produced by the cartpolevpython helper; reused by the replay cell.
config = make_CartPoleVPython_config()

# Run the full self-play / training / evaluation loop and keep the final
# network so a later cell can replay a game with it.
latest_network = muzero(config);

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Training loop 0


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Instructions for updating:
Use tf.identity instead.


In [None]:
import matplotlib.pyplot as plt

# Plot the per-loop training and evaluation scores recorded by muzero().
fig, ax = plt.subplots()
ax.plot(train_scores, color="red", label="Train")
ax.plot(eval_scores, color="blue", label="Eval")
# One tick per recorded loop, so the axis matches the actual number of
# training loops instead of assuming exactly 20.
ax.set_xticks(range(len(train_scores)))
ax.set_xlabel('Loops')
ax.set_ylabel('Score')
ax.set_title('MuZero CartPole score per training loop')
ax.legend()
plt.show()

In [None]:
from self_play import play_game
from cartpolevpython import create_scene

# create_scene must be *called*: passing the bare function to display() only
# shows the function's repr and never builds a scene (the training cell
# already uses display(create_scene())).
display(create_scene())
# Play one game with the trained network; the trailing ';' suppresses the
# returned value from being echoed as cell output.
play_game(config, latest_network);