# CartPole Gym environment with TfAgents

## Install packages (gym, tfagents, tensorflow, ...)

In [None]:
# Install easyagents via a shell escape. Only stdout is redirected to /dev/null,
# so pip's progress output is hidden while anything written to stderr still shows.
!pip install easyagents >/dev/null

##### If running in Colab: install additional packages for rendering

In [None]:
import sys

if 'google.colab' in sys.modules:
    !apt-get install xvfb >/dev/null
    !pip install pyvirtualdisplay >/dev/null    
    
    from pyvirtualdisplay import Display
    Display(visible=0, size=(960, 720)).start()    

## Dry Run (very short training, no logging)

In [None]:
from easyagents.tfagents import PpoAgent
from easyagents.config import TrainingDurationFast, LoggingSilent

# Dry run: a PPO agent on CartPole-v0 using the fast training-duration preset
# and the silent logging preset, trained immediately.
ppoAgent = PpoAgent(
    gym_env_name='CartPole-v0',
    training_duration=TrainingDurationFast(),
    logging=LoggingSilent(),
)
ppoAgent.train()

### Visualize Training (average returns, losses)

In [None]:
# Plot the average returns of the dry-run agent trained above.
ppoAgent.plot_average_returns()

In [None]:
# Plot the losses of the dry-run agent trained above.
ppoAgent.plot_losses()

## Default training with movie (self-defined network, default logging)

In [None]:
from easyagents.tfagents import PpoAgent

In [None]:
# PPO agent on CartPole-v0 with fc_layers=(100, 50, 25); training duration and
# logging are not passed, so the library defaults apply. Rebinds ppoAgent.
ppoAgent = PpoAgent(
    gym_env_name='CartPole-v0',
    fc_layers=(100, 50, 25),
)
ppoAgent.train()

### Visualize Training (average returns, losses)

In [None]:
# Plot the average returns of the default-duration training run above.
ppoAgent.plot_average_returns()

In [None]:
# Plot the losses of the default-duration training run above.
ppoAgent.plot_losses()

### Visualize Trained Policy (playing a game)

In [None]:
from IPython.display import HTML

# Wrap the HTML string produced by render_episodes_to_html() in an HTML object.
# As the last expression of the cell it is displayed inline as the cell output.
HTML( ppoAgent.render_episodes_to_html() )

## Custom training (self-defined training scheme, logging & movie)

In [None]:
from easyagents.tfagents import PpoAgent
from easyagents.config import TrainingDuration
from easyagents.config import Logging

In [None]:
# Custom training schedule, passed to PpoAgent in a later cell. Going by the
# parameter names: 100 iterations of 10 episodes each (at most 500 steps per
# episode), 10 epochs per iteration, and an evaluation over 10 episodes every
# 5 iterations.
training_duration = TrainingDuration(
    num_iterations=100,
    num_episodes_per_iteration=10,
    max_steps_per_episode=500,
    num_epochs_per_iteration=10,
    num_iterations_between_eval=5,
    num_eval_episodes=10,
)

In [None]:
# Logging configuration for the custom run: log_agent on, log_gym_api off.
# NOTE(review): presumably this logs agent activity but not individual gym API
# calls; confirm against the easyagents documentation.
logging = Logging(log_agent=True, log_gym_api=False)

In [None]:
# PPO agent on CartPole-v0 combining the self-defined network layers with the
# training_duration and logging objects built in the cells above, then trained.
ppoAgent = PpoAgent(
    gym_env_name='CartPole-v0',
    fc_layers=(100, 50, 25),
    training_duration=training_duration,
    logging=logging,
)
ppoAgent.train()

### Visualize Training (average returns, losses)

In [None]:
# Plot the average returns of the custom training run above.
ppoAgent.plot_average_returns()

In [None]:
# Plot the losses of the custom training run above.
ppoAgent.plot_losses()

### Visualize Trained Policy (playing a game)

In [None]:
from IPython.display import HTML

# Render 5 episodes of the trained policy to HTML at 10 fps and 960x720, and
# show the result inline as the cell output.
HTML(
    ppoAgent.render_episodes_to_html(
        num_episodes=5,
        fps=10,
        width=960,
        height=720,
    )
)