# Reinforcement Learning Practical Example for Playing Mario Bros
| Platform | Purpose |
| -- | -- |
| TensorFlow | Base ML Library |
| Tensorforce | Reinforcement Learning Library |
| OpenAI Gym | Reinforcement Learning Environment |

- https://github.com/tensorforce/tensorforce/tree/master/examples
- https://github.com/tensorforce/tensorforce/blob/master/examples/save_load_agent.py
- https://pypi.org/project/gym-super-mario-bros/

### Setup Environment

In [2]:
# Install the notebook's dependencies using the interpreter that runs this kernel:
# IPython interpolates `{sys.executable}` into each `!` shell command below.
import sys

# `-U` upgrades every package to its newest available release.
# NOTE(review): no versions are pinned, so a later re-run may pull different releases — consider pinning.
!{sys.executable} -m pip install -U pip
!{sys.executable} -m pip install -U gym
!{sys.executable} -m pip install -U gym-super-mario-bros
!{sys.executable} -m pip install -U tensorflow
!{sys.executable} -m pip install -U tensorforce



In [3]:
from nes_py.wrappers import JoypadSpace
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

In [4]:
# Create the 'SuperMarioBros-v0' Gym environment and wrap it in JoypadSpace
# configured with the SIMPLE_MOVEMENT action list.
env = JoypadSpace(gym_super_mario_bros.make('SuperMarioBros-v0'), SIMPLE_MOVEMENT)

### Train a Random Agent
A baseline agent that takes random actions until the episode ends and records the total reward.

In [5]:
# Play one episode with uniformly sampled actions and accumulate the reward.
state = env.reset()
random_agent_reward, done = 0, False

while not done:
    # Draw a random action from the action space and apply it to the environment.
    random_action = env.action_space.sample()
    state, reward, done, info = env.step(random_action)
    random_agent_reward = random_agent_reward + reward

print(f'[RANDOM_AGENT] Reward: {random_agent_reward}')

KeyboardInterrupt: 

### Setup Intelligent Agent

In [None]:
from tensorforce import Agent, Environment, Runner

In [None]:
# Build a fresh Gym environment (same configuration as the one used for the random agent)
# and hand it to Tensorforce's Environment.create to obtain the wrapped environment.
env = JoypadSpace(gym_super_mario_bros.make('SuperMarioBros-v0'), SIMPLE_MOVEMENT)
environment = Environment.create(environment=env)

In [None]:
# Create a Tensorforce agent for the wrapped environment, configured with
# memory=10000, an Adam optimizer for gradient descent, the 'auto' network,
# a policy-gradient objective and a saver that writes to 'data/models/mario_tf'.
agent = Agent.create(
    agent='tensorforce',
    environment=environment,
    policy={'network': 'auto'},
    memory=10000,
    update={'unit': 'timesteps', 'batch_size': 64},
    optimizer={'type': 'adam', 'learning_rate': 3e-4},
    objective='policy_gradient',
    reward_estimation={'horizon': 20},
    saver={
        'directory': 'data/models/mario_tf',
        'frequency': 100,
    },
)

### Train the Agent

In [None]:
# Train the agent for `total_episodes` episodes, then run a shorter evaluation pass.
total_episodes = 10

# The episode count must be a whole number: `total_episodes / 3` yields the float
# 3.33..., so use integer division and always evaluate at least one episode.
evaluation_episodes = max(1, total_episodes // 3)

runner = Runner(agent=agent, environment=environment)

try:
    runner.run(num_episodes=total_episodes)
    runner.run(num_episodes=evaluation_episodes, evaluation=True)
finally:
    # Close the runner even when a run is interrupted (e.g. KeyboardInterrupt).
    runner.close()

Episodes:   0%|          | 0/10 [00:00, return=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

### Load Trained Model

In [6]:
# Run the shared imaging utilities notebook so that its definitions are available here.
# NOTE(review): presumably this is what provides `rgb_array_to_image` and
# `images_to_gif_bytes`, which later cells call without importing — confirm.
%run ./fa.intelligence.notebooks/utilities/common/imaging.ipynb

In [7]:
from tensorforce import Agent, Environment, Runner
from nes_py.wrappers import JoypadSpace
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

In [12]:
# Rebuild the environment, restore the agent from the checkpoint directory used by
# the saver during training, and record the frames of one bounded roll-out.
env = gym_super_mario_bros.make('SuperMarioBros-v0')
env = JoypadSpace(env, SIMPLE_MOVEMENT)
environment = Environment.create(environment=env)

running_reward = 0
done = False
frames = []
# Remaining step budget: despite its name, the roll-out stops once this reaches 0.
min_steps = 1000

# Nested try/finally guarantees that the agent and then the environment are closed
# (in that order) even if loading or the roll-out raises or is interrupted.
try:
    agent = Agent.load(directory='data/models/mario_tf', format='checkpoint', environment=environment)
    try:
        states = environment.reset()

        while not done and min_steps > 0:
            # independent=True: act without a matching agent.observe(...) call.
            # To keep learning during play, drop independent=True and call
            # agent.observe(terminal=done, reward=reward) after each step.
            actions = agent.act(states=states, independent=True)
            states, done, reward = environment.execute(actions=actions)
            running_reward += reward
            frames.append(rgb_array_to_image(states))
            min_steps -= 1
    finally:
        agent.close()
finally:
    environment.close()

### Visualize Its "Intelligent" Actions

In [18]:
import base64
from IPython.display import HTML

In [19]:
# Convert the frames collected during the roll-out into GIF data.
# NOTE(review): `images_to_gif_bytes` is not defined in this notebook; presumably it comes
# from the imaging notebook executed via %run and returns bytes (the result is
# base64-encoded further down) — confirm.
result_gif = images_to_gif_bytes(frames)

In [20]:
experiment_name = 'Tensorforce-OpenAI Mario RL Agent Training Results'

# Only a single roll-out was recorded, so the gallery contains one entry.
episodes = [result_gif]

# Build one card per episode. Each card embeds its own GIF as a base64 data URI
# (MIME type `image/gif`) and is labelled with its 1-based episode number.
# `running_reward` is the total reward of the single recorded roll-out.
episodes_html = ''.join(
    '<div style="display: inline-block; padding: 15px">'
    f'<div style="font-weight:bold">Ep {episode_number}</div>'
    '<div style="padding: 5px">'
    f'<img src="data:image/gif;base64,{base64.b64encode(episode_gif).decode()}" />'
    '</div>'
    f'<div>Reward: {running_reward}</div>'
    '</div>'
    for episode_number, episode_gif in enumerate(episodes, start=1)
)

# Keep the HTML object as the cell's last expression so that Jupyter renders it.
HTML(f'<h1 style="text-align:center">{experiment_name}</h1><div style="text-align: center">{episodes_html}</div><footer style="text-align: center; padding: 15px">FrostAura Intelligence</footer>')