# Reinforcement Learning Practical Example for Playing Mario Bros
| Platform | Purpose |
| -- | -- |
| Tensorflow | Base ML Library |
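| KerasRL | Reinforcement Learning Library (installed during setup) |
| Tensorforce | Reinforcement Learning Library (agent, environment wrapper and runner used in the cells below) |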
| OpenAI Gym | Reinforcement Learning Environment |

- https://pypi.org/project/gym-super-mario-bros/

### Setup Environment

In [1]:
import sys

!{sys.executable} -m pip install -U pip
!{sys.executable} -m pip install -U gym
!{sys.executable} -m pip install -U gym-super-mario-bros
!{sys.executable} -m pip install -U tensorflow
!{sys.executable} -m pip install -U keras-rl2



In [2]:
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from nes_py.wrappers import JoypadSpace
from tensorforce import Agent, Environment
from tensorforce.execution import Runner

In [3]:
# Create the 'SuperMarioBros-v0' environment and wrap it in JoypadSpace with
# the SIMPLE_MOVEMENT action list, in one expression.
env = JoypadSpace(gym_super_mario_bros.make('SuperMarioBros-v0'), SIMPLE_MOVEMENT)

### Setup Intelligent Agent

In [4]:
# `Agent.create` needs the wrapped environment, but `environment` is otherwise
# only assigned in a later cell; build it here from the `env` created above so
# the notebook runs top to bottom without a NameError.
environment = Environment.create(environment=env)

# Double-DQN agent. The saver directory is the same path the
# "Load Trained Model" cell reads its checkpoint from.
agent = Agent.create(
    saver=dict(
        directory='data/models/mario_tf_ddqn',
        frequency=50,
        max_checkpoints=5,
    ),
    agent='double_dqn',
    environment=environment,
    memory=15000,
    batch_size=10000,
)

### Train the Agent

In [5]:
from tqdm.notebook import tqdm

In [6]:
# Train for `total_episodes` episodes, one `runner.run` call per episode so the
# outer progress bar advances once per episode.
total_episodes = 100
progress_bar = tqdm(range(total_episodes))

runner = Runner(agent=agent, environment=environment)

try:
    for episode_number in progress_bar:
        # Label the bar with the episode that is about to run (1-based).
        progress_bar.set_description(
            f'Training Progress - Episode {episode_number + 1}/{total_episodes}'
        )
        runner.run(num_episodes=1)
finally:
    # Close the runner even when training is interrupted or fails part-way.
    runner.close()

  0%|          | 0/100 [00:00<?, ?it/s]

Episodes:   0%|          | 0/1 [00:00, return=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

### Load Utility Functions

In [None]:
# Run the shared imaging helper notebook.
# NOTE(review): presumably this is what defines `rgb_array_to_image` and
# `images_to_gif_bytes`, which later cells call but no cell in this notebook
# defines — confirm against the helper notebook.
%run ./fa.intelligence.notebooks/utilities/common/imaging.ipynb

### Train a Random Agent
An agent that takes random steps and records the total reward.

In [None]:
# Fresh 'SuperMarioBros-v0' environment, wrapped in JoypadSpace with the
# SIMPLE_MOVEMENT action list, for the random-agent rollout.
env = JoypadSpace(gym_super_mario_bros.make('SuperMarioBros-v0'), SIMPLE_MOVEMENT)

In [None]:
# Play a single episode with uniformly sampled actions, accumulating the
# reward and converting every observed state into an image frame.
random_frames = []
random_agent_reward = 0
state = env.reset()

while True:
    state, reward, done, info = env.step(env.action_space.sample())
    random_agent_reward += reward
    random_frames.append(rgb_array_to_image(state))
    if done:
        # The environment reported the end of the episode.
        break

print(f'[RANDOM_AGENT] Reward: {random_agent_reward}')

In [None]:
# Rebuild the OpenAI Gym environment used higher up, then wrap it via
# `Environment.create` for use with the agent.
env = JoypadSpace(gym_super_mario_bros.make('SuperMarioBros-v0'), SIMPLE_MOVEMENT)
environment = Environment.create(environment=env)

### Load Trained Model

In [None]:
# Recreate the environment and restore the agent from the checkpoint directory
# written by the saver configured at agent creation.
env = gym_super_mario_bros.make('SuperMarioBros-v0')
env = JoypadSpace(env, SIMPLE_MOVEMENT)
environment = Environment.create(environment=env)
agent = Agent.load(directory='data/models/mario_tf_ddqn', format='checkpoint', environment=environment)

running_reward = 0
frames = []

try:
    internals = agent.initial_internals()
    done = False
    states = environment.reset()

    # Play one episode without training updates, recording a frame per step.
    while not done:
        # With `internals` supplied in independent mode, `act` returns the
        # actions together with the next internal state; unpack both so only
        # the actions reach the environment and the internals are carried
        # forward to the next step.
        actions, internals = agent.act(states=states, internals=internals, independent=True)
        states, done, reward = environment.execute(actions=actions)
        running_reward += reward
        frames.append(rgb_array_to_image(states))
finally:
    # Release the agent and environment even if the rollout fails.
    agent.close()
    environment.close()

### Visualize Its "Intelligent" Actions

In [None]:
import base64
from IPython.display import HTML

In [None]:
# Convert both recorded frame lists (random agent first, then trained agent)
# with the `images_to_gif_bytes` helper.
random_result_gif, result_gif = (
    images_to_gif_bytes(episode_frames) for episode_frames in (random_frames, frames)
)

In [None]:
experiment_name = 'Tensorforce-OpenAI Mario RL Agent Training Results'


def _agent_panel_html(title: str, gif_bytes: bytes, reward: float) -> str:
    """Return the HTML for one result panel.

    The panel is an inline-block ``<div>`` holding a bold title, the GIF
    embedded as a base64 ``data:`` URI, and the episode reward.

    Args:
        title: Heading shown above the animation.
        gif_bytes: Raw GIF data to embed.
        reward: Total reward displayed under the animation.
    """
    encoded_gif = base64.b64encode(gif_bytes).decode()
    return (
        '<div style="display: inline-block; padding: 15px">'
        f'<div style="font-weight:bold">{title}</div>'
        # The media type must be `image/gif`, with no space after the comma.
        f'<div style="padding: 5px"><img src="data:image/gif;base64,{encoded_gif}" /></div>'
        f'<div>Reward: {reward}</div>'
        '</div>'
    )


# Trained agent and random agent side by side.
episodes_html = _agent_panel_html('Intelligent Agent', result_gif, running_reward)
episodes_html += _agent_panel_html('Random Agent', random_result_gif, random_agent_reward)
# Keep this as the last expression so the notebook displays the rendered HTML.
HTML(f'<h1 style="text-align:center">{experiment_name}</h1><div style="text-align: center">{episodes_html}</div><footer style="text-align: center; padding: 15px">FrostAura Intelligence</footer>')