In [None]:
import time
import numpy as np
from nes_py.wrappers import JoypadSpace
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from agent import DQNAgent
from wrappers import wrapper


# Build env: world 1-1, restricted to the SIMPLE_MOVEMENT action set
env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')
env = JoypadSpace(env, SIMPLE_MOVEMENT)
env = wrapper(env)

# Parameters
# Observation shape handed to the agent; presumably must match what
# wrapper() emits -- TODO confirm against wrappers.py.
states = (84, 84, 4)
actions = env.action_space.n

# Agent
agent = DQNAgent(states=states, actions=actions, max_memory=100000, double_q=True)

# Episodes
episodes = 10000
rewards = []  # one entry per finished episode: mean reward per step

# Timing: `start`/`step` mark the beginning of the current logging window
start = time.time()
step = 0

# Main loop.
# Wrapped in try/finally so that rewards gathered so far are still written to
# disk when the run is interrupted (e.g. kernel interrupt) or crashes, instead
# of only after all `episodes` have completed.
try:
    for e in range(episodes):

        # Reset env
        state = env.reset()

        # Per-episode accumulators.
        # The counter is called `n_steps` rather than `iter` so the builtin
        # iter() is not shadowed in the notebook's global namespace.
        total_reward = 0
        n_steps = 0

        # Play one episode
        while True:

            # Show env
            # env.render()

            # Run agent
            action = agent.run(state=state)

            # Perform action
            next_state, reward, done, info = env.step(action=action)

            # Remember
            agent.add(experience=(state, next_state, action, reward, done))

            # Replay
            agent.learn()

            # Total reward
            total_reward += reward

            # Update state
            state = next_state

            # Increment
            n_steps += 1

            # Stop when the env reports done or the flag was reached
            if done or info['flag_get']:
                break

        # Mean reward per step for this episode (n_steps >= 1 here, since the
        # loop body always runs at least once before breaking)
        rewards.append(total_reward / n_steps)

        # Print a progress line every 100 episodes
        if e % 100 == 0:
            print('Episode {e} - '
                  'Frame {f} - '
                  'Frames/sec {fs} - '
                  'Epsilon {eps} - '
                  'Mean Reward {r}'.format(e=e,
                                           f=agent.step,
                                           fs=np.round((agent.step - step) / (time.time() - start)),
                                           eps=np.round(agent.eps, 4),
                                           r=np.mean(rewards[-100:])))
            # Start a new logging window
            start = time.time()
            step = agent.step
finally:
    # Save rewards. Skipped only when nothing was collected, so a failure
    # before the first episode ends cannot overwrite an earlier results file
    # with an empty array.
    if rewards:
        np.save('rewards.npy', rewards)


Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use keras.layers.Flatten instead.
Instructions for updating:
Use keras.layers.Dense instead.


  return (self.ram[0x86] - self.ram[0x071c]) % 256


Episode 0 - Frame 233 - Frames/sec 139.0 - Epsilon 0.9999 - Mean Reward 2.665236051502146
Episode 100 - Frame 43537 - Frames/sec 143.0 - Epsilon 0.9892 - Mean Reward 3.1668371964098183
Episode 200 - Frame 85520 - Frames/sec 142.0 - Epsilon 0.9788 - Mean Reward 3.163144960939203
Episode 300 - Frame 129298 - Frames/sec 84.0 - Epsilon 0.9682 - Mean Reward 3.1623640414451097
Episode 400 - Frame 166310 - Frames/sec 70.0 - Epsilon 0.9593 - Mean Reward 3.409637260533143
Episode 500 - Frame 204562 - Frames/sec 69.0 - Epsilon 0.9501 - Mean Reward 3.5032590715005165
Episode 600 - Frame 245780 - Frames/sec 69.0 - Epsilon 0.9404 - Mean Reward 3.2047334949284334
Episode 700 - Frame 286977 - Frames/sec 69.0 - Epsilon 0.9308 - Mean Reward 3.2198258884503734
Episode 800 - Frame 324685 - Frames/sec 70.0 - Epsilon 0.922 - Mean Reward 3.467922920466985
Episode 900 - Frame 368352 - Frames/sec 71.0 - Epsilon 0.912 - Mean Reward 3.26133360310438
Episode 1000 - Frame 404003 - Frames/sec 70.0 - Epsilon 0.9039

In [2]:
# Persist the trained agent. Requires `agent` from the training cell above;
# the output location/format is decided inside DQNAgent.save_model (agent.py).
agent.save_model()

In [None]:
# Run the agent's replay routine on the training `env`.
# NOTE(review): presumably './newest_models/' is the directory saved models
# are read from and `1` is the number of episodes to play -- confirm against
# DQNAgent.replay in agent.py. Plotting is switched off via plot=False.
agent.replay(env,'./newest_models/',1, plot=False)