In [1]:
import time
import numpy as np
from nes_py.wrappers import JoypadSpace
import gym_super_mario_bros
from gym_super_mario_bros.actions import RIGHT_ONLY
from agent import DQNAgent
from wrappers import wrapper


# Environment: level 1-1 restricted to the RIGHT_ONLY action set, with the
# project's `wrapper` applied on top of the joypad wrapper.
env = wrapper(
    JoypadSpace(
        gym_super_mario_bros.make('SuperMarioBros-1-1-v0'),
        RIGHT_ONLY,
    )
)

# Agent input shape and number of discrete actions exposed by the env.
# NOTE(review): (84, 84, 4) presumably matches what `wrapper` emits - confirm.
actions = env.action_space.n
states = (84, 84, 4)

# DQN agent with a 100k-entry memory and the double_q option switched on
agent = DQNAgent(
    states=states,
    actions=actions,
    max_memory=100000,
    double_q=True,
)

# Number of episodes to play and the per-episode reward history
episodes, rewards = 10000, []

# Reference points for the frames/sec readout in the progress log
step = 0
start = time.time()

# Main loop: play `episodes` episodes. On every environment step the agent
# picks an action, stores the transition and runs one learning update.
# The loop sits inside try/finally so that the reward history gathered so far
# is still written to disk if the run is interrupted or raises part-way.
try:
    for e in range(episodes):

        # Reset env
        state = env.reset()

        # Per-episode accumulators. The step counter is deliberately not named
        # `iter`: that would shadow the builtin for the rest of the session.
        total_reward = 0
        episode_steps = 0

        # Play until the env reports the episode is over or the flag is reached
        while True:

            # Show env
            # env.render()

            # Run agent
            action = agent.run(state=state)

            # Perform action
            next_state, reward, done, info = env.step(action=action)

            # Remember
            agent.add(experience=(state, next_state, action, reward, done))

            # Replay
            agent.learn()

            # Total reward
            total_reward += reward

            # Update state
            state = next_state

            # Increment
            episode_steps += 1

            # If done break loop
            if done or info['flag_get']:
                break

        # Store the mean reward per step; the inner loop always runs at least
        # once, so episode_steps >= 1 here.
        rewards.append(total_reward / episode_steps)

        # Progress log every 100 episodes
        if e % 100 == 0:
            print('Episode {e} - '
                  'Frame {f} - '
                  'Frames/sec {fs} - '
                  'Epsilon {eps} - '
                  'Mean Reward {r}'.format(e=e,
                                           f=agent.step,
                                           fs=np.round((agent.step - step) / (time.time() - start)),
                                           eps=np.round(agent.eps, 4),
                                           r=np.mean(rewards[-100:])))
            # Restart the frames/sec measurement window
            start = time.time()
            step = agent.step
finally:
    # Save rewards (also reached on KeyboardInterrupt or errors, so a partial
    # run keeps its history)
    np.save('rewards.npy', rewards)


Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use keras.layers.Flatten instead.
Instructions for updating:
Use keras.layers.Dense instead.


  return (self.ram[0x86] - self.ram[0x071c]) % 256


Episode 0 - Frame 138 - Frames/sec 143.0 - Epsilon 1.0 - Mean Reward 0.4927536231884058
Episode 100 - Frame 27052 - Frames/sec 135.0 - Epsilon 0.9933 - Mean Reward 0.6036848997745411
Episode 200 - Frame 50063 - Frames/sec 137.0 - Epsilon 0.9876 - Mean Reward 0.5952738805014954
Episode 300 - Frame 83916 - Frames/sec 139.0 - Epsilon 0.9792 - Mean Reward 0.5307022568916109
Episode 400 - Frame 119775 - Frames/sec 88.0 - Epsilon 0.9705 - Mean Reward 0.48851863692766045
Episode 500 - Frame 149659 - Frames/sec 67.0 - Epsilon 0.9633 - Mean Reward 0.5456574488190917
Episode 600 - Frame 184265 - Frames/sec 65.0 - Epsilon 0.955 - Mean Reward 0.5164309098626186
Episode 700 - Frame 215877 - Frames/sec 65.0 - Epsilon 0.9475 - Mean Reward 0.5329728993970733
Episode 800 - Frame 246620 - Frames/sec 66.0 - Epsilon 0.9402 - Mean Reward 0.539137052216338
Episode 900 - Frame 274017 - Frames/sec 65.0 - Epsilon 0.9338 - Mean Reward 0.5489943196366907
Episode 1000 - Frame 302215 - Frames/sec 66.0 - Epsilon 0.