In [None]:
import random
import gym
import tensorflow as tf

from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy

In [None]:
# Build the CartPole-v1 environment, requesting the "human" render mode.
# NOTE(review): this same env is passed to agent.fit() later in the notebook;
# depending on the installed gym version, "human" mode may draw a frame on
# every step and slow training considerably — confirm, and consider a
# separate non-rendering env for training.
env = gym.make(
    "CartPole-v1",
    render_mode="human",
)

In [None]:
# Read the two sizes the network needs from the environment:
#   states  - length of the first axis of the observation space
#   actions - number of choices in the action space
states, actions = env.observation_space.shape[0], env.action_space.n

# Show both values as the cell output.
(states, actions)

In [None]:
# Q-network: flattened observation window -> two hidden ReLU layers ->
# one linear output per action.
model = tf.keras.models.Sequential()

# Input has shape (1, states).
# NOTE(review): the leading 1 presumably has to match the window_length
# given to the agent's memory — confirm if either value is changed.
model.add(tf.keras.layers.Flatten(input_shape=(1, states)))

# Two identical fully connected hidden layers.
model.add(tf.keras.layers.Dense(24, activation="relu"))
model.add(tf.keras.layers.Dense(24, activation="relu"))

# Output layer: `actions` units with a linear activation.
model.add(tf.keras.layers.Dense(actions, activation="linear"))

model.summary()

In [None]:
# Assemble the DQN agent around the network defined above.
agent = DQNAgent(
    # Network and the number of actions it scores.
    model=model,
    nb_actions=actions,
    # Action selection uses a Boltzmann policy with its default settings.
    policy=BoltzmannQPolicy(),
    # Sequential memory capped at 50000 entries, with a window of one observation.
    memory=SequentialMemory(limit=50000, window_length=1),
    # Number of warm-up steps.
    nb_steps_warmup=10,
    # NOTE(review): a value below 1 is presumably interpreted by keras-rl as a
    # soft target-network update rate rather than a step interval — confirm.
    target_model_update=1e-2,
)

In [None]:
# Compile the agent with Adam (learning rate 1e-3), tracking mean absolute error.
# NOTE(review): the `legacy` optimizer namespace is presumably used for
# compatibility between keras-rl and newer TensorFlow releases — confirm.
agent.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=1e-3),
    metrics=["mae"],
)

In [None]:
# Train on the environment for 50,000 steps with visualization turned off
# and verbosity level 1.
agent.fit(
    env,
    nb_steps=50_000,
    visualize=False,
    verbose=1,
)

In [None]:
# Evaluate the trained agent for 100 episodes with visualization turned on.
# `results` is the history object returned by agent.test().
results = agent.test(env, nb_episodes=100, visualize=True)

# Previously the evaluation result was stored but never inspected. Summarize
# the per-episode rewards so the cell output reports how the agent performed.
episode_rewards = results.history["episode_reward"]
{
    "episodes": len(episode_rewards),
    "mean_reward": sum(episode_rewards) / len(episode_rewards),
    "min_reward": min(episode_rewards),
    "max_reward": max(episode_rewards),
}