# CartPole Agent with OpenAI Gym


In [1]:
import gym
import random


## Setting up the environment


In [2]:
env = gym.make("CartPole-v0")
states = env.observation_space.shape[0]
actions = env.action_space.n

In [3]:
# Testing with random moves

episodes = 20

for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        env.render()
        action = random.choice([0, 1])
        n_state, reward, done, info = env.step(action)
        score += reward
    print(f"Episode:{episode} Score:{score}")


Episode:1 Score:16.0
Episode:2 Score:19.0
Episode:3 Score:36.0
Episode:4 Score:25.0
Episode:5 Score:23.0
Episode:6 Score:24.0
Episode:7 Score:20.0
Episode:8 Score:17.0
Episode:9 Score:12.0
Episode:10 Score:32.0
Episode:11 Score:21.0
Episode:12 Score:38.0
Episode:13 Score:20.0
Episode:14 Score:19.0
Episode:15 Score:30.0
Episode:16 Score:30.0
Episode:17 Score:25.0
Episode:18 Score:16.0
Episode:19 Score:33.0
Episode:20 Score:17.0


# Creating a Deep Learning Model

In [4]:
import numpy as np
import tensorflow.keras as tf

In [5]:
def make_model(states,actions):
    model=tf.models.Sequential()
    model.add(tf.layers.Flatten(input_shape=(1,states)))
    model.add(tf.layers.Dense(24,activation="relu"))
    model.add(tf.layers.Dense(24,activation="relu"))
    model.add(tf.layers.Dense(actions,activation="linear"))
    return model

In [7]:
model=make_model(states,actions)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 4)                 0         
                                                                 
 dense_3 (Dense)             (None, 24)                120       
                                                                 
 dense_4 (Dense)             (None, 24)                600       
                                                                 
 dense_5 (Dense)             (None, 2)                 50        
                                                                 
Total params: 770
Trainable params: 770
Non-trainable params: 0
_________________________________________________________________


## Train agent with Keras RL

In [8]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [9]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model,
                   memory=memory,
                   policy=policy,
                   nb_actions=actions,
                   nb_steps_warmup=10,
                   target_model_update=1e-2)
    return dqn


In [10]:
agent=build_agent(model,actions)
agent.compile(tf.optimizers.Adam(learning_rate=1e-3),metrics=['mae'])
agent.fit(env,nb_steps=50000,visualize=False,verbose=1)

AttributeError: 'Sequential' object has no attribute '_compile_time_distribution_strategy'