# CartPole Agent with OpenAI Gym


In [1]:
import gym
import random


## Setting up the environment


In [2]:
# Create the CartPole environment and read the two sizes the network needs.
env = gym.make("CartPole-v0")
# First entry of the observation space's shape; later passed to make_model as `states`.
states = env.observation_space.shape[0]
# Size of the action space (`.n`); later used as the network's output width.
actions = env.action_space.n

In [3]:
# Testing with random moves
#
# Baseline: play a few episodes with uniformly random actions to see how an
# untrained policy scores before any learning happens.

episodes = 20

try:
    for episode in range(1, episodes+1):
        env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            # Draw from the action count read off the environment rather than
            # a hard-coded [0, 1], so the loop stays valid if the env changes.
            action = random.randrange(actions)
            # Only the reward and the termination flag are needed here.
            _obs, reward, done, _info = env.step(action)
            score += reward
        print(f"Episode:{episode} Score:{score}")
finally:
    # Release the render window even if the cell is interrupted; without this
    # the viewer opened by env.render() is never closed.
    env.close()


Episode:1 Score:15.0
Episode:2 Score:12.0
Episode:3 Score:12.0
Episode:4 Score:12.0
Episode:5 Score:29.0
Episode:6 Score:28.0
Episode:7 Score:13.0
Episode:8 Score:15.0
Episode:9 Score:22.0
Episode:10 Score:31.0
Episode:11 Score:20.0
Episode:12 Score:44.0
Episode:13 Score:26.0
Episode:14 Score:17.0
Episode:15 Score:16.0
Episode:16 Score:14.0
Episode:17 Score:12.0
Episode:18 Score:15.0
Episode:19 Score:17.0
Episode:20 Score:22.0


## Creating a Deep Learning Model

In [10]:
import numpy as np
from tensorflow import keras as tf

In [11]:
def make_model(states, actions):
    """Return an uncompiled feed-forward Keras network.

    The input of shape (1, states) is flattened, passed through two
    24-unit ReLU layers, and mapped to `actions` linear outputs.

    Note: `tf` here is the alias the import cell gives to `tensorflow.keras`.
    """
    layer_stack = [
        tf.layers.Flatten(input_shape=(1, states)),
        tf.layers.Dense(24, activation="relu"),
        tf.layers.Dense(24, activation="relu"),
        tf.layers.Dense(actions, activation="linear"),
    ]
    return tf.models.Sequential(layer_stack)

In [12]:
# Build the network for this environment's observation/action sizes and
# print its layer-by-layer summary.
model=make_model(states,actions)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 4)                 0         
                                                                 
 dense_3 (Dense)             (None, 24)                120       
                                                                 
 dense_4 (Dense)             (None, 24)                600       
                                                                 
 dense_5 (Dense)             (None, 2)                 50        
                                                                 
Total params: 770
Trainable params: 770
Non-trainable params: 0
_________________________________________________________________


## Train agent with Keras RL

In [13]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [14]:
def build_agent(model, actions):
    """Assemble a keras-rl DQNAgent around `model`.

    The agent is given a BoltzmannQPolicy and a SequentialMemory
    (limit 50000, window length 1), with `actions` as its action count.
    The returned agent has not been compiled.
    """
    action_policy = BoltzmannQPolicy()
    experience_buffer = SequentialMemory(limit=50000, window_length=1)
    return DQNAgent(
        model=model,
        memory=experience_buffer,
        policy=action_policy,
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )


In [15]:
# Wrap the network in a DQN agent and compile it with Adam (learning rate 1e-3),
# tracking mean absolute error as a metric. `tf` is the tensorflow.keras alias
# from the import cell.
agent=build_agent(model,actions)
agent.compile(tf.optimizers.Adam(learning_rate=1e-3),metrics=['mae'])
# Train on the environment for 50,000 steps without rendering; verbose=1
# produces the per-interval progress log shown in the cell output.
agent.fit(env,nb_steps=50000,visualize=False,verbose=1)

Training for 50000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 5:10 - reward: 1.0000

  updates=self.state_updates,


108 episodes - episode_reward: 91.417 [10.000, 200.000] - loss: 2.315 - mae: 18.724 - mean_q: 37.913

Interval 2 (10000 steps performed)
51 episodes - episode_reward: 196.804 [177.000, 200.000] - loss: 5.612 - mae: 38.614 - mean_q: 78.146

Interval 3 (20000 steps performed)
51 episodes - episode_reward: 197.216 [169.000, 200.000] - loss: 6.571 - mae: 43.579 - mean_q: 87.906

Interval 4 (30000 steps performed)
51 episodes - episode_reward: 195.196 [154.000, 200.000] - loss: 4.739 - mae: 40.765 - mean_q: 82.093

Interval 5 (40000 steps performed)
done, took 159.655 seconds


<keras.callbacks.History at 0x12b284c3460>

In [17]:
# Evaluate the trained agent over 100 episodes without rendering, then report
# the mean of the per-episode rewards recorded in the returned history.
scores=agent.test(env,nb_episodes=100,visualize=False)
print(np.mean(scores.history['episode_reward']))

Testing for 100 episodes ...
Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
Episode 6: reward: 200.000, steps: 200
Episode 7: reward: 200.000, steps: 200
Episode 8: reward: 200.000, steps: 200
Episode 9: reward: 200.000, steps: 200
Episode 10: reward: 200.000, steps: 200
Episode 11: reward: 200.000, steps: 200
Episode 12: reward: 200.000, steps: 200
Episode 13: reward: 200.000, steps: 200
Episode 14: reward: 200.000, steps: 200
Episode 15: reward: 200.000, steps: 200
Episode 16: reward: 200.000, steps: 200
Episode 17: reward: 200.000, steps: 200
Episode 18: reward: 200.000, steps: 200
Episode 19: reward: 200.000, steps: 200
Episode 20: reward: 200.000, steps: 200
Episode 21: reward: 200.000, steps: 200
Episode 22: reward: 200.000, steps: 200
Episode 23: reward: 200.000, steps: 200
Episode 24: reward: 200.000, steps: 200
Episode 25: reward: 

In [19]:
# Run 10 more evaluation episodes, this time with visualize=True to watch the agent.
# NOTE(review): presumably this leaves a render window open until env.close() is called — confirm.
agent.test(env,nb_episodes=10,visualize=True)

Testing for 10 episodes ...
Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
Episode 6: reward: 200.000, steps: 200
Episode 7: reward: 200.000, steps: 200
Episode 8: reward: 200.000, steps: 200
Episode 9: reward: 200.000, steps: 200
Episode 10: reward: 200.000, steps: 200


<keras.callbacks.History at 0x12b2aef3d60>