# Using Deep Reinforcement Learning to create an RL intelligent agent that can Balance a Pole on a moving Cart

https://www.youtube.com/watch?v=cO5g5qLrLSo

## Depencencies Needed
- Tensorflow
- Keras
- Keras RL
- OpenAI gym

## Importing Libraries

In [1]:
# base libs
import gymnasium as gym
import numpy as np
import random


In [6]:
# tf libs
import keras as k
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
from tensorflow.keras.optimizers import Adam

In [3]:
# RL libs
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

## Testing Random Env with OpenAI Gym

In [20]:
env = gym.make('CartPole-v1', render_mode="human")
states = env.observation_space.shape[0]
print(env.observation_space.shape)
actions = env.action_space.n

(4,)


: 

In [5]:
print(f"States : {states} \nActions : {actions}")

States : 4 
Actions : 2


In [8]:
episodes = 50
for episode in range(episodes):
    state = env.reset()
    done = False
    score = 0

    while not done:
        env.render()
        action = random.choice([0,1])
        n_state, reward, done, truncated, info = env.step(action)
        score += reward
    print(f"Episode:{episode+1} Score:{score}")

Episode:1 Score:16.0
Episode:2 Score:19.0
Episode:3 Score:19.0
Episode:4 Score:15.0
Episode:5 Score:16.0
Episode:6 Score:37.0
Episode:7 Score:14.0
Episode:8 Score:89.0
Episode:9 Score:30.0
Episode:10 Score:10.0
Episode:11 Score:11.0
Episode:12 Score:18.0
Episode:13 Score:18.0
Episode:14 Score:18.0
Episode:15 Score:24.0
Episode:16 Score:25.0
Episode:17 Score:38.0
Episode:18 Score:21.0
Episode:19 Score:21.0
Episode:20 Score:29.0
Episode:21 Score:56.0
Episode:22 Score:13.0
Episode:23 Score:13.0
Episode:24 Score:30.0
Episode:25 Score:18.0
Episode:26 Score:21.0
Episode:27 Score:31.0
Episode:28 Score:13.0
Episode:29 Score:37.0
Episode:30 Score:68.0
Episode:31 Score:17.0
Episode:32 Score:23.0
Episode:33 Score:12.0
Episode:34 Score:18.0
Episode:35 Score:33.0
Episode:36 Score:10.0
Episode:37 Score:9.0
Episode:38 Score:9.0
Episode:39 Score:12.0
Episode:40 Score:25.0
Episode:41 Score:15.0
Episode:42 Score:16.0
Episode:43 Score:19.0
Episode:44 Score:54.0
Episode:45 Score:43.0
Episode:46 Score:26.0

## Creating Deep Learning Model

In [8]:
def build_model(states, actions):
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(actions))
    model.add(Activation('linear'))
    return model

In [9]:
model = build_model(states, actions)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 4)                 0         
                                                                 
 dense (Dense)               (None, 16)                80        
                                                                 
 activation (Activation)     (None, 16)                0         
                                                                 
 dense_1 (Dense)             (None, 16)                272       
                                                                 
 activation_1 (Activation)   (None, 16)                0         
                                                                 
 dense_2 (Dense)             (None, 16)                272       
                                                                 
 activation_2 (Activation)   (None, 16)                0

## Building Keras-RL Agent

In [19]:
import numpy as np
import gym

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
from tensorflow.keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory


ENV_NAME = 'CartPole-v0'


# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
np.random.seed(123)
nb_actions = env.action_space.n

# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in tensorflow.keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights(f'dqn_{ENV_NAME}_weights.h5f', overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 4)                 0         
                                                                 
 dense_4 (Dense)             (None, 16)                80        
                                                                 
 activation_4 (Activation)   (None, 16)                0         
                                                                 
 dense_5 (Dense)             (None, 16)                272       
                                                                 
 activation_5 (Activation)   (None, 16)                0         
                                                                 
 dense_6 (Dense)             (None, 16)                272       
                                                                 
 activation_6 (Activation)   (None, 16)               

ValueError: Error when checking input: expected flatten_1_input to have shape (1, 4) but got array with shape (1, 2)

In [16]:
policy = BoltzmannQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
Adam._name = 'hey'
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [17]:
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

Training for 50000 steps ...


ValueError: Error when checking input: expected flatten_input to have shape (1, 4) but got array with shape (1, 2)

In [29]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                    nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

In [30]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mse'])
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

TypeError: Keras symbolic inputs/outputs do not implement `__len__`. You may be trying to pass Keras symbolic inputs/outputs to a TF API that does not register dispatching, preventing Keras from automatically converting the API call to a lambda layer in the Functional Model. This error will also get raised if you try asserting a symbolic input/output directly.

## Saving Model

## Reloading RL Model 