Deep Q-Learning: MountainCar example in Gym

# 1. Import the MountainCar environment

In [5]:
import gym

In [6]:
# Create the MountainCar environment and report the size of its
# observation vector and the number of discrete actions.
env = gym.make('MountainCar-v0')

state_size = env.observation_space.shape[0]
action_count = env.action_space.n

print("Shape of state = ", state_size)
print("No of actions =", action_count)


Shape of state =  2
No of actions = 3


# 2. Try some random moves to provide a baseline

In [7]:
# Baseline: play a handful of episodes with randomly sampled actions and
# report the reward of each episode plus the average over all of them.
episodes = 5
cumulative_reward = 0

for episode in range(1, episodes + 1):
    env.reset()
    total_reward = 0

    # Step until the environment signals the end of the episode.
    while True:
        env.render()
        action = env.action_space.sample()
        print(">", action, end='')
        _, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            break

    print(f'\n Episode {episode} Total Reward = {total_reward}')
    cumulative_reward += total_reward

average_reward = cumulative_reward / episodes
print('Average Reward =', average_reward)

> 1> 0> 2> 0> 0> 0> 2> 1> 0> 1> 1> 0> 0> 1> 1> 2> 0> 1> 1> 1> 1> 1> 0> 0> 1> 2> 2> 2> 1> 1> 0> 0> 2> 0> 1> 2> 0> 2> 2> 0> 1> 0> 1> 2> 1> 2> 2> 0> 1> 0> 0> 2> 0> 2> 0> 1> 2> 2> 0> 1> 1> 1> 2> 1> 0> 0> 2> 0> 0> 0> 0> 1> 1> 2> 0> 0> 2> 0> 2> 0> 1> 1> 2> 0> 2> 0> 2> 0> 0> 2> 2> 0> 1> 2> 1> 0> 2> 2> 2> 2> 0> 0> 2> 1> 1> 1> 2> 1> 0> 1> 2> 1> 1> 2> 2> 2> 1> 1> 0> 0> 1> 2> 1> 0> 0> 0> 0> 0> 2> 1> 2> 1> 1> 2> 2> 2> 2> 1> 2> 1> 2> 1> 2> 2> 2> 0> 0> 2> 1> 2> 1> 0> 2> 0> 2> 1> 0> 1> 2> 0> 2> 2> 0> 2> 0> 2> 2> 0> 0> 0> 1> 2> 2> 2> 0> 1> 0> 2> 0> 0> 1> 2> 2> 1> 2> 1> 1> 1> 2> 0> 1> 2> 2> 2> 0> 1> 1> 2> 0> 2
 Episode 1 Total Reward = -200.0
> 2> 0> 2> 1> 0> 2> 0> 2> 2> 2> 1> 0> 2> 0> 2> 2> 2> 2> 2> 0> 1> 1> 1> 2> 2> 2> 0> 1> 1> 0> 1> 0> 1> 2> 0> 2> 0> 1> 2> 0> 2> 0> 2> 2> 2> 2> 2> 2> 1> 1> 0> 1> 1> 0> 2> 2> 0> 2> 2> 1> 0> 2> 0> 0> 1> 1> 2> 2> 0> 0> 1> 0> 2> 1> 0> 1> 0> 1> 2> 2> 2> 2> 1> 0> 1> 2> 2> 2> 1> 1> 2> 0> 1> 0> 1> 1> 1> 2> 0> 0> 2> 2> 1> 0> 2> 0> 0> 1> 0> 0> 0> 2> 2> 2> 2> 2> 2> 1> 0> 2> 2> 0

In [8]:
# if env.viewer: env.viewer.close() # close the visualisation

# 3. Use the DQN model for the MountainCar example

In [9]:
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop

from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy,LinearAnnealedPolicy

ModuleNotFoundError: No module named 'tensorflow'

In [None]:

def create_agent(states, actions):
    """Build the feed-forward network that is later handed to the DQN agent.

    Despite the name, this returns a Keras model rather than an agent.

    Args:
        states: Length of the observation vector; the input shape is
            ``(1, states)`` and is flattened before the dense layers.
        actions: Number of units in the final linear layer.

    Returns:
        The uncompiled ``Sequential`` model: a flatten layer, three
        24-unit ReLU layers and a linear output layer.
    """
    layers = [
        Flatten(input_shape=(1, states)),
        Dense(24, activation='relu'),
        Dense(24, activation='relu'),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    return Sequential(layers)
  
# Size the network from the environment: input width is the observation
# length, output width is the number of discrete actions.
model = create_agent(env.observation_space.shape[0], env.action_space.n)

In [None]:
# Reset the environment; the value displayed below is the observation it returns.
env.reset()

array([-0.43965834,  0.        ], dtype=float32)

In [None]:
# Experience memory passed to the agent.
memory = SequentialMemory(window_length=1, limit=100000)

# Alternative policy kept for reference (anneals the Boltzmann 'tau' attribute):
# policy = LinearAnnealedPolicy(BoltzmannQPolicy(), attr='tau',
#                               value_max=1., value_min=.1, value_test=.05, nb_steps=10000)
policy = BoltzmannQPolicy()

# Assemble the DQN agent around the network, memory and policy defined above.
dqn = DQNAgent(
    model=model,
    nb_actions=env.action_space.n,
    memory=memory,
    policy=policy,
    gamma=0.99,
    nb_steps_warmup=100,
    target_model_update=1e-3,
    enable_double_dqn=True,
)

dqn.compile(RMSprop(learning_rate=1e-3), metrics=['mae'])

In [None]:

# Train the agent on the environment for 100000 steps.
# NOTE(review): visualize=True presumably renders the environment on every
# step, which likely slows training considerably — confirm and consider
# disabling it for full runs.
dqn.fit(
    env,
    nb_steps=100000,
    visualize=True,
    verbose=1,
)

Training for 100000 steps ...
Interval 1 (0 steps performed)
    3/10000 [..............................] - ETA: 5:35 - reward: -1.0000 

  updates=self.state_updates,


  569/10000 [>.............................] - ETA: 5:30 - reward: -1.0000

In [None]:
# Evaluate the trained agent over five rendered episodes and keep the
# returned object so its history can be inspected afterwards.
env.reset()
res = dqn.test(
    env,
    nb_episodes=5,
    visualize=True,
)

In [None]:
# Show what the test run recorded; the 'episode_reward' entry is averaged in the next cell.
res.history

In [None]:
import numpy as np

# Mean reward per episode across the test run.
episode_rewards = res.history['episode_reward']
print(np.average(episode_rewards))