## Creating an RL DQN Agent for Mountain Car
#### Challenge: Ride a Mountain Car using Reinforcement Learning
A car is on a one-dimensional track, positioned between two "mountains". The goal is to drive up the mountain on the right; however, the car's engine is not strong enough to scale the mountain in a single pass. Therefore, the only way to succeed is to drive back and forth to build up momentum.

#### Reinforcement Learning Based Strategy
Use the Q-learning principle to devise a strategy for climbing the mountain.

https://gym.openai.com/envs/MountainCar-v0/

In [1]:
import gym

In [2]:
# Build the MountainCar-v0 environment; the later cells all act on this shared `env`.
env = gym.make('MountainCar-v0')

In [6]:
# Start a fresh episode; the notebook echoes the returned initial observation.
env.reset()

array([-0.55961635,  0.        ])

In [7]:
# Inspect the action space (the recorded output below is Discrete(3)).
env.action_space

Discrete(3)

In [8]:
# Number of discrete actions; the training setup later reads this as `action_size`.
env.action_space.n

3

In [9]:
# Inspect the observation space (recorded output: a float32 Box of shape (2,)).
env.observation_space

Box(-1.2000000476837158, 0.6000000238418579, (2,), float32)

In [10]:
# Shape of a single observation; (2,) in the recorded output.
env.observation_space.shape

(2,)

In [11]:
# Smoke test: drive the car with uniformly random actions for 1000 rendered
# steps to confirm that the environment and its viewer work.
env.reset()
try:
    for t in range(1000):
        random_action = env.action_space.sample()
        _, _, done, _ = env.step(random_action)
        env.render()
        # Stepping an environment after it reported `done` is not supported,
        # so begin a new episode instead of continuing the finished one.
        if done:
            env.reset()
finally:
    # Release the render window even if a step or render call fails.
    env.close()

In [12]:
# Baseline: play whole episodes with random actions and report the step index
# at which each one ended.
n_demo_episodes = 20  # single source of truth for the loop and both messages

try:
    for e in range(n_demo_episodes):
        observation = env.reset()
        for t in range(500):
            env.render()
            action = env.action_space.sample()
            # Keep `observation` current (the original bound a misspelled name).
            observation, reward, done, other_info = env.step(action)

            if done:
                print("Game Episode: {}/{} High Score: {}".format(e, n_demo_episodes, t))
                break
finally:
    # Release the render window even if an episode is interrupted.
    env.close()
print("All {} episodes over!".format(n_demo_episodes))

Game Episode: 0/20 High Score: 199
Game Episode: 1/20 High Score: 199
Game Episode: 2/20 High Score: 199
Game Episode: 3/20 High Score: 199
Game Episode: 4/20 High Score: 199
Game Episode: 5/20 High Score: 199
Game Episode: 6/20 High Score: 199
Game Episode: 7/20 High Score: 199
Game Episode: 8/20 High Score: 199
Game Episode: 9/20 High Score: 199
Game Episode: 10/20 High Score: 199
Game Episode: 11/20 High Score: 199
Game Episode: 12/20 High Score: 199
Game Episode: 13/20 High Score: 199
Game Episode: 14/20 High Score: 199
Game Episode: 15/20 High Score: 199
Game Episode: 16/20 High Score: 199
Game Episode: 17/20 High Score: 199
Game Episode: 18/20 High Score: 199
Game Episode: 19/20 High Score: 199
All 20 episodes over!


In [20]:
import numpy as np
import matplotlib.pyplot as plt
import os
import random
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

## Agent Design and Neural Model

In [28]:
class Agent:
    """Deep Q-learning agent with an experience-replay memory.

    The agent keeps a bounded buffer of ``(state, action, reward, next_state,
    done)`` transitions, picks actions epsilon-greedily from a small dense
    network, and fits that network towards one-step Q-learning targets.

    Args:
        state_size: Number of features in one observation (network input width).
        action_size: Number of discrete actions (network output width).
        deque_size: Maximum number of transitions kept in the replay memory.
        gamma: Discount factor applied to the best next-state Q-value.
        learning_rate: Learning rate handed to the Adam optimizer.
        epsilon: Initial probability of choosing a random action.
        epsilon_decay: Factor applied to ``epsilon`` after each training call.
        epsilon_min: Lower bound for ``epsilon``.
    """

    def __init__(self, state_size, action_size, deque_size=2000, gamma=0.95,
                 learning_rate=0.001, epsilon=1.0, epsilon_decay=0.995,
                 epsilon_min=0.01):
        self.state_size = state_size
        self.action_size = action_size
        # Bounded buffer: once full, the oldest transitions are dropped.
        self.memory = deque(maxlen=deque_size)
        self.gamma = gamma

        # Exploration schedule used by act() and updated by train().
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.learning_rate = learning_rate

        self.model = self._create_model()

    def _create_model(self):
        """Build and compile the network mapping a state to one Q-value per action."""
        model = Sequential()
        # Only the first layer needs the input width; later layers infer it.
        model.add(Dense(128, input_dim=self.state_size, activation='relu'))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(32, activation='relu'))
        # Linear output because Q-values are unbounded regression targets.
        model.add(Dense(self.action_size, activation='linear'))

        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))

        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Return an action index for ``state`` using an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action is returned and
        the model is not consulted; otherwise the action with the highest
        predicted Q-value is chosen. ``state`` must already be in the batched
        form that ``model.predict`` accepts.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)

        act_values = self.model.predict(state, verbose=0)
        return int(np.argmax(act_values[0]))

    def train(self, batch_size=32):
        """Fit the model on a random minibatch of remembered transitions.

        Does nothing (and leaves ``epsilon`` untouched) while the memory holds
        fewer than ``batch_size`` transitions, because ``random.sample`` would
        raise ``ValueError`` in that case. After a training pass ``epsilon`` is
        decayed once and never drops below ``epsilon_min``.
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            # One-step Q-learning target; terminal transitions do not bootstrap.
            target = reward
            if not done:
                next_q = self.model.predict(next_state, verbose=0)[0]
                target = reward + self.gamma * np.amax(next_q)

            # Replace only the taken action's value so the other outputs
            # contribute zero error to the fit.
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def load(self, name):
        """Load model weights from the file ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save the current model weights to the file ``name``."""
        self.model.save_weights(name)
        

## Training the DQN Agent (Deep Q-Learner)

In [30]:
# Number of training episodes and the folder that receives weight checkpoints.
n_episodes = 100
output_dir = "mountaincar_model/"

# The training loop writes checkpoints into output_dir; create it up front so
# the first save does not fail on a fresh checkout.
os.makedirs(output_dir, exist_ok=True)

In [31]:
# Read the observation width from the space definition rather than from a
# throw-away reset(), so it does not depend on what reset() returns.
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
batch_size = 32  # transitions sampled per training call

In [32]:
# Train the agent: play n_episodes rendered episodes, store every transition,
# run one replay-training pass after each episode, and checkpoint the weights
# every 50 episodes.
agent = Agent(state_size, action_size)

# Position at which the underlying environment treats the hill as climbed.
# Falls back to 0.5, the goal documented for MountainCar-v0, if the attribute
# is not exposed by the installed Gym version.
goal_position = getattr(env.unwrapped, "goal_position", 0.5)

try:
    for e in range(n_episodes):
        state = np.reshape(env.reset(), [1, state_size])

        for t in range(5000):
            env.render()
            action = agent.act(state)
            next_state, reward, done, other_info = env.step(action)

            # `done` is also raised when the step limit expires, so it cannot
            # be used as a failure signal here. Only reaching the goal is a
            # true terminal state; the unmodified per-step reward is kept so
            # success is never penalised.
            # NOTE(review): assumes observation[0] is the car position, as in
            # the Gym MountainCar-v0 documentation.
            reached_goal = bool(next_state[0] >= goal_position)

            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action, reward, next_state, reached_goal)
            state = next_state

            if done:
                print("Game Episode :{}/{}, High Score :{}, Exploration Rate:{:.2}".format(e, n_episodes, t, agent.epsilon))
                break

        # Train only once more transitions are stored than one minibatch needs.
        if len(agent.memory) > batch_size:
            agent.train(batch_size)

        if e % 50 == 0:
            agent.save(output_dir + "weights_" + '{:04d}'.format(e) + ".hdf5")
finally:
    # Release the render window even if training is interrupted.
    env.close()

Game Episode :0/100, High Score :199, Exploration Rate:1.0
Game Episode :1/100, High Score :199, Exploration Rate:0.99
Game Episode :2/100, High Score :199, Exploration Rate:0.99
Game Episode :3/100, High Score :199, Exploration Rate:0.99
Game Episode :4/100, High Score :199, Exploration Rate:0.98
Game Episode :5/100, High Score :199, Exploration Rate:0.98
Game Episode :6/100, High Score :199, Exploration Rate:0.97
Game Episode :7/100, High Score :199, Exploration Rate:0.97
Game Episode :8/100, High Score :199, Exploration Rate:0.96
Game Episode :9/100, High Score :199, Exploration Rate:0.96
Game Episode :10/100, High Score :199, Exploration Rate:0.95
Game Episode :11/100, High Score :199, Exploration Rate:0.95
Game Episode :12/100, High Score :199, Exploration Rate:0.94
Game Episode :13/100, High Score :199, Exploration Rate:0.94
Game Episode :14/100, High Score :199, Exploration Rate:0.93
Game Episode :15/100, High Score :199, Exploration Rate:0.93
Game Episode :16/100, High Score :1