# Continuous Action Mountain Car trained with Deep Deterministic Policy Gradients

## Step 0: Import necessary packages

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import gym
import random

## Step 1: Activate the environment and examine state/action spaces

In [None]:
# Instantiate the Gym environment registered under the id 'MountainCarContinuous-v0'.
env = gym.make('MountainCarContinuous-v0')

In [None]:
# Show each space followed by its upper and lower bounds, one value per line
# (observation space first, then action space).
print(env.observation_space, env.observation_space.high, env.observation_space.low, sep='\n')
print(env.action_space, env.action_space.high, env.action_space.low, sep='\n')

In [None]:
# Sizes are taken from the first axis of each space's shape.
state_size, action_size = env.observation_space.shape[0], env.action_space.shape[0]
# Upper and lower action bounds as reported by the action space.
action_high, action_low = env.action_space.high, env.action_space.low

Let's take some random actions in the environment and see what happens...

In [None]:
# Roll out 20 episodes of at most 100 steps each using randomly sampled
# actions, rendering every step and printing the observation that precedes it.
try:
    for i_episode in range(20):
        observation = env.reset()
        for t in range(100):
            env.render()
            print(observation)
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(t+1))
                break
finally:
    # Close the environment even if the rollout is interrupted or raises,
    # so the render window is not left open.
    env.close()

## Step 2: Create the agent

In [None]:
from agent import DDPG
from collections import deque

In [None]:
# Define all hyperparameters here
ACTOR_LR = 1e-4
CRITIC_LR = 1e-3
RANDOM_SEED = 42
# NOTE(review): MU/THETA/SIGMA look like exploration-noise parameters — confirm in agent.py.
MU = 0.0
THETA = 0.15
SIGMA = 0.2
# Presumably the replay-buffer capacity (an item count), so it is stored as an
# int rather than the float 1e5 — verify how agent.py consumes it.
BUFFER_SIZE = int(1e5)
BATCH_SIZE = 128
GAMMA = 0.99
TAU = 1e-3
N_TIME_STEPS = 1
N_LEARN_UPDATES = 1

# Use the first GPU when TensorFlow can see one, otherwise fall back to the CPU.
# tf.test.is_gpu_available() is deprecated in TF 2.x; the documented replacement
# is tf.config.list_physical_devices('GPU'), which returns an empty list when no
# GPU is visible.
DEVICE = "/GPU:0" if tf.config.list_physical_devices('GPU') else "/device:CPU:0"

In [None]:
# Build the DDPG agent from the environment dimensions, action bounds,
# hyperparameters and target device. Every argument is passed positionally, so
# the order here must match the DDPG constructor in agent.py (not visible here).
agent = DDPG(state_size, action_size, action_high, action_low, ACTOR_LR, CRITIC_LR,
             RANDOM_SEED, MU, THETA, SIGMA, BUFFER_SIZE, BATCH_SIZE,
             GAMMA, TAU, N_TIME_STEPS, N_LEARN_UPDATES, DEVICE)

In [None]:
def ddpg(n_episodes=1000, print_every=100, target_score=90.0):
    """Run the training loop for the module-level ``agent`` on the module-level ``env``.

    Each episode resets the environment and the agent, then alternates
    ``agent.act`` / ``env.step`` / ``agent.step`` until the environment reports
    ``done``. Both local models are checkpointed after every episode.

    Training stops early once the mean score over a *full* window of the last
    ``print_every`` episodes reaches ``target_score``. Requiring a full window
    prevents a single high-scoring early episode from ending training.

    Args:
        n_episodes: Maximum number of episodes to run.
        print_every: Size of the rolling score window; a persistent progress
            line is also printed every ``print_every`` episodes.
        target_score: Rolling-average score at which training is considered
            solved.

    Returns:
        A list with the total reward of each episode that was run.
    """
    scores_deque = deque(maxlen=print_every)
    scores = []

    for i_episode in range(1, n_episodes+1):
        state = env.reset()
        agent.reset()
        score = 0
        t = 0

        while True:
            t += 1
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            # The step counter is forwarded to the agent together with the transition.
            agent.step(t, state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break

        scores_deque.append(score)
        scores.append(score)
        # Compute the rolling mean once and reuse it for reporting and the stop test.
        average_score = np.mean(scores_deque)

        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, average_score), end="")
        agent.actor_local.model.save('checkpoint_actor.h5')
        agent.critic_local.model.save('checkpoint_critic.h5')
        if i_episode % print_every == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, average_score))

        # Only declare success on a completely filled window.
        window_is_full = len(scores_deque) == print_every
        if window_is_full and average_score >= target_score:
            # Report the episode at which the qualifying window started. The
            # checkpoints written just above already hold the final weights,
            # so they are not saved a second time here.
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode - print_every, average_score))
            break

    return scores

# Train with the default settings and keep the per-episode scores.
scores = ddpg()

# Plot score against 1-based episode number on a single set of axes.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(np.arange(1, len(scores)+1), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

In [None]:
# Reset the environment and display the observation it returns.
state = env.reset()
state

In [None]:
# Scratch check: ask the local actor for an action given the input [1.5].
# NOTE(review): the training loop reaches the saved model through
# agent.actor_local.model, and [1.5] is a single value while state_size is read
# from the observation space — confirm that actor_local exposes predict() and
# accepts an input of this form.
action = agent.actor_local.predict([1.5])

In [None]:
# Display the current value of `action`.
action

In [None]:
from tensorflow.keras.layers import Dense, Input, Concatenate, BatchNormalization, Activation
from tensorflow.keras import Model

# Test network mapping a state input to tanh-activated action outputs.
states = Input(shape=(state_size,))

# Stack the ReLU hidden layers (400 units, then 300), threading the tensor through each.
net = states
for units in (400, 300):
    net = Dense(units=units, activation='relu', kernel_initializer='glorot_uniform')(net)

# Output layer: one tanh unit per action dimension.
actions = Dense(units=action_size, activation='tanh', kernel_initializer='glorot_uniform')(net)

In [None]:
# Wrap the `states` input and `actions` output tensors in a Keras Model named "test".
test_model = Model(inputs=states, outputs=actions, name="test")

In [None]:
# Prepend a new leading axis to the state, then display the resulting shape.
state = np.asanyarray(state)[np.newaxis, ...]
state.shape

In [None]:
# Call the test model on `state` and display the result converted to a NumPy array.
test_model(state).numpy()

In [None]:
# Test Q-network: takes a state and an action as separate inputs and outputs a
# single linear value.

# Define input layers (these rebind the names `states` and `actions`)
states = Input(shape=(state_size,))
actions = Input(shape=(action_size,))

# Add hidden layer for state pathway (only the state passes through it)
net_states = Dense(units=400, activation='relu', kernel_initializer='glorot_uniform')(states)

# Combine state and action pathways: the raw action input is concatenated with
# the state features along the last axis
net = Concatenate(axis=-1)([net_states, actions])

# Add more layers to the combined network
net = Dense(units=300, activation='relu', kernel_initializer='glorot_uniform')(net)

# Add final output layer to produce action values (Q values): one linear unit
Q_values = Dense(units=1, activation='linear', kernel_initializer='glorot_uniform')(net)

# Create Keras model taking [states, actions] and returning the Q value
Q_model = Model(inputs=[states, actions], outputs=Q_values)

In [None]:
# Compute an action for `state` with the test model and keep it as a NumPy array.
action = test_model(state).numpy()

In [None]:
# Evaluate the test Q-network on the (state, action) pair.
Q_pred = Q_model([state, action])

In [None]:
# NOTE(review): this is the same call with the same inputs as Q_pred, so the two
# values are computed identically — presumably a placeholder for a real target;
# confirm intent.
Q_target = Q_model([state, action])

In [None]:
from tensorflow.keras.losses import MSE