# Continuous control with Deep Reinforcement Learning

This notebook provides an implementation and a simple test of the following article:

Lillicrap et al., *Continuous control with Deep Reinforcement Learning*, 2016

The test is executed on the Pendulum problem.

In [1]:
import numpy as np
import gym
import tensorflow as tf

### Experience replay

In [8]:
class ExperienceReplay:
    """Fixed-capacity FIFO store of experiences (state, action, reward, next state).

    The buffer starts empty and holds at most `size` entries. When it is full,
    the oldest entries are dropped to make room for new ones.
    """

    def __init__(self, size):
        """Create an empty buffer that keeps at most `size` experiences."""
        self.size = size
        # Start empty rather than pre-filled with placeholders, so that a
        # placeholder can never be sampled as if it were a real experience.
        self.queue = []

    def append(self, experience_list):
        """Add every experience in `experience_list`, evicting the oldest on overflow."""
        self.queue.extend(experience_list)

        # Trim from the front (oldest first) so the buffer never exceeds `size`.
        overflow = len(self.queue) - self.size
        if overflow > 0:
            del self.queue[:overflow]

    def get_random_batch(self, number):
        """Return `number` experiences drawn uniformly at random, with replacement.

        Only experiences that were actually stored are sampled. An empty buffer
        yields an empty list.
        """
        stored = len(self.queue)
        if stored == 0:
            return []

        indices = np.random.randint(0, stored, number)
        return [self.queue[idx] for idx in indices]

### Noise

In [None]:
def noise(action, low, high, sigma=0.5):
    """Perturb `action` with Gaussian exploration noise and clamp it to [low, high].

    Parameters
    ----------
    action : scalar or array-like
        The action to perturb. A scalar is treated as a length-1 action.
    low, high : scalar or array-like
        Lower and upper bounds; must broadcast against `action`.
    sigma : float, optional
        Standard deviation of the added noise (default 0.5).

    Returns
    -------
    numpy.ndarray
        The noisy action, clipped element-wise to the bounds.
    """
    # Draw one independent standard-normal sample per action component.
    sample_shape = np.shape(np.atleast_1d(action))
    perturbation = sigma * np.random.randn(*sample_shape)

    # np.clip works element-wise, so actions with several components are
    # handled without the ambiguous truth-value error of a scalar comparison.
    return np.clip(action + perturbation, low, high)

### Actor

In [None]:
class Actor:
    """Placeholder for the actor (policy) of the actor-critic algorithm.

    TODO: not implemented yet. The methods below are the ones the training
    loop in this notebook calls on an actor; each raises NotImplementedError
    until a real implementation is provided.
    """

    def action(self, state):
        """Return the action to take in `state`."""
        raise NotImplementedError("Actor.action is not implemented yet")

    def update(self, critic, samples):
        """Update the actor using `critic` and a batch of sampled experiences."""
        raise NotImplementedError("Actor.update is not implemented yet")

    def soft_target_update(self):
        """Perform the soft update of the target parameters."""
        raise NotImplementedError("Actor.soft_target_update is not implemented yet")

### The actor-critic algorithm

In [None]:
# Initialization
# NOTE(review): Critic is not defined in the visible part of this notebook;
# it must be defined before this cell can run.
replay = ExperienceReplay(100)
critic = Critic()
actor = Actor()
# Keep the environment object itself: reset() returns an observation, and
# the loop below needs render(), step() and action_space from the environment.
env = gym.make('Pendulum-v0')
M = 100  # number of episodes
N = 32   # number of experiences sampled per update

for _ in range(M):

    # Start every episode from a fresh initial state.
    env.reset()
    done = False
    # The rendered frame is used as the state; the observation returned by
    # reset()/step() is ignored. NOTE(review): presumably intentional
    # (learning from pixels) -- confirm.
    state = env.render(mode='rgb_array', close=False)

    while not done:
        # Exploration: perturb the actor's action, keeping it within the action bounds.
        action = noise(actor.action(state), env.action_space.low, env.action_space.high)
        _, reward, done, _ = env.step(action)
        state_new = env.render(mode='rgb_array', close=False)
        replay.append([(state, action, reward, state_new)])
        state = state_new

        # Learn from a random batch of stored experiences, then move the
        # target parameters towards the updated ones.
        samples = replay.get_random_batch(N)
        critic.update(actor, samples)
        actor.update(critic, samples)
        critic.soft_target_update()
        actor.soft_target_update()