# Debugging D_S_Actor and D_S_Critic

- Use Keras Version 2.2.5. If you use tf.keras it causes some error. (In the tf.keras.backend.function)


In [23]:
import tensorflow
import keras
print(tensorflow.__version__)
print(tensorflow.keras.__version__)
print(keras.__version__)

1.15.0
2.2.4-tf
2.2.5


### D_S_Actor

In [94]:
'''
Sites and pages that are helpful:
DDPG: https://spinningup.openai.com/en/latest/algorithms/ddpg.html
DDPG Code: https://github.com/germain-hug/Deep-RL-Keras/tree/master/DDPG
DDPG Code Updated: https://github.com/samhiatt/ddpg_agent/blob/master/ddpg_agent/agents/agent.py
Keras.Gradient: https://www.tensorflow.org/api_docs/python/tf/gradients
Keras.Function: https://www.tensorflow.org/api_docs/python/tf/keras/backend/function
Zip: https://www.geeksforgeeks.org/zip-in-python/
TensorFlow version used: less than 2.0.0 as then tf.gradients dosent work
'''

import tensorflow as tf
from keras import layers, models, optimizers
from keras import backend as K
import collections
import random
import numpy as np
import math
import copy

class D_S_Actor:

    def __init__(self, state_dim, lr, gaussian_std, act_dim=1):
        self.state_dim = state_dim
        self.act_dim = act_dim
        self.lr = lr
        self.gaussian_std = gaussian_std
        
        self.network()
#         self.optimizer = self.optim()

    def network(self):
        states = layers.Input(shape= (self.state_dim,))
        #
        x = layers.Dense(32, activation='relu')(states)
        x = layers.BatchNormalization()(x)
        x = layers.GaussianNoise(self.gaussian_std)(x)
        #
        x = layers.Dense(32, activation='relu')(x)
        x = layers.BatchNormalization()(x)
        x = layers.GaussianNoise(self.gaussian_std)(x)
        #
        actions = layers.Dense(self.act_dim, activation='tanh')(x)
        #
        self.model = models.Model(inputs=states, outputs=actions)
        
        # Define loss function using action value (Q value) gradients
        action_gradients = layers.Input(shape=(self.act_dim,))
        loss = K.mean(-action_gradients * actions)

        # Define optimizer and training function
        optimizer = optimizers.Adam()
        updates_op = optimizer.get_updates(params=self.model.trainable_weights, loss=loss)
        self.train_fn = K.function(
            inputs=[self.model.input, action_gradients, K.learning_phase()],
            outputs=[loss],
            updates=updates_op)

    def summary(self):

        print(self.model.summary())

    def action(self, state):

        return self.model.predict(state)
    
    def train(self, state, action_gradients):
        
        return self.train_fn([state, action_gradients, 1])

    def save(self, path):

        self.model.save_weights(path + '_D_S_Actor.h5')

    def load_weights(self, path):

        self.model.load_weights(path)

### D_S_Critic

In [95]:
'''
Sites and pages that are helpful:
DDPG: https://spinningup.openai.com/en/latest/algorithms/ddpg.html
DDPG Code: https://github.com/germain-hug/Deep-RL-Keras/tree/master/DDPG
DDPG Code Updated: https://github.com/samhiatt/ddpg_agent/blob/master/ddpg_agent/agents/agent.py
Keras.Gradient: https://www.tensorflow.org/api_docs/python/tf/gradients
Keras.Function: https://www.tensorflow.org/api_docs/python/tf/keras/backend/function
Zip: https://www.geeksforgeeks.org/zip-in-python/
TensorFlow version used: less than 2.0.0 as then tf.gradients dosent work
'''

import tensorflow as tf
from keras import layers, models, optimizers
from keras import backend as K
import collections
import random
import numpy as np
import math
import copy

class D_S_Critic:

    def __init__(self, state_dim, act_dim):
        self.gamma = 0.8
        self.state_dim = state_dim
        self.act_dim = act_dim
        self.model = self.network()
        self.network()
        
    def network(self):
        state_inp = layers.Input(shape = (self.state_dim, ))

        action_inp = layers.Input(shape = (self.act_dim, ))
        #
        x = layers.concatenate([state_inp, action_inp])
        x = layers.Dense(32, activation='relu')(x)
        x = layers.BatchNormalization()(x)
        #
        x = layers.Dense(32, activation='relu')(x)
        x = layers.BatchNormalization()(x)
        #
        output = layers.Dense(1, activation='linear')(x)

        self.model = models.Model(inputs=[state_inp, action_inp], outputs=output)
        
        optimizer = optimizers.Adam()
        self.model.compile(optimizer=optimizer, loss='mse')
        
        # Define an additional function to fetch action gradients (to be used by actor model)
        self.get_action_gradients = K.function(
            inputs=[self.model.input[0], self.model.input[1], K.learning_phase()],
            outputs= K.gradients(self.model.output,  [self.model.input[1]]))

    def gradients(self, state_inp, action_inp):

        return self.get_action_gradients([state_inp, action_inp, 0])

    def reward_value(self, state_inp, action_inp):

        return self.model.predict([state_inp, action_inp])

    def train(self, state_inp, action_inp, state_inp_next, action_inp_next, total_rewards):

        y = total_rewards + self.gamma*self.model.predict([state_inp_next, action_inp_next])
        loss = self.model.train_on_batch(x=[state_inp, action_inp], y=y)

        return loss

    def save(self, path):

        self.model.save_weights(path + '_D_S_Critic.h5')

    def load_weights(self, path):

        self.model.load_weights(path)


### Importing Actor and Critic

In [111]:
D_S_Actor_ = D_S_Actor(5, 0.01, 0.01)
D_S_Critic_ = D_S_Critic(5, 1)

### Debugging the Critic 

In [122]:
state = np.array([[1,2,3,4,5]]).astype(np.float32)
state

array([[1., 2., 3., 4., 5.]], dtype=float32)

In [123]:
action = np.array([[1]]).astype(np.float32).reshape(-1, 1) 
action

array([[1.]], dtype=float32)

In [124]:
# State is np.asarray([[1,2,3,4,5]])
# Action is np.array([[1]])
grads = D_S_Critic_.gradients(state, action)  # Outputs gradient w.r.t actions
print(grads)

[array([[84.712944]], dtype=float32)]


In [125]:
D_S_Critic_.reward_value(np.asarray([[1,2,3,4,5]]), np.array([[1]]))  # Outputs reward

array([[0.00845966]], dtype=float32)

In [126]:
# Next State is np.asarray([[3,5,3,3,2]]
# Next Action is np.array([[2]]
# Reward is np.array([[5]])
# Function outputs loss
D_S_Critic_.train(np.asarray([[1,2,3,4,5]]), np.array([[1]]), np.asarray([[3,5,3,3,2]]), np.array([[2]]), np.array([[5]]))

60063.895

### Debugging the Actor

In [127]:
D_S_Actor_.model.predict(state)

array([[0.36620203]], dtype=float32)

In [128]:
action_gradients = np.reshape(D_S_Critic_.gradients(state, action), (-1,1))
action_gradients

array([[84.9182]], dtype=float32)

### Checking if the Gradient works by looking at a change in the action

In [129]:
D_S_Actor_.model.predict(state)

array([[0.36620203]], dtype=float32)

In [130]:
D_S_Actor_.train(state, action_gradients)

[-2.8813336]

In [131]:
D_S_Actor_.model.predict(state)

array([[0.37086245]], dtype=float32)