In [137]:
import tensorflow as tf
import gym
import numpy as np
from collections import namedtuple

from tensorflow.keras import Sequential
from tensorflow.keras.layers import InputLayer
from tensorflow.keras.layers import Dense



In [3]:
# GPU check: ask TensorFlow which physical GPU devices it can see and report the count.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  1


In [6]:
# Turn on device-placement logging: TensorFlow will report which device
# (CPU/GPU) each operation is assigned to. Useful for confirming the GPU is used,
# but verbose -- disable it once placement has been checked.
tf.debugging.set_log_device_placement(True)

In [8]:
# Instantiate the Gym environment registered under the id 'MountainCarContinuous-v0'.
env = gym.make('MountainCarContinuous-v0')

# Replay buffer

In [136]:
# Container for one sampled mini-batch; every field is a 2-D array whose first
# axis indexes the sampled transitions.
Batch = namedtuple(
    'Batch', ('states', 'actions', 'rewards', 'next_states', 'dones')
)


class ReplayMemory:
    """Fixed-capacity ring buffer of (state, action, reward, next_state, done) transitions.

    Once ``max_size`` transitions have been stored, each new transition
    overwrites the oldest one.

    Attributes:
        max_size: Capacity of the buffer (number of transitions).
        state_dim: Length of a state vector.
        action_dim: Length of an action vector.
        idx: Slot that the next call to ``add`` will write to.
        size: Number of valid transitions currently stored (<= ``max_size``).
    """

    def __init__(self, max_size, state_dim, action_dim):
        """Allocate storage for ``max_size`` transitions.

        Args:
            max_size: Maximum number of transitions kept; must be >= 1.
            state_dim: Length of each state / next-state vector.
            action_dim: Length of each action vector.

        Raises:
            ValueError: If ``max_size`` is smaller than 1.
        """
        if max_size < 1:
            raise ValueError("max_size must be at least 1")

        self.max_size = max_size
        self.state_dim = state_dim
        self.action_dim = action_dim

        # Zero-filled storage: slots beyond ``self.size`` are never sampled, so
        # their content is irrelevant. Zeros (rather than random numbers) keep
        # construction from consuming global RNG state and make an accidental
        # read of an unwritten slot easy to spot.
        self.states = np.zeros((max_size, state_dim))
        self.actions = np.zeros((max_size, action_dim))
        self.rewards = np.zeros((max_size, 1))
        self.next_states = np.zeros((max_size, state_dim))
        self.dones = np.full((max_size, 1), True)

        self.idx = 0
        self.size = 0

    def add(self, state, action, reward, next_state, done):
        """Store one transition, overwriting the oldest one when the buffer is full."""
        slot = self.idx
        self.states[slot] = state
        self.actions[slot] = action
        self.rewards[slot] = reward
        self.next_states[slot] = next_state
        self.dones[slot] = done

        # Advance the write cursor with wrap-around; the fill level saturates at capacity.
        self.idx = (slot + 1) % self.max_size
        self.size = min(self.size + 1, self.max_size)

    def sample(self, batch_size):
        """Draw distinct stored transitions uniformly at random.

        Args:
            batch_size: Requested number of transitions.

        Returns:
            A ``Batch`` whose fields each have ``min(batch_size, self.size)``
            rows. When fewer than ``batch_size`` transitions are stored, all of
            them are returned (in random order).
        """
        # Sampling is without replacement, so the draw can never exceed what is stored.
        count = min(batch_size, self.size)
        picked = np.random.choice(self.size, count, replace=False)

        return Batch(
            states=self.states[picked, :],
            actions=self.actions[picked, :],
            rewards=self.rewards[picked, :],
            next_states=self.next_states[picked, :],
            dones=self.dones[picked, :],
        )

        

In [138]:
class Critic:
    
    def __init__(self):
        """Print a banner on construction; no instance attributes are set here."""
        print("Critic network")
    
    def create_network(self, input_size, output_size, hidden_layers, perceptrons_count=64,
                       hidden_activation='relu', output_activation='tanh'):
        """Build a fully connected Keras ``Sequential`` model.

        Args:
            input_size: Input shape without the batch axis. Either a shape
                sequence such as ``(3,)`` or a plain integer feature count,
                which is treated as ``(input_size,)``.
            output_size: Number of units in the output layer.
            hidden_layers: Number of hidden ``Dense`` layers.
            perceptrons_count: Units in each hidden layer.
            hidden_activation: Activation used by every hidden layer.
            output_activation: Activation used by the output layer.

        Returns:
            The constructed (uncompiled) ``Sequential`` model.
        """
        # InputLayer expects a shape sequence; wrap a bare feature count so that
        # callers can pass e.g. 3 instead of (3,).
        if isinstance(input_size, (int, np.integer)):
            input_shape = (int(input_size),)
        else:
            input_shape = input_size

        model = Sequential()
        model.add(InputLayer(input_shape))
        for _ in range(hidden_layers):
            model.add(Dense(perceptrons_count, activation=hidden_activation))
        # NOTE(review): the default 'tanh' bounds outputs to [-1, 1]; if this network
        # is meant to estimate unbounded values, pass output_activation=None -- confirm
        # against the intended training target.
        model.add(Dense(output_size, activation=output_activation))

        return model
    
    def train(model)