<a href="https://colab.research.google.com/github/amirhoseinaghaei/Twin-Delayed-DDPG-Implementation/blob/main/TD3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Twin-Delayed DDPG

## Installing the packages

In [None]:
!pip install pybullet

## Importing the libraries

In [7]:
import os
import time
import random
import numpy as np
import matplotlib.pyplot as plt
# import pybullet_envs
import gym
import torch
import torch.nn as nn
import torch.nn.functional as F
from gym import wrappers
from torch.autograd import Variable
from collections import deque

Step 1: Implementing the replay buffer

In [8]:
class ReplayBuffer(object):
  """Fixed-capacity store of transitions with uniform random sampling.

  Each transition is a 5-tuple ordered as
  ``(state, next_state, action, reward, done)``. Once the buffer holds
  ``max_size`` transitions, new ones overwrite existing slots in a
  circular fashion, starting from index 0.
  """

  def __init__(self, max_size = 1e6):
    """Create an empty buffer.

    Args:
      max_size: Maximum number of transitions kept. Accepts a float such
        as ``1e6`` for convenience; it is truncated to ``int``.

    Raises:
      ValueError: If ``max_size`` is smaller than 1 after truncation.
    """
    capacity = int(max_size)
    if capacity < 1:
      raise ValueError("max_size must be at least 1")
    self.storage = []
    # Stored as int so the fullness check is exact and the write pointer
    # never silently becomes a float after the modulo below.
    self.max_size = capacity
    self.ptr = 0

  def add(self, transition):
    """Store one transition, overwriting a previous slot when full.

    Args:
      transition: Tuple ``(state, next_state, action, reward, done)``.
    """
    if len(self.storage) >= self.max_size:
      self.storage[self.ptr] = transition
      self.ptr = (self.ptr + 1) % self.max_size
    else:
      self.storage.append(transition)

  def sample(self, batch_size):
    """Draw ``batch_size`` transitions uniformly at random, with replacement.

    Args:
      batch_size: Number of transitions to draw.

    Returns:
      Tuple of five ``numpy`` arrays
      ``(states, next_states, actions, rewards, dones)``; ``rewards`` and
      ``dones`` are reshaped to column vectors of shape ``(batch_size, 1)``.

    Raises:
      ValueError: Propagated from ``np.random.randint`` when the buffer
        is empty.
    """
    ind = np.random.randint(0, len(self.storage), size = batch_size)
    batch_states, batch_next_states, batch_actions, batch_rewards, batch_dones = [], [], [], [], []
    for i in ind:
      state, next_state, action, reward, done = self.storage[i]
      # np.asarray avoids a copy when possible but, unlike
      # np.array(..., copy=False) on NumPy >= 2.0, does not raise when a
      # copy is unavoidable (Python scalars, lists).
      batch_states.append(np.asarray(state))
      batch_next_states.append(np.asarray(next_state))
      batch_actions.append(np.asarray(action))
      batch_rewards.append(np.asarray(reward))
      batch_dones.append(np.asarray(done))
    return (
      np.array(batch_states),
      np.array(batch_next_states),
      np.array(batch_actions),
      np.array(batch_rewards).reshape(-1, 1),
      np.array(batch_dones).reshape(-1, 1),
    )


Step 2: Defining the Actor neural network

In [9]:
class Actor(nn.Module):
  """Policy network that maps a state to an action bounded by ``max_action``.

  Architecture: ``state_dim -> 400 -> 300 -> action_dim`` with ReLU on the
  two hidden layers and ``tanh`` on the output, scaled by ``max_action``
  so every action component lies in ``[-max_action, max_action]``.
  """

  def __init__(self, state_dim, action_dim, max_action):
    """Build the three linear layers.

    Args:
      state_dim: Size of the state vector fed to the network.
      action_dim: Size of the action vector produced by the network.
      max_action: Scale applied to the ``tanh`` output.
    """
    super(Actor, self).__init__()
    self.layer1 = nn.Linear(state_dim, 400)
    self.layer2 = nn.Linear(400, 300)
    self.layer3 = nn.Linear(300, action_dim)
    self.max_action = max_action

  def forward(self, x):
    """Return the action for state batch ``x``.

    Args:
      x: Tensor whose last dimension is ``state_dim``.

    Returns:
      Tensor whose last dimension is ``action_dim``, with values in
      ``[-max_action, max_action]``.
    """
    x = F.relu(self.layer1(x))
    x = F.relu(self.layer2(x))
    # torch.tanh is the functional form; nn.Tanh is a module class and
    # cannot be called directly on a tensor.
    return self.max_action * torch.tanh(self.layer3(x))

Step 3: Defining the Critic neural network

In [12]:
class Critic(nn.Module):
  """Pair of independent Q-networks evaluated on the same (state, action).

  Both critics share the architecture
  ``state_dim + action_dim -> 400 -> 300 -> 1`` with ReLU on the hidden
  layers and a linear output. ``layer1``-``layer3`` form the first critic,
  ``layer4``-``layer6`` the second.
  """

  def __init__(self, state_dim , action_dim):
    """Build the layers of both critics.

    Args:
      state_dim: Size of the state vector.
      action_dim: Size of the action vector.
    """
    super(Critic, self).__init__()
    # Defining the first Critic neural network
    self.layer1 = nn.Linear(state_dim + action_dim, 400)
    self.layer2 = nn.Linear(400, 300)
    self.layer3 = nn.Linear(300, 1)
    # Defining the second Critic neural network
    self.layer4 = nn.Linear(state_dim + action_dim, 400)
    self.layer5 = nn.Linear(400, 300)
    self.layer6 = nn.Linear(300, 1)

  def _first_critic(self, xu):
    """Run the first critic on an already concatenated state-action tensor."""
    x1 = F.relu(self.layer1(xu))
    x1 = F.relu(self.layer2(x1))
    return self.layer3(x1)

  def _second_critic(self, xu):
    """Run the second critic on an already concatenated state-action tensor."""
    x2 = F.relu(self.layer4(xu))
    x2 = F.relu(self.layer5(x2))
    return self.layer6(x2)

  def forward(self, x, u):
    """Return the Q-value estimates of both critics.

    Args:
      x: State batch, 2-D tensor (concatenated with ``u`` along dim 1).
      u: Action batch, 2-D tensor with the same number of rows as ``x``.

    Returns:
      Tuple ``(q1, q2)``, each with one column per row of the input.
    """
    xu = torch.cat([x, u], 1)
    return self._first_critic(xu), self._second_critic(xu)

  def Q1(self, x, u):
    """Return only the first critic's Q-value estimate.

    Takes the same arguments as ``forward`` and yields the same value as
    its first output, without evaluating the second critic.
    """
    xu = torch.cat([x, u], 1)
    return self._first_critic(xu)