# DDPG算法实现

引包

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np

import random

参数设置

In [None]:
class DDPGConfig:
    """Hyperparameter container for the DDPG implementation.

    Every setting is a fixed default assigned in the constructor, which
    takes no arguments; callers read (or overwrite) the attributes directly.
    """

    def __init__(self):
        # Compute device. Always CPU: CUDA auto-detection is not enabled here.
        self.device = torch.device("cpu")

        # Run length.
        self.train_eps = 50  # number of training episodes
        self.test_eps = 50  # number of evaluation episodes

        # Problem and network sizes.
        # NOTE(review): state_dim / action_dim presumably match the target
        # environment -- confirm against the code that builds the networks.
        self.state_dim = 18
        self.action_dim = 21
        self.hidden1_dim = 256  # width of the first hidden layer
        self.hidden2_dim = 128  # width of the second hidden layer

        # Optimisation.
        self.gamma = 0.99  # discount factor
        self.actor_lr = 1e-5  # learning rate of the actor network
        self.critic_lr = 1e-4  # learning rate of the critic network
        self.batch_size = 64  # mini-batch size used for SGD

        # Target networks.
        self.soft_tau = 1e-2  # soft-update coefficient
        self.target_update = 2  # update frequency of the target networks

        # Experience replay.
        self.memory_capacity = 8000  # capacity of the replay buffer

经验回放池

In [None]:
class ReplayBuffer:
    """Fixed-capacity store of transitions with uniform random sampling.

    Transitions are kept in a plain list that is written like a ring: once
    ``capacity`` entries have been stored, every new transition overwrites
    the oldest one still present.
    """

    def __init__(self, capacity):
        """Create an empty buffer.

        Args:
            capacity: Maximum number of transitions retained; must be >= 1.

        Raises:
            ValueError: If ``capacity`` is smaller than 1.
        """
        # Fail here rather than on the first push(), where a non-positive
        # capacity would surface as an unrelated-looking IndexError.
        if capacity < 1:
            raise ValueError(
                f"capacity must be a positive integer, got {capacity!r}"
            )
        self.capacity = capacity  # maximum number of stored transitions
        self.buffer = []  # stored (state, action, reward, next_state) tuples
        self.position = 0  # index of the slot the next push() writes to

    def push(self, state, action, reward, next_state):
        """Store one transition, overwriting the oldest when the buffer is full."""
        # Grow the list by one placeholder slot until capacity is reached;
        # after that, ``position`` wraps around and old entries are replaced.
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        self.buffer[self.position] = (state, action, reward, next_state)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct transitions uniformly at random.

        Returns:
            Four tuples ``(state, action, reward, next_state)``, each of
            length ``batch_size``, with matching indices belonging to the
            same transition.

        Raises:
            ValueError: Raised by ``random.sample`` when ``batch_size``
                exceeds the number of stored transitions.
        """
        batch = random.sample(self.buffer, batch_size)
        # Transpose the list of transitions into one tuple per field.
        state, action, reward, next_state = zip(*batch)
        return state, action, reward, next_state

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.buffer)