# Import Libraries

In [1]:
import ptan
import torch
import torch.nn as nn

# Define NNs

In [2]:
class DQNNet(nn.Module):
    """Stub Q-network used to demonstrate agents; it ignores observation values.

    For a batch of ``batch_size`` observations the network returns a
    ``(batch_size, actions)`` tensor with ones on the main diagonal and zeros
    everywhere else. Row ``i`` therefore has its maximum at column ``i`` while
    ``i < actions``; rows past that point are all zeros.

    Args:
        actions: Number of actions, i.e. the width of the produced tensor.
    """

    def __init__(self, actions: int):
        super().__init__()

        self.actions = actions

    def forward(self, x):
        """Return the diagonal score matrix for the batch in ``x``.

        Only the size of the first dimension of ``x`` and its device are read;
        the observation values themselves are never used.
        """
        # Allocate on the input's device so the scores live next to the
        # observations instead of always landing on the default (CPU) device.
        return torch.eye(x.size(0), self.actions, device=x.device)

In [3]:
class PolicyNet(nn.Module):
    """Stub policy network used to demonstrate agents; it ignores observation values.

    For a batch of ``batch_size`` observations the network returns float32
    logits of shape ``(batch_size, actions)`` in which the first two actions
    share the same score (1) and every other action scores 0.

    Args:
        actions: Number of actions, i.e. the width of the produced tensor.
    """

    def __init__(self, actions: int):
        super().__init__()

        self.actions = actions

    def forward(self, x):
        """Return the fixed logits for the batch in ``x``.

        Only the size of the first dimension of ``x`` and its device are read.
        """
        logits = torch.zeros(
            (x.size(0), self.actions), dtype=torch.float32, device=x.device
        )
        # A slice clamps to the available width, so this also works when the
        # network is built with fewer than two actions (indexing column 1
        # directly would raise IndexError in that case).
        logits[:, :2] = 1

        return logits

# Test DQNNet NN

In [4]:
# Instantiate the stub Q-network with three actions; `net` is reused by the
# DQN agent cells that follow.
net = DQNNet(actions=3)

In [5]:
# Push a dummy batch of 2 observations (10 features each) through the network.
# DQNNet.forward only reads the batch size, so the zeros are just placeholders.
net_out = net(torch.zeros(2, 10))
net_out

tensor([[1., 0., 0.],
        [0., 1., 0.]])

# DQN Agent with Argmax Action Selector

In [6]:
# Argmax action selector: the recorded result ([0, 1]) matches the column of
# the largest value in each row of the network's diagonal output.
selector = ptan.actions.ArgmaxActionSelector()

# Wrap the stub network and the selector into an agent, then call it on a
# dummy batch of two observations (feature values are ignored by DQNNet).
agent = ptan.agent.DQNAgent(dqn_model=net, action_selector=selector)
ag_out = agent(torch.zeros(2, 5))
ag_out # Tuple: (actions chosen for the batch, per-sample internal agent state; None when the agent has none)

(array([0, 1]), [None, None])

# DQN Agent with Epsilon Greedy Action Selector

In [7]:
# Rebuild the agent with an epsilon-greedy selector. With epsilon=1.0 the
# recorded actions no longer follow the greedy pattern; presumably every
# action is drawn at random at this setting (confirm against the ptan docs).
# NOTE(review): no random seed is set in the visible cells, so the outputs of
# the stochastic cells will differ between runs.
selector = ptan.actions.EpsilonGreedyActionSelector(epsilon=1.0)

agent = ptan.agent.DQNAgent(dqn_model=net, action_selector=selector)
# Batch of 10 observations: the network output has ones only at (0,0), (1,1)
# and (2,2); rows 3-9 are all zeros.
ag_out = agent(torch.zeros(10, 5))
ag_out

(array([2, 0, 2, 1, 2, 1, 1, 2, 0, 2]),
 [None, None, None, None, None, None, None, None, None, None])

In [8]:
# Lower epsilon in place on the selector object the agent was constructed
# with, then query the agent again. Presumably the agent reads `epsilon` at
# call time, so no new agent is needed (TODO confirm against ptan).
selector.epsilon = 0.5
ag_out = agent(torch.zeros(10, 5))
ag_out

(array([2, 1, 2, 0, 0, 0, 2, 2, 0, 0]),
 [None, None, None, None, None, None, None, None, None, None])

In [9]:
# Lower epsilon further on the same selector object. The recorded result
# [0, 1, 2, 0, ..., 0] coincides with the row-wise argmax of the diagonal
# network output for a batch of 10 (all-zero rows resolve to action 0).
selector.epsilon = 0.1
ag_out = agent(torch.zeros(10, 5))
ag_out

(array([0, 1, 2, 0, 0, 0, 0, 0, 0, 0]),
 [None, None, None, None, None, None, None, None, None, None])

# Test PolicyNet NN

In [10]:
# Rebind `net` (previously the DQNNet instance) to the stub policy network
# with five actions.
net = PolicyNet(actions=5)

In [11]:
# Push a dummy batch of 6 observations through the policy network; every row
# gets logit 1 for actions 0 and 1 and logit 0 for the remaining actions.
net_out = net(torch.zeros(6, 10))
net_out

tensor([[1., 1., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 0., 0., 0.]])

# Policy Agent with Probability Action Selector

In [12]:
# Selector that, judging by its name, samples an action index from a
# probability distribution rather than taking the maximum.
selector = ptan.actions.ProbabilityActionSelector()

# apply_softmax=True: presumably the agent converts the network's raw logits
# to probabilities with softmax before sampling (confirm against ptan docs).
# Under softmax, logits [1, 1, 0, 0, 0] give about 0.32 for each of the first
# two actions and about 0.12 for each of the others, so actions 2-4 can still
# be sampled, as the recorded output shows.
agent = ptan.agent.PolicyAgent(model=net, action_selector=selector, apply_softmax=True)
ag_out = agent(torch.zeros(6, 5))
ag_out

(array([1, 3, 1, 4, 0, 0]), [None, None, None, None, None, None])