In [13]:
import torch.nn as nn
import torch
import torch.nn.functional as F

In [2]:
class DQN(nn.Module):
    """Small fully connected Q-network.

    Architecture: Linear(n_observations -> 32) -> LayerNorm -> ReLU ->
    Linear(32 -> 16) -> LayerNorm -> ReLU -> Linear(16 -> n_actions).
    The attribute names (layer1, ln1, layer2, ln2, layer3) double as the
    state_dict key prefixes, so they must stay stable for saved checkpoints
    to keep loading.
    """

    def __init__(self, n_observations, n_actions):
        """Create the layers.

        Args:
            n_observations: number of input features per observation.
            n_actions: number of outputs (one value per action).
        """
        # Debug trace of the sizes the network is being built with.
        print(f'*** Initializing with {n_observations}, {n_actions}')
        super().__init__()

        first_width, second_width = 32, 16

        self.layer1 = nn.Linear(n_observations, first_width)
        self.ln1 = nn.LayerNorm(first_width)
        self.layer2 = nn.Linear(first_width, second_width)
        self.ln2 = nn.LayerNorm(second_width)
        self.layer3 = nn.Linear(second_width, n_actions)

    def forward(self, x):
        """Return the action values for ``x``.

        Works for a single observation (to choose the next action) as well as
        for a batch (during optimization). The last dimension of the result
        has ``n_actions`` entries.
        """
        hidden = self.layer1(x)
        hidden = F.relu(self.ln1(hidden))
        hidden = self.layer2(hidden)
        hidden = F.relu(self.ln2(hidden))
        # The output layer is deliberately left without an activation.
        return self.layer3(hidden)

In [3]:
# Network dimensions passed to DQN: input feature count and number of outputs.
n_observations = 25
n_actions = 4

In [4]:
# Instantiate the network with the dimensions configured above.
model = DQN(n_observations=n_observations, n_actions=n_actions)

*** Initializing with 25, 4


In [5]:
# Checkpoint file holding the saved state_dict for the DQN.
model_path = "layernorm-normalize-debug-2cust-scheduler-nodedop-to_zero_explore.nnet"  # Adjust path as necessary
# map_location="cpu" makes the load work on machines without CUDA even when
# the checkpoint was written from a GPU run; load_state_dict then copies the
# values into the model's existing parameters.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
model.load_state_dict(torch.load(model_path, map_location="cpu"))

<All keys matched successfully>

In [6]:
# Prefer the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Reload the checkpoint mapped onto that device, then move the model there.
# model.to() returns the module, so the cell echoes its repr.
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)

DQN(
  (layer1): Linear(in_features=25, out_features=32, bias=True)
  (ln1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
  (layer2): Linear(in_features=32, out_features=16, bias=True)
  (ln2): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
  (layer3): Linear(in_features=16, out_features=4, bias=True)
)

In [42]:
# Divisor applied to observation entries 8, 9, 17 and 18 before they are fed
# to the network (presumably the largest possible distance -- TODO confirm).
max_dist = 14.94427190999916

In [43]:
# Raw 25-entry observation; the first three entries are (1, 0, 0) in this case.
observation = [
    1, 0, 0, 5, 1, 9, 3, 5,
    4.47213595499958, 4.47213595499958,
    4, 2, 4, 2, 8, 1, 3,
    3.0, 3.0,
    3, 0, 3, 0, 10, 10,
]
# Scale entries 8, 9, 17 and 18 by max_dist; every other entry is kept as is.
observation = [
    value / max_dist if position in (8, 9, 17, 18) else value
    for position, value in enumerate(observation)
]

In [44]:
# Show the observation after entries 8, 9, 17 and 18 were divided by max_dist.
observation

[1,
 0,
 0,
 5,
 1,
 9,
 3,
 5,
 0.2992541879546028,
 0.2992541879546028,
 4,
 2,
 4,
 2,
 8,
 1,
 3,
 0.20074581204539718,
 0.20074581204539718,
 3,
 0,
 3,
 0,
 10,
 10]

In [37]:
# Convert the observation to a float32 tensor on `device` and add a leading
# batch axis, giving a single-row 2-D input for the network.
state = torch.unsqueeze(
    torch.tensor(observation, dtype=torch.float32, device=device),
    dim=0,
)

In [38]:
# Inspect the batched input tensor that will be fed to the model.
state

tensor([[ 1.0000,  0.0000,  0.0000,  5.0000,  1.0000,  9.0000,  3.0000,  5.0000,
          0.2993,  0.2993,  4.0000,  2.0000,  4.0000,  2.0000,  8.0000,  1.0000,
          3.0000,  0.2007,  0.2007,  3.0000,  0.0000,  3.0000,  0.0000, 10.0000,
         10.0000]], device='cuda:0')

In [39]:
# Switch the network to evaluation mode before querying it. eval() returns the
# module itself, which is why the cell echoes the model's repr.
model.eval()

DQN(
  (layer1): Linear(in_features=25, out_features=32, bias=True)
  (ln1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
  (layer2): Linear(in_features=32, out_features=16, bias=True)
  (ln2): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
  (layer3): Linear(in_features=16, out_features=4, bias=True)
)

In [40]:
# Forward pass on the single-row batch; the result holds one value per action.
# NOTE(review): autograd is enabled here (the recorded output carries a
# grad_fn). Consider `with torch.no_grad():` if gradients are not needed.
action_values = model(state)

In [41]:
# Display the values the network predicted for each of the four actions.
action_values

tensor([[ 0.1425,  0.1870,  0.1848, -0.3884]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [21]:
# Same observation as the first case except that the first three entries are
# (0, 1, 0) instead of (1, 0, 0).
observation = [
    0, 1, 0, 5, 1, 9, 3, 5,
    4.47213595499958, 4.47213595499958,
    4, 2, 4, 2, 8, 1, 3,
    3.0, 3.0,
    3, 0, 3, 0, 10, 10,
]
# Scale entries 8, 9, 17 and 18 by max_dist; every other entry is kept as is.
observation = [
    value / max_dist if position in (8, 9, 17, 18) else value
    for position, value in enumerate(observation)
]
# float32 tensor on `device` with a leading batch axis.
state = torch.unsqueeze(
    torch.tensor(observation, dtype=torch.float32, device=device),
    dim=0,
)

In [22]:
# Query the network for the current `state` and display the predicted values.
# NOTE(review): autograd is enabled here (the recorded output carries a
# grad_fn). Consider `with torch.no_grad():` if gradients are not needed.
action_values = model(state)
action_values

tensor([[ 0.0687,  0.3876, -0.2408, -0.1966]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [23]:
# Same observation as the first case except that the first three entries are
# (0, 0, 1) instead of (1, 0, 0).
observation = [
    0, 0, 1, 5, 1, 9, 3, 5,
    4.47213595499958, 4.47213595499958,
    4, 2, 4, 2, 8, 1, 3,
    3.0, 3.0,
    3, 0, 3, 0, 10, 10,
]
# Scale entries 8, 9, 17 and 18 by max_dist; every other entry is kept as is.
observation = [
    value / max_dist if position in (8, 9, 17, 18) else value
    for position, value in enumerate(observation)
]
# float32 tensor on `device` with a leading batch axis.
state = torch.unsqueeze(
    torch.tensor(observation, dtype=torch.float32, device=device),
    dim=0,
)

In [24]:
# Query the network for the current `state` and display the predicted values.
# NOTE(review): autograd is enabled here (the recorded output carries a
# grad_fn). Consider `with torch.no_grad():` if gradients are not needed.
action_values = model(state)
action_values

tensor([[ 0.1795,  0.3390,  0.1555, -0.2040]], device='cuda:0',
       grad_fn=<AddmmBackward0>)