In [1]:
import torch
import torch.nn as nn

In [2]:
def mlp(sizes, activation=nn.ReLU, output_activation=nn.Identity):
    """Build a fully connected network as an ``nn.Sequential``.

    Args:
        sizes: Sequence of layer widths. Each consecutive pair
            ``(sizes[i], sizes[i + 1])`` becomes one ``nn.Linear`` layer.
        activation: Module class instantiated after every linear layer
            except the last one.
        output_activation: Module class instantiated after the last linear
            layer.

    Returns:
        An ``nn.Sequential`` alternating linear layers and activation modules.
        It is empty when ``sizes`` has fewer than two entries.
    """
    last_idx = len(sizes) - 2
    modules = []
    for idx, (fan_in, fan_out) in enumerate(zip(sizes[:-1], sizes[1:])):
        # Only the final linear layer is followed by the output activation.
        nonlinearity = output_activation if idx == last_idx else activation
        modules.append(nn.Linear(fan_in, fan_out))
        modules.append(nonlinearity())
    return nn.Sequential(*modules)

In [3]:
class Actor(nn.Module):
    """Output head stacked on top of a caller-supplied feature extractor.

    Args:
        feature_extract: Callable module applied to the input first. It is
            stored as an attribute, so when it is an ``nn.Module`` its
            parameters are part of ``Actor.parameters()``.
        feature_dim: Width of the features produced by ``feature_extract``
            (input width of the head). Defaults to 64.
        out_dim: Number of values the head produces per sample. Defaults
            to 10.
    """

    def __init__(self, feature_extract, feature_dim=64, out_dim=10):
        super().__init__()
        self.feature_extract = feature_extract
        # Two sizes -> one linear layer; `mlp` follows the last layer with its
        # output activation (nn.Identity by default).
        self.mlp = mlp([feature_dim, out_dim])

    def forward(self, x):
        """Apply the feature extractor, then the head, and return the result."""
        features = self.feature_extract(x)
        return self.mlp(features)

In [4]:
# Seed PyTorch's global RNG before any layer is created so that the random
# weight initialisation (and the random tensors drawn in later cells) are
# reproducible under Restart Kernel -> Run All.
torch.manual_seed(0)

# With only two sizes, `mlp` treats the single 64 -> 64 linear layer as the
# output layer and follows it with nn.Identity rather than ReLU.
feature_extract = mlp([64, 64])
# The actor stores this very module, so `feature_extract` and
# `actor.feature_extract` refer to the same parameters.
actor = Actor(feature_extract=feature_extract)

In [5]:
# Draw one random batch (uniform on [0, 1)) with 64 features per sample and
# push it through the actor.
batch_size = 10
x = torch.rand(batch_size, 64)
actor_out = actor(x)

In [6]:
# Random regression targets, one row of 10 values per sample in the batch.
actor_label = torch.rand((batch_size, 10))
loss_fn = nn.MSELoss()
# nn.MSELoss is called as loss_fn(input, target): prediction first, label
# second. MSE is symmetric, so the value is the same either way, but the
# conventional order keeps the cell correct if the loss is swapped for an
# asymmetric one.
actor_loss = loss_fn(actor_out, actor_label)
print(actor_loss)

tensor(0.4350, grad_fn=<MseLossBackward0>)


In [9]:
# Weight and bias of the feature extractor as they stand before the optimizer
# step taken further down.
[*feature_extract.parameters()]

[Parameter containing:
 tensor([[-4.3276e-02,  8.2608e-02,  5.2922e-02,  ...,  8.5482e-02,
           8.5523e-02,  2.5699e-02],
         [-1.0463e-01,  9.2084e-02,  6.6882e-02,  ..., -1.0245e-01,
           1.2008e-01, -7.0674e-03],
         [ 9.1978e-02,  8.9840e-02, -1.2117e-01,  ..., -7.6714e-02,
           1.1591e-03,  3.7793e-02],
         ...,
         [-3.1141e-03, -8.6605e-02, -4.3342e-02,  ..., -7.8976e-05,
          -3.1943e-02, -1.1460e-01],
         [ 8.5651e-02,  1.4943e-02, -9.1573e-02,  ..., -1.1747e-01,
           5.6839e-02,  9.6282e-02],
         [-5.7501e-02,  7.1120e-02, -3.6035e-02,  ..., -5.3469e-02,
          -5.3681e-02, -3.2296e-02]], requires_grad=True),
 Parameter containing:
 tensor([ 0.0572, -0.0287,  0.0044, -0.0911, -0.0667, -0.0174, -0.0043, -0.0451,
          0.0847,  0.0097,  0.0620,  0.0180,  0.0878,  0.1005,  0.0419, -0.0678,
         -0.0747,  0.0355, -0.0825, -0.0514,  0.0374,  0.0405,  0.0347, -0.0311,
          0.1047,  0.0908,  0.1030, -0.0277, 

In [11]:
# Optimise every parameter reachable from the actor, which includes the
# feature extractor stored on it.
opt = torch.optim.Adam(actor.parameters(), lr=0.01)
opt.zero_grad()
# Rebuild the loss from a fresh forward pass so this cell can be re-run:
# backward() frees the autograd graph, so calling it a second time on the
# loss tensor from the earlier cell would raise a RuntimeError.
actor_out = actor(x)
actor_loss = loss_fn(actor_out, actor_label)
actor_loss.backward()
opt.step()

In [12]:
# Same listing after opt.step(); compare with the earlier dump to check that
# the feature extractor was updated through the actor's optimizer.
[*feature_extract.parameters()]

[Parameter containing:
 tensor([[-0.0533,  0.0726,  0.0429,  ...,  0.0755,  0.0755,  0.0157],
         [-0.0946,  0.0821,  0.0569,  ..., -0.1124,  0.1101, -0.0171],
         [ 0.1020,  0.0998, -0.1112,  ..., -0.0667,  0.0112,  0.0478],
         ...,
         [-0.0131, -0.0966, -0.0533,  ...,  0.0099, -0.0419, -0.1246],
         [ 0.0957,  0.0249, -0.0816,  ..., -0.1075,  0.0668,  0.1063],
         [-0.0475,  0.0811, -0.0260,  ..., -0.0435, -0.0437, -0.0223]],
        requires_grad=True),
 Parameter containing:
 tensor([ 0.0472, -0.0387,  0.0144, -0.1011, -0.0767, -0.0274,  0.0057, -0.0551,
          0.0947,  0.0197,  0.0720,  0.0080,  0.0978,  0.1105,  0.0319, -0.0578,
         -0.0847,  0.0455, -0.0925, -0.0614,  0.0274,  0.0305,  0.0447, -0.0411,
          0.1147,  0.1008,  0.1130, -0.0377, -0.0006, -0.1006,  0.0998,  0.0726,
         -0.0516, -0.0680,  0.0635,  0.0777,  0.0519, -0.1082, -0.1028,  0.0931,
         -0.0977,  0.0516,  0.0576,  0.0341,  0.1311, -0.1046,  0.0832,  0.0420