In [14]:
import torch

# Sample a 3x4 integer tensor. torch.randint draws from [low, high), so the
# values lie in [-1000, 1000). No seed is set in this cell, so they differ per run.
boxes_nms=torch.randint(-1000,1000,(3,4))
print(boxes_nms)

tensor([[ 501,   10, -520, -122],
        [  17,  588, -698,  664],
        [ 312, -359, -300, -389]])


In [15]:
# Clamp column 0 from below at 10 and column 1 from above at 500.
# Basic slicing returns views, so the in-place clamps write into boxes_nms.
boxes_nms[:, 0].clamp_(min=10)
boxes_nms[:, 1].clamp_(max=500)
boxes_nms

tensor([[ 501,   10, -520, -122],
        [  17,  500, -698,  664],
        [ 312, -359, -300, -389]])

In [16]:
from torch import nn
from torch.nn import functional as F

# Two-layer perceptron built from stock layers: 20 -> 256 -> ReLU -> 10.
net = nn.Sequential(nn.Linear(20, 256),
                    nn.ReLU(),
                    nn.Linear(256, 10))
# NOTE: the seed is set after `net` has been constructed, so it fixes X
# but not the layer weights created above.
torch.manual_seed(42)
X = torch.rand(2, 20)

# Display the input batch and the network output together.
X, net(X)

(tensor([[0.8823, 0.9150, 0.3829, 0.9593, 0.3904, 0.6009, 0.2566, 0.7936, 0.9408,
          0.1332, 0.9346, 0.5936, 0.8694, 0.5677, 0.7411, 0.4294, 0.8854, 0.5739,
          0.2666, 0.6274],
         [0.2696, 0.4414, 0.2969, 0.8317, 0.1053, 0.2695, 0.3588, 0.1994, 0.5472,
          0.0062, 0.9516, 0.0753, 0.8860, 0.5832, 0.3376, 0.8090, 0.5779, 0.9040,
          0.5547, 0.3423]]),
 tensor([[ 0.1924, -0.0091,  0.4433, -0.2924,  0.0434, -0.3230, -0.0967, -0.0847,
          -0.0464,  0.3652],
         [ 0.2456, -0.0599,  0.2142, -0.2261,  0.0586, -0.1186, -0.1335, -0.1829,
          -0.0717,  0.2763]], grad_fn=<AddmmBackward0>))

In [17]:
# Define an MLP module that implements the same function as the Sequential net above
class MLP(nn.Module):
    """Multilayer perceptron: Linear(20, 256) -> ReLU -> Linear(256, 10)."""

    def __init__(self):
        super().__init__()
        # Hidden layer first, then the output layer.
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Apply the hidden layer, a ReLU, then the output layer to X."""
        activated = F.relu(self.hidden(X))
        return self.out(activated)
    
# Instantiate the custom module and run it on the X defined in the previous cell.
net = MLP()
X, net(X)

(tensor([[0.8823, 0.9150, 0.3829, 0.9593, 0.3904, 0.6009, 0.2566, 0.7936, 0.9408,
          0.1332, 0.9346, 0.5936, 0.8694, 0.5677, 0.7411, 0.4294, 0.8854, 0.5739,
          0.2666, 0.6274],
         [0.2696, 0.4414, 0.2969, 0.8317, 0.1053, 0.2695, 0.3588, 0.1994, 0.5472,
          0.0062, 0.9516, 0.0753, 0.8860, 0.5832, 0.3376, 0.8090, 0.5779, 0.9040,
          0.5547, 0.3423]]),
 tensor([[ 0.1074,  0.1122, -0.0164, -0.1884,  0.2030,  0.1430,  0.0029, -0.0170,
           0.0118, -0.3402],
         [ 0.0556,  0.0728,  0.0151, -0.1841,  0.2343,  0.1248,  0.0682,  0.0488,
          -0.0727, -0.1874]], grad_fn=<AddmmBackward0>))

### Parameter management

In [18]:
import torch
from torch import nn

# Small MLP with one 8-unit hidden layer: 4 -> 8 -> ReLU -> 1.
net = nn.Sequential(nn.Linear(4,8),
                    nn.ReLU(),
                    nn.Linear(8,1))

# Batch of 2 samples with 4 features each, drawn uniformly from [0, 1).
X = torch.rand((2,4))
net(X)

tensor([[-0.1784],
        [-0.2106]], grad_fn=<AddmmBackward0>)

In [19]:
# Access the parameters of a single layer: net[2] is the final Linear(8, 1),
# and state_dict() maps its parameter names to their tensors.
print(net[2].state_dict())


OrderedDict([('weight', tensor([[ 0.0138, -0.2764, -0.1944,  0.2576,  0.0113, -0.1603,  0.2390, -0.1049]])), ('bias', tensor([-0.1438]))])


In [20]:
# The bias is an nn.Parameter; .data exposes the underlying tensor.
print(type(net[2].bias))
print(net[2].bias)
print(net[2].bias.data)

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.1438], requires_grad=True)
tensor([-0.1438])


In [21]:
# Names and shapes of the first layer's parameters only.
print(*[(name, param.shape) for name, param in net[0].named_parameters()])

# Names and shapes of every parameter in the network; names are prefixed
# with the index of the layer inside the Sequential.
print(*[(name, param.shape) for name, param in net.named_parameters()])


('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [22]:
# The final layer's bias, read through the layer's own state_dict ...
print(net[2].state_dict()['bias'].data)
print(net.state_dict()['2.bias'].data) # ... and through the network's state_dict via the prefixed key

tensor([-0.1438])
tensor([-0.1438])


In [11]:
def block1():
    """Return a fresh 4 -> 8 -> 4 block with a ReLU after each Linear layer."""
    layers = [
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)

def block2():
    """Return a Sequential of four block1() instances named 'block 0' .. 'block 3'."""
    stacked = nn.Sequential()
    for name in [f'block {i}' for i in range(4)]:
        # Every iteration calls block1() again, producing a separate module.
        stacked.add_module(name, block1())
    return stacked

# Nest the stacked blocks inside another Sequential and add a 4 -> 1 output layer.
rgnet = nn.Sequential(block2(), nn.Linear(4,1))
rgnet(X)

tensor([[-0.2315],
        [-0.2313]], grad_fn=<AddmmBackward0>)

In [12]:
# List every parameter of the nested network, one (name, shape) pair per line.
print(*[(name, param.shape) for name, param in rgnet.named_parameters()], sep="\n")

('0.block 0.0.weight', torch.Size([8, 4]))
('0.block 0.0.bias', torch.Size([8]))
('0.block 0.2.weight', torch.Size([4, 8]))
('0.block 0.2.bias', torch.Size([4]))
('0.block 1.0.weight', torch.Size([8, 4]))
('0.block 1.0.bias', torch.Size([8]))
('0.block 1.2.weight', torch.Size([4, 8]))
('0.block 1.2.bias', torch.Size([4]))
('0.block 2.0.weight', torch.Size([8, 4]))
('0.block 2.0.bias', torch.Size([8]))
('0.block 2.2.weight', torch.Size([4, 8]))
('0.block 2.2.bias', torch.Size([4]))
('0.block 3.0.weight', torch.Size([8, 4]))
('0.block 3.0.bias', torch.Size([8]))
('0.block 3.2.weight', torch.Size([4, 8]))
('0.block 3.2.bias', torch.Size([4]))
('1.weight', torch.Size([1, 4]))
('1.bias', torch.Size([1]))


In [13]:
# Print the module hierarchy of the nested network.
print(rgnet)

Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [27]:
# Nested Sequentials can be indexed like nested lists:
# first child -> second block -> first Linear layer -> its bias.
rgnet[0][1][0].bias.data

tensor([-0.1554,  0.3400,  0.2938,  0.1362,  0.2092,  0.2568,  0.2069,  0.4257])

In [28]:
def init_normal(m):
    """Initialise a plain nn.Linear layer: weights ~ N(0, 0.01^2), bias = 0.

    Intended for use with ``Module.apply``; modules whose type is not exactly
    ``nn.Linear`` are left untouched.

    Args:
        m: Any module; only plain ``nn.Linear`` instances are modified.
    """
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # A layer built with bias=False has m.bias set to None; skip it
        # instead of passing None to the initialiser.
        if m.bias is not None:
            nn.init.zeros_(m.bias)

# Optional step: PyTorch already initialises layers when they are constructed;
# apply() calls init_normal on every submodule and overrides those defaults.
net.apply(init_normal)
net[0].weight.data[0], net[0].bias.data[0]

(tensor([ 0.0024, -0.0036, -0.0080,  0.0112]), tensor(0.))

In [None]:
def init_constant(m):
    """Initialise a plain nn.Linear layer: every weight = 1, bias = 0.

    Intended for use with ``Module.apply``; modules whose type is not exactly
    ``nn.Linear`` are left untouched.

    Args:
        m: Any module; only plain ``nn.Linear`` instances are modified.
    """
    if type(m) == nn.Linear:
        # The trailing underscore marks an in-place operation: the tensor
        # passed in is overwritten rather than a new one being returned.
        nn.init.constant_(m.weight, 1)
        # A layer built with bias=False has m.bias set to None; skip it.
        if m.bias is not None:
            nn.init.zeros_(m.bias)

# Apply the constant initialiser to every submodule, then inspect the first layer.
net.apply(init_constant)
net[0].weight.data[0], net[0].bias.data[0]

(tensor([1., 1., 1., 1.]), tensor(0.))

In [32]:
def init_xavier(m):
    """Apply Xavier-uniform initialisation to the weight of a plain nn.Linear."""
    if type(m) != nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)
def init_42(m):
    """Set every weight of a plain nn.Linear to the constant 42."""
    is_plain_linear = type(m) == nn.Linear
    if is_plain_linear:
        nn.init.constant_(m.weight, 42)

# Different initialisers can be applied to individual layers:
# Xavier-uniform for the first Linear, the constant 42 for the last one.
net[0].apply(init_xavier)
net[2].apply(init_42)
print(net[0].weight.data[0])
print(net[2].weight.data)

tensor([-0.1334,  0.0748, -0.2355, -0.0801])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])


In [33]:
# Custom initialisation: anything can be done inside the hook
def my_init(m):
    """Give a plain nn.Linear U(-10, 10) weights, then zero those with |w| < 5."""
    if type(m) != nn.Linear:
        return
    # Report the name and shape of the layer's first registered parameter.
    first_name, first_param = next(m.named_parameters())
    print("Init", first_name, first_param.shape)
    nn.init.uniform_(m.weight, -10, 10)
    # Multiplying by the boolean mask keeps entries with |w| >= 5 and
    # zeroes the rest.
    keep = m.weight.data.abs() >= 5
    m.weight.data.mul_(keep)

# Run the custom initialiser on every submodule and show the first two weight rows.
net.apply(my_init)
net[0].weight[:2]

Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])


tensor([[ 8.3071,  8.6604,  0.0000,  5.4948],
        [-5.3545,  0.0000,  9.5895, -0.0000]], grad_fn=<SliceBackward0>)

In [34]:
# We need to give the shared layer a name so that we can refer to its parameters
shared = nn.Linear(8, 8)
net = nn.Sequential(nn.Linear(4, 8), 
                    nn.ReLU(),
                    shared, 
                    nn.ReLU(),
                    shared, 
                    nn.ReLU(),
                    nn.Linear(8, 1))
net(X)
# Check whether the parameters are the same
print(net[2].weight.data[0] == net[4].weight.data[0])
net[2].weight.data[0, 0] = 100
# Make sure they are actually the same object, not merely equal in value
print(net[2].weight.data[0] == net[4].weight.data[0])

tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])


You can also define custom layers.

In [43]:
class CenteredLayer(nn.Module):
    """Parameter-free layer that shifts and rescales its input.

    The mean and standard deviation are computed over all elements of the
    input (no dim argument is passed), so the tensor is treated as a whole.
    """

    def forward(self, X):
        """Return X minus its overall mean, divided by its overall std."""
        centered = X - X.mean()
        return centered / X.std()

layer = CenteredLayer()
ct_tensor = torch.FloatTensor([1,2,3,4,5])
# Show the layer's output next to the element-wise sigmoid of the same input.
layer(ct_tensor), F.sigmoid(ct_tensor)

(tensor([-1.2649, -0.6325,  0.0000,  0.6325,  1.2649]),
 tensor([0.7311, 0.8808, 0.9526, 0.9820, 0.9933]))

In [None]:
# Use the custom layer inside a Sequential, after a Linear(8, 128) layer
net = nn.Sequential(nn.Linear(8,128), 
                    CenteredLayer())

# CenteredLayer rescales the whole output to mean 0 and std 1
# (up to floating-point error)
Y = net(torch.rand(4,8))
Y.mean(), Y.std()    

(tensor(2.6077e-08, grad_fn=<MeanBackward0>),
 tensor(1., grad_fn=<StdBackward0>))

In [59]:
# A custom layer that owns its own parameters
class MyLinear(nn.Module):
    """Fully connected layer with a built-in ReLU activation.

    Args:
        in_units: Number of rows of the weight matrix.
        units: Number of columns of the weight matrix and length of the bias.
    """

    def __init__(self, in_units, units):
        super().__init__()
        # Standard-normal initial values; the weight is drawn before the bias.
        weight_init = torch.randn(in_units, units)
        bias_init = torch.randn(units)
        self.weight = nn.Parameter(weight_init)
        self.bias = nn.Parameter(bias_init)

    def forward(self, X):
        """Return relu(X @ weight + bias)."""
        pre_activation = X @ self.weight + self.bias
        return F.relu(pre_activation)
    
# Build a 5 -> 3 layer and display its randomly initialised weight parameter.
dense = MyLinear(5, 3)
dense.weight

Parameter containing:
tensor([[-1.2870,  0.8693, -0.4714],
        [ 0.2476, -0.0401,  0.3944],
        [ 1.8425,  1.2547,  0.0323],
        [-0.2044, -0.2735, -1.5424],
        [-1.5960, -1.0708,  0.9460]], requires_grad=True)

In [68]:
# Stack two custom layers. Each MyLinear ends in a ReLU, so the final
# output cannot be negative.
net  = nn.Sequential(MyLinear(64,8),
                     MyLinear(8, 1))

net(torch.rand(2, 64))

tensor([[0.],
        [0.]], grad_fn=<ReluBackward0>)

### Saving files

In [74]:
# Save a 4x2 tensor to disk and read it back.
# Assumes the ../data directory already exists - TODO confirm.
matrix = torch.arange(8).view(4, 2)
torch.save(matrix, '../data/matrix-file')
print(matrix)

# The reloaded tensor should match the one printed above.
x2 = torch.load('../data/matrix-file')
x2

tensor([[0, 1],
        [2, 3],
        [4, 5],
        [6, 7]])


tensor([[0, 1],
        [2, 3],
        [4, 5],
        [6, 7]])

How can I save an MLP?

In [76]:
class MLP(nn.Module):
    """MLP (20 -> 256 -> ReLU -> 10) used for the save/load demonstration.

    The layer attributes are named ``hidden`` and ``output``; these names
    become the key prefixes in ``state_dict()``.
    """

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        """Apply the hidden layer, a ReLU, then the output layer to x."""
        hidden_act = F.relu(self.hidden(x))
        return self.output(hidden_act)

# Build the model and record its output Y on a random batch X; both are
# reused below to check the reloaded copy.
net = MLP()
X = torch.randn(size=(2, 20))
Y = net(X)
Y

tensor([[ 0.2719, -0.1220,  0.0460,  0.0040,  0.0071,  0.4501,  0.0755,  0.2225,
          0.2181,  0.0275],
        [-0.1422,  0.0183, -0.1103,  0.4559, -0.0465,  0.3411,  0.2825,  0.5192,
          0.3347, -0.0471]], grad_fn=<AddmmBackward0>)

In [77]:
# Persist only the parameters (the state_dict), not the module object itself.
# Assumes the ../model directory already exists - TODO confirm.
torch.save(net.state_dict(), '../model/mlp.params')

In [79]:
# Rebuild the architecture in code, then load the saved parameters into it.
clone = MLP()
clone.load_state_dict(torch.load('../model/mlp.params'))
# Switch the module to evaluation mode.
clone.eval()
Y_clone = clone(X)
# Element-wise comparison: all True means the clone reproduces the original outputs.
Y_clone == Y

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])