## 深度学习计算

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
# Build a multilayer perceptron from stock layers: 20 inputs -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(nn.Linear(20, 256),
                    nn.ReLU(),
                    nn.Linear(256, 10))
# Random batch of 2 samples with 20 features each; reused by the following cells.
X = torch.randn(2, 20)
print(net(X))

tensor([[ 0.4991, -0.2851, -0.0465,  0.0492, -0.3156,  0.2185, -0.2036, -0.0525,
         -0.4470, -0.1752],
        [ 0.4347, -0.2926,  0.5307,  0.4380, -0.4316,  0.2426, -0.6666,  0.1185,
         -0.0972,  0.0773]], grad_fn=<AddmmBackward0>)


In [4]:
class MLP(nn.Module):
    """Multilayer perceptron written as a custom module.

    Maps 20 input features to 10 outputs through one hidden layer of
    256 units followed by a ReLU.
    """

    def __init__(self):
        super().__init__()
        # Hidden layer first, output layer second (creation order also fixes
        # the order in which random initial weights are drawn).
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Return the output-layer result for the batch ``X``."""
        hidden_activations = F.relu(self.hidden(X))
        return self.out(hidden_activations)

In [5]:
# Instantiate the custom MLP and run it on the batch X created in an earlier cell.
net = MLP()
print(net(X))

tensor([[ 0.2698,  0.1221,  0.0640, -0.0645,  0.0584,  0.1774, -0.2562,  0.0208,
         -0.0652, -0.2183],
        [ 0.0331, -0.0116, -0.0227, -0.1497,  0.1637,  0.6946, -0.1518,  0.6945,
         -0.0137, -0.8431]], grad_fn=<AddmmBackward0>)


In [6]:
class MySequential(nn.Module):
    """Minimal re-implementation of a sequential container.

    The modules passed to the constructor are registered under the names
    "0", "1", ... and are applied in that order by ``forward``.
    """

    def __init__(self, *args):
        super().__init__()
        for position, layer in enumerate(args):
            # add_module stores the layer in self._modules under its index.
            self.add_module(str(position), layer)

    def forward(self, X):
        """Feed ``X`` through every registered module in insertion order."""
        out = X
        # Iterate over _modules directly so that a layer registered twice
        # is also applied twice.
        for layer in self._modules.values():
            out = layer(out)
        return out

In [7]:
# Same architecture as before, this time assembled with the hand-written MySequential container.
net  = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
print(net(X))

tensor([[-0.0503,  0.2224, -0.2909, -0.3664, -0.3681,  0.0506, -0.0514, -0.1537,
          0.1903,  0.4282],
        [ 0.2772, -0.0137, -0.3113, -0.0985,  0.3356,  0.4591,  0.2226,  0.0769,
          0.3688,  0.8713]], grad_fn=<AddmmBackward0>)


In [14]:
class FixedHiddenMLP(nn.Module):
    """MLP that mixes a trainable layer with constant random weights.

    The single ``nn.Linear(20, 20)`` layer is applied three times (its
    parameters are shared across those applications), with a fixed random
    20x20 projection and a data-dependent rescaling loop in between.
    """

    def __init__(self):
        super().__init__()
        # Constant, non-trainable weights. Registering them as a buffer makes
        # them follow the module through .to()/.cuda()/.double(), which a plain
        # tensor attribute does not. persistent=False keeps them out of
        # state_dict, so saved checkpoints look the same as before.
        self.register_buffer('rand_weight', torch.rand((20, 20)), persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a (batch, 20) tensor computed from a (batch, 20) input."""
        X = self.linear(X)
        # Fixed random projection followed by ReLU; rand_weight gets no gradient.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the same linear layer (shared parameters).
        X = self.linear(X)
        # Halve the activations until their L1 norm is at most 1.
        while X.abs().sum() > 1:
            X /= 2

        return self.linear(X)

In [15]:
# Run the fixed-weight MLP on the (2, 20) batch X from an earlier cell.
net = FixedHiddenMLP()
print(net(X))

tensor([[ 0.0773,  0.2476,  0.1560, -0.0445,  0.1721,  0.0442,  0.1749, -0.0615,
         -0.2389,  0.1722, -0.1484, -0.2001,  0.0007, -0.0301,  0.1026, -0.1920,
         -0.2003, -0.0263,  0.0253, -0.0542],
        [ 0.0459,  0.2142,  0.1855, -0.0157,  0.1729,  0.0882,  0.1543, -0.0593,
         -0.2242,  0.1402, -0.1784, -0.1755,  0.0451, -0.0114,  0.0934, -0.2018,
         -0.1820, -0.0539,  0.0309, -0.0463]], grad_fn=<AddmmBackward0>)


In [16]:
class NestMLP(nn.Module):
    """Module that nests an ``nn.Sequential`` inside a custom block.

    Pipeline: 20 -> 64 -> ReLU -> 32 -> ReLU (the nested ``net``), followed
    by a final linear layer 32 -> 16.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order in which they are applied.
        inner_layers = [nn.Linear(20, 64), nn.ReLU(), nn.Linear(64, 32), nn.ReLU()]
        self.net = nn.Sequential(*inner_layers)
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Apply the nested stack, then the final linear layer."""
        features = self.net(X)
        return self.linear(features)
# Mix and match: chain a NestMLP (20 -> 16), a plain linear layer (16 -> 20) and a FixedHiddenMLP (20 -> 20).
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)

tensor([[ 1.3750e-01,  1.2581e-02,  3.9144e-02, -4.5786e-02,  1.4665e-01,
          9.1764e-02, -1.2198e-01, -7.4244e-02,  8.0128e-03,  8.8010e-05,
         -2.3515e-02,  2.0612e-02,  1.0110e-01, -2.1562e-01,  1.3709e-01,
          1.8578e-01, -1.3833e-01, -9.5749e-02, -1.3792e-01,  1.7625e-01],
        [ 1.3785e-01,  1.3485e-02,  3.9002e-02, -4.5999e-02,  1.4709e-01,
          9.1931e-02, -1.2206e-01, -7.4625e-02,  7.3047e-03, -2.3676e-04,
         -2.3358e-02,  2.0685e-02,  1.0151e-01, -2.1587e-01,  1.3729e-01,
          1.8540e-01, -1.3845e-01, -9.6155e-02, -1.3792e-01,  1.7633e-01]],
       grad_fn=<AddmmBackward0>)

In [17]:
# Small MLP used for the parameter-access examples: 4 inputs -> 8 hidden units (ReLU) -> 1 output.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1)
)
# Rebinds X to a batch of 2 samples with 4 features each.
X = torch.randn(2, 4)
print(net(X))

tensor([[ 0.1399],
        [-0.1189]], grad_fn=<AddmmBackward0>)


In [18]:
# Parameters (weight and bias) of the module at index 2 of net, i.e. its last nn.Linear.
print(net[2].state_dict())

OrderedDict([('weight', tensor([[ 0.2076, -0.2971, -0.1239, -0.3278,  0.1315,  0.0702,  0.1360,  0.3372]])), ('bias', tensor([-0.1265]))])


In [21]:
# A layer's bias is an nn.Parameter; .data exposes the underlying tensor.
print(type(net[2].bias))
print(net[2].bias)
print(net[2].bias.data)
# No backward pass has been run yet, so no gradient has been stored.
# Use an identity check: `== None` would go through the tensor's overloaded
# `==` operator once a gradient exists instead of testing for None.
net[2].weight.grad is None

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.1265], requires_grad=True)
tensor([-0.1265])


True

In [22]:
# Names and shapes of the parameters of the first layer only ...
print(*[(name, param.shape) for name, param in net[0].named_parameters()])
# ... and of the whole network, where each name is prefixed with the layer index.
print(*[(name, param.shape) for name, param in net.named_parameters()])

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [23]:
# Look up a single parameter by its qualified name in the network's state_dict.
net.state_dict()['2.bias'].data

tensor([-0.1265])

In [24]:
def block1():
    """Return a fresh 4 -> 8 -> 4 block with a ReLU after each linear layer."""
    layers = (nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU())
    return nn.Sequential(*layers)
def block2():
    """Return a Sequential holding four block1() instances named 'block 0' .. 'block 3'."""
    stacked = nn.Sequential()
    # Each call to block1() creates a new, independently initialised sub-block.
    for child_name in (f'block {i}' for i in range(4)):
        stacked.add_module(child_name, block1())
    return stacked

# Nested network: the four stacked blocks followed by a final 4 -> 1 linear layer.
rgnet = nn.Sequential(block2(), nn.Linear(4, 1))
print(rgnet(X))

tensor([[-0.3294],
        [-0.3294]], grad_fn=<AddmmBackward0>)


In [25]:
# Printing a module shows its nested structure together with each child's name.
print(rgnet)

Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [26]:
# Nested indexing: first child of rgnet -> its second sub-block ('block 1') -> that block's first linear layer.
rgnet[0][1][0].bias.data

tensor([ 0.4189, -0.3938, -0.3104,  0.0105, -0.1262,  0.2937,  0.4695,  0.2360])

In [27]:
def init_normal(m):
    """Initialise an ``nn.Linear`` with N(0, 0.01^2) weights and a zero bias.

    Intended to be passed to ``Module.apply``; modules whose type is not
    exactly ``nn.Linear`` are left untouched.
    """
    if type(m) is nn.Linear:
        nn.init.normal_(m.weight, std=0.01)
        # A layer built with bias=False has m.bias set to None, which
        # nn.init.zeros_ cannot handle.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
# apply() calls init_normal on net and each of its submodules.
net.apply(init_normal)
# First row of the first layer's weight matrix, and that layer's bias.
print(net[0].weight.data[0], net[0].bias.data)

tensor([ 0.0028, -0.0039, -0.0047,  0.0181]) tensor([0., 0., 0., 0., 0., 0., 0., 0.])


In [29]:
def initialize_weights(m):
    """Initialise any ``nn.Linear`` (or subclass) with N(0, 0.01^2) weights and a zero bias.

    Intended to be passed to ``Module.apply``; other module types are ignored.
    """
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # A layer built with bias=False has m.bias set to None, which
        # nn.init.zeros_ cannot handle.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
# Re-initialise every linear layer of net with the isinstance-based variant.
net.apply(initialize_weights)
# First row of the first layer's weight matrix, and that layer's bias.
print(net[0].weight.data[0], net[0].bias.data)

tensor([ 0.0023,  0.0072,  0.0058, -0.0059]) tensor([0., 0., 0., 0., 0., 0., 0., 0.])


In [30]:
def init_xavier(m):
    """Xavier-uniform initialise the weight of ``m`` when it is exactly an ``nn.Linear``."""
    if type(m) is not nn.Linear:
        return
    # Only the weight is touched; the bias keeps its current values.
    nn.init.xavier_uniform_(m.weight)
def init_42(m):
    """Fill the weight of ``m`` with the constant 42 when it is exactly an ``nn.Linear``."""
    if type(m) is not nn.Linear:
        return
    nn.init.constant_(m.weight, 42)
# Different initialisers per layer: Xavier-uniform for the first layer, constant 42 for the last.
net[0].apply(init_xavier)
net[2].apply(init_42)
print(net[0].weight.data[0])
print(net[2].weight.data)

tensor([ 0.2646, -0.3179, -0.0592,  0.5297])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])


In [31]:
def my_init(m):
    """Custom initialiser for modules that are exactly ``nn.Linear``.

    Prints the names and shapes of the layer's parameters, draws the weight
    from U(-10, 10) and then zeroes every entry whose magnitude is below 5.
    """
    if type(m) is not nn.Linear:
        return
    param_shapes = [(name, param.shape) for name, param in m.named_parameters()]
    print("Init", *param_shapes)
    nn.init.uniform_(m.weight, -10, 10)
    # Boolean mask of the entries to keep; multiplying by it zeroes the rest.
    keep_mask = m.weight.data.abs() >= 5
    m.weight.data *= keep_mask

# Run the custom initialiser on every linear layer of net ...
net.apply(my_init)
# ... and display the first two rows of the first layer's weight.
net[0].weight[:2]

Init ('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
Init ('weight', torch.Size([1, 8])) ('bias', torch.Size([1]))


tensor([[-5.3597, -0.0000,  5.7331,  0.0000],
        [ 0.0000, -5.7184,  0.0000, -8.0952]], grad_fn=<SliceBackward0>)

In [41]:
# Parameters can also be modified directly through .data.
print(net[0].weight.data)
# NOTE: this is an in-place update, so every re-execution of the cell adds 1 again.
# The recorded output (entries of 8 where my_init left zeros) reflects repeated runs.
net[0].weight.data[:] +=1
print(net[0].weight.data)
net

tensor([[ 2.6403,  8.0000, 13.7331,  8.0000],
        [ 8.0000,  2.2816,  8.0000, -0.0952],
        [-1.1097,  8.0000, 14.1464,  8.0000],
        [ 8.0000,  2.1192, 15.5830,  8.0000],
        [ 8.0000,  8.0000,  8.0000,  8.0000],
        [-0.8041,  8.0000,  8.0000, 13.1649],
        [ 8.0000, 14.9392,  2.9329,  8.0000],
        [17.4399,  8.0000, 17.9835,  8.0000]])
tensor([[ 3.6403,  9.0000, 14.7331,  9.0000],
        [ 9.0000,  3.2816,  9.0000,  0.9048],
        [-0.1097,  9.0000, 15.1464,  9.0000],
        [ 9.0000,  3.1192, 16.5830,  9.0000],
        [ 9.0000,  9.0000,  9.0000,  9.0000],
        [ 0.1959,  9.0000,  9.0000, 14.1649],
        [ 9.0000, 15.9392,  3.9329,  9.0000],
        [18.4399,  9.0000, 18.9835,  9.0000]])


Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)

In [44]:
# Parameter tying: the same nn.Linear object is placed at positions 2 and 4 of the network.
shared = nn.Linear(8,8)
net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),shared,nn.ReLU(),shared,nn.ReLU(),nn.Linear(8,1))
net(X)
# Both positions report identical weights ...
print(net[2].weight.data[0]==net[4].weight.data[0])
# ... and stay identical after changing one of them, because they are the same object.
net[2].weight.data[0,0] = 100
print(net[2].weight.data[0]==net[4].weight.data[0])

tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])


In [45]:
class CenteredLayyer(nn.Module):
    """Parameter-free layer that subtracts the mean of all input elements.

    The spelling of the class name is kept as is because later cells
    refer to it by this name.
    """

    def __init__(self):
        super().__init__()

    def forward(self, X):
        """Return ``X`` shifted so that its overall mean is zero."""
        overall_mean = X.mean()
        return X - overall_mean

In [46]:
# Sanity check: the mean of 1..5 is 3, so the centred result is -2..2.
layer = CenteredLayyer()
layer(torch.FloatTensor([1,2,3,4,5]))

tensor([-2., -1.,  0.,  1.,  2.])

In [47]:
# The custom layer composes with built-in layers like any other module.
net = nn.Sequential(
    nn.Linear(8,128),CenteredLayyer()
)
Y = net(torch.rand(4,8))
# The mean of the centred output should be zero up to floating-point error.
Y.mean()

tensor(6.5193e-09, grad_fn=<MeanBackward0>)

In [51]:
class MyLinear(nn.Module):
    """Fully connected layer with a built-in ReLU, implemented from scratch.

    Args:
        in_units: number of input features.
        units: number of output features.
    """

    def __init__(self,in_units,units):
        super().__init__()
        # Both tensors are registered as trainable parameters and drawn from N(0, 1).
        self.weight = nn.Parameter(torch.randn(in_units,units))
        self.bias = nn.Parameter(torch.randn(units,))

    def forward(self,X):
        """Return ``relu(X @ weight + bias)``."""
        # Use the parameters themselves rather than their .data: going through
        # .data detaches the result from autograd, so weight and bias would
        # never receive gradients and the layer could not be trained.
        linear = torch.matmul(X,self.weight) + self.bias
        return F.relu(linear)

In [52]:
# Instantiate the custom layer (5 inputs, 3 outputs) and inspect its weight parameter.
linear = MyLinear(5,3)
linear.weight

Parameter containing:
tensor([[ 0.9146,  1.3321, -1.7771],
        [ 1.0491,  0.0602,  1.2255],
        [-0.5873,  0.8492, -1.2646],
        [-0.2423, -0.6302, -1.2189],
        [-1.2080, -0.8825, -2.7891]], requires_grad=True)

In [53]:
# Forward pass of the custom layer on a random batch of 2 samples with 5 features.
linear(torch.rand(2,5))

tensor([[0.0000, 0.0000, 0.0996],
        [0.0000, 0.0108, 0.0000]])

In [54]:
# Custom layers can be stacked in nn.Sequential just like built-in ones (64 -> 8 -> 1).
net = nn.Sequential(
    MyLinear(64,8),MyLinear(8,1)
)
net(torch.rand(2,64))

tensor([[0.],
        [0.]])

In [57]:
# Round-trip a tensor through disk: write x to the file 'x-file' and read it back.
x = torch.arange(4)
torch.save(x,'x-file')
x2 = torch.load('x-file')
x2

tensor([0, 1, 2, 3])

In [58]:
class MLP(nn.Module):
    """Multilayer perceptron: 20 inputs -> 256 hidden units (ReLU) -> 10 outputs.

    Re-declares the same architecture as the MLP class defined in an
    earlier cell of this notebook; it is used here for the save/load example.
    """

    def __init__(self):
        super().__init__()
        # Attribute names become the prefixes of the state_dict keys.
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Return the output-layer result for the batch ``X``."""
        hidden_activations = F.relu(self.hidden(X))
        return self.out(hidden_activations)

# Create a model, remember its output Y for the batch X, and store its parameters on disk.
net = MLP()
X = torch.randn(size = (2,20))
Y = net(X)
# Only the state_dict (the parameter tensors) is written to 'mlp.params', not the model code.
torch.save(net.state_dict(),'mlp.params')

In [59]:
# Restore the model: build a fresh MLP and overwrite its parameters with the saved ones.
clone = MLP()
# NOTE(review): torch.load unpickles the file, so only load parameter files from a trusted source.
clone.load_state_dict(torch.load('mlp.params'))
# eval() returns the module itself, which is why the cell displays its structure.
clone.eval()

MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
)

In [60]:
# With identical parameters the restored model must reproduce the original output for the same X.
Y_clone = clone(X)
Y_clone == Y

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])

In [61]:
def try_gpu(i=0):
    """Return ``torch.device('cuda:i')`` if that GPU exists, otherwise the CPU device.

    Args:
        i: zero-based index of the requested GPU.

    Returns:
        A ``torch.device`` for GPU ``i`` when ``0 <= i < torch.cuda.device_count()``,
        else ``torch.device('cpu')``.
    """
    # GPU indices are zero-based, so index i is valid only when at least i + 1
    # devices are present. Comparing with `>= i` would hand out 'cuda:0' on a
    # machine without any GPU.
    if 0 <= i < torch.cuda.device_count():
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')

In [63]:
# Request the default GPU (index 0) and a GPU index that is not expected to exist.
try_gpu(),try_gpu(10)

(device(type='cuda', index=0), device(type='cpu'))