In [1]:
import torch 
torch.__version__
print(torch.cuda.is_available())

False


In [2]:
import torch
import torch.nn as nn 
from torch.nn import functional as F 
net = nn.Sequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
X = torch.rand(2,20)
print(net(X))

tensor([[-0.0753,  0.1815, -0.1812, -0.0054,  0.2334, -0.0130, -0.0685,  0.0698,
          0.1198, -0.1576],
        [-0.1133,  0.0458, -0.1501,  0.0498,  0.2123, -0.0960, -0.0562,  0.0230,
          0.1341,  0.0570]], grad_fn=<AddmmBackward0>)


In [3]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20,256)
        self.out = nn.Linear(256,10)
    def forward(self,X):
        return self.out(F.relu(self.hidden(X)))

net = MLP()
print(net(X))

tensor([[ 0.2161, -0.1708,  0.0190,  0.0539,  0.0474, -0.1362, -0.1699,  0.1086,
         -0.0803,  0.2041],
        [ 0.2866, -0.1707, -0.0197,  0.1495,  0.1270, -0.1205, -0.2122,  0.0705,
         -0.1157,  0.0162]], grad_fn=<AddmmBackward0>)


In [4]:
class MySequential(nn.Module):
    def __init__(self, *args) -> None:
        super().__init__()
        for indexid,module in enumerate(args):
            self._modules[str(indexid)] = module
    def forward(self,X):
        for module in self._modules.values():
            X = module(X)
        return X
net = MySequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
print(net(X))

tensor([[-0.1434, -0.0026, -0.1779, -0.2177, -0.0589,  0.1496,  0.0689, -0.1030,
          0.0693,  0.3950],
        [-0.1458,  0.0294, -0.0035, -0.0575, -0.0343,  0.0650,  0.0564, -0.1525,
          0.0467,  0.4029]], grad_fn=<AddmmBackward0>)


In [5]:
class FixedHiddenMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.fixed_weight = torch.rand(20,20,requires_grad=False)
        self.line = nn.Linear(20,20)
    def forward(self,X):
        X = self.line(X)
        X = torch.mm(X,self.fixed_weight)+1
        X = self.line(X)
        while(X.abs().sum()>1):
            X = X/2
        return X.sum()

net = FixedHiddenMLP()
print(net(X))

tensor(-0.0336, grad_fn=<SumBackward0>)


In [6]:
class NestMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(20,64),nn.ReLU(),nn.Linear(64,32))
        self.linear = nn.Linear(32,16)
    def forward(self,X):
        return self.linear(self.net(X))

net = nn.Sequential(NestMLP(),nn.Linear(16,20),FixedHiddenMLP())
net(X)

tensor(0.3501, grad_fn=<SumBackward0>)

## 参数管理

In [7]:
# test 
a = torch.tensor([[1,2,3],[1,2,3]])
b = torch.tensor([1,2,3])
print(torch.mm(a,b))

RuntimeError: mat2 must be a matrix

print(*[(name, param.shape) for name, param in net.named_parameters()]),这句代码中的*号是什么意思

在Python中，*符号可以用来解包一个序列或可迭代对象。在这个例子中，*号将一个元组列表解包成了一个由多个元组组成的参数列表，然后将其传递给print()函数。这样做的效果是将每个元组作为单独的参数传递给print()函数，从而使输出更加易读。

In [8]:
# class Block1(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.net = nn.Sequential(nn.Linear(4,8),nn.Linear(8,4))
#     def forward(self,X):
#         return self.net(X)

def block1():
    return nn.Sequential(nn.Linear(4,8),nn.Linear(8,4))

# class Block2(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.net = nn.Sequential()
#         for i in range(4):
#             self.net.add_module(f'block {i}',block1())
#     def forward(self,X):
#         return self.net(X)
def block2():
    net = nn.Sequential()
    for i in range(4):
        net.add_module(f'block {i}',block1())
    return net

wgnet = nn.Sequential(block2(),nn.Linear(4,1))
print(wgnet)


Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): Linear(in_features=8, out_features=4, bias=True)
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): Linear(in_features=8, out_features=4, bias=True)
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): Linear(in_features=8, out_features=4, bias=True)
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): Linear(in_features=8, out_features=4, bias=True)
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [9]:
X = torch.rand(2,4)
print(wgnet(X))

tensor([[-0.2439],
        [-0.2435]], grad_fn=<AddmmBackward0>)


In [10]:
wgnet[0][1][0].bias.data

tensor([-0.3983,  0.1915,  0.0086, -0.4750,  0.0407,  0.0755, -0.0124, -0.3499])

## 参数初始化

In [11]:
def parameter_init_normal(m):
    if(type(m)==nn.Linear):
        nn.init.normal_(m.weight,mean=0,std=0.01)
        nn.init.zeros_(m.bias)
def parameter_init_constant(m):
    if(type(m)==nn.Linear):
        nn.init.constant_(m.weight,1)
        nn.init.zeros_(m.bias)
net = nn.Sequential(nn.Linear(20,4),nn.ReLU(),nn.Linear(4,1))
net.apply(parameter_init_constant)
print(net[0].weight.data[0])
print(net[0].bias.data)
X = torch.rand(10,20)
print(net(X))



tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1.])
tensor([0., 0., 0., 0.])
tensor([[37.2525],
        [41.9075],
        [43.4474],
        [42.0509],
        [37.6657],
        [38.6354],
        [38.9725],
        [35.1170],
        [33.6378],
        [46.2599]], grad_fn=<AddmmBackward0>)


In [12]:
def xavier(m):
    if(type(m)==nn.Linear):
        nn.init.xavier_uniform_(m.weight)
def init_42(m):
    if(type(m)==nn.Linear):
        nn.init.constant_(m.weight,42)
net[0].apply(xavier)
net[2].apply(init_42)
print(net[0].weight.data[0])
print(net[2].weight.data)

tensor([-0.1959, -0.4714,  0.1382,  0.3327,  0.2914, -0.4969, -0.3284, -0.1302,
        -0.3640,  0.2145,  0.1068,  0.4011,  0.2599, -0.4051, -0.1244, -0.1086,
        -0.1508,  0.4982, -0.0287,  0.3347])
tensor([[42., 42., 42., 42.]])


## ⾃定义初始化

In [15]:
def my_init(m):
    if(type(m)==nn.Linear):
        print('now Initing:',*[(name,param.shape) for name,param in m.named_parameters()][0] )
        nn.init.uniform_(m.weight,-10,10)
        m.weight.data *= m.weight.data.abs() >=5
net[0].apply(my_init)
print(net[0].weight.data[0])

now Initing: weight torch.Size([4, 20])
tensor([-0.0000,  7.6048, -5.1895, -9.8662, -0.0000,  0.0000,  0.0000,  8.9224,
        -8.3338,  8.3607,  8.9415, -9.4858,  6.4705,  0.0000, -0.0000,  0.0000,
         8.0651, -0.0000, -9.2468,  0.0000])


In [16]:
# 直接设置参数
net[0].weight.data +=1 
net[0].weight.data[0,0] =42
print(net[0].weight.data[0])

tensor([42.0000,  8.6048, -4.1895, -8.8662,  1.0000,  1.0000,  1.0000,  9.9224,
        -7.3338,  9.3607,  9.9415, -8.4858,  7.4705,  1.0000,  1.0000,  1.0000,
         9.0651,  1.0000, -8.2468,  1.0000])


In [17]:
shared = nn.Linear(8,8)
net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),shared,nn.ReLU(),shared,nn.ReLU(),nn.Linear(8,1))
X = torch.rand(10,4)
print(net(X))

tensor([[0.0863],
        [0.0911],
        [0.0949],
        [0.1009],
        [0.0925],
        [0.0901],
        [0.0872],
        [0.0937],
        [0.0884],
        [0.0840]], grad_fn=<AddmmBackward0>)


In [18]:
net[2].weight.data[0] == net[4].weight.data[0]

tensor([True, True, True, True, True, True, True, True])

In [19]:
net[2].weight.data[0] +=1
net[2].weight.data[0] == net[4].weight.data[0]

tensor([True, True, True, True, True, True, True, True])

## 不带参数的层 自定义层

In [21]:
class CenteredLayer(nn.Module):
    def __init__(self):
        super().__init__()
    def forward(self,X):
        return X - X.mean()
    
centered_layer = CenteredLayer()
centered_layer(torch.FloatTensor([1,2,3,4,5]))

tensor([-2., -1.,  0.,  1.,  2.])

In [23]:
net = nn.Sequential(nn.Linear(8,128),CenteredLayer())
X = torch.rand(4,8)
net(X).mean()

tensor(-2.0955e-09, grad_fn=<MeanBackward0>)

In [28]:
## 带参数的层
class MyLinear(nn.Module):
    def __init__(self,input_n,output_n):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(input_n,output_n))
        self.bias = nn.Parameter(torch.zeros(output_n))
    def forward(self,X):
        linear = torch.mm(X,self.weight.data)+self.bias.data
        return F.relu(linear)

linear = MyLinear(12,4)
print(linear.weight)


Parameter containing:
tensor([[ 0.0334, -0.9891, -0.7508,  1.0393],
        [ 0.9010,  0.6360, -0.8480, -0.1904],
        [ 1.0690, -0.8615,  0.0803, -1.0801],
        [ 0.2910, -1.4121,  1.4311, -0.3681],
        [ 0.7326, -0.1772, -0.0719,  0.0794],
        [-0.3534, -0.0235, -2.6597,  0.1482],
        [ 0.8656, -0.5322,  1.5258, -0.2301],
        [-1.5393,  0.4759, -0.1086,  0.6218],
        [ 0.6162, -0.7912,  0.9437, -0.0983],
        [-0.7819, -0.6871,  0.2163, -1.2243],
        [ 0.7624,  1.7866,  0.4706,  0.3238],
        [-0.1634, -0.1359, -0.1154,  0.7761]], requires_grad=True)


In [29]:
linear(torch.rand(3,12))

tensor([[0.1164, 0.0000, 0.0000, 0.6667],
        [1.7693, 0.0000, 0.5816, 0.0000],
        [0.9837, 0.0000, 0.0000, 0.8493]])

In [30]:
net = nn.Sequential(MyLinear(8,19),MyLinear(19,1))
net(torch.rand(3,8))

tensor([[0.4602],
        [0.0000],
        [0.0000]])

# 读写文件

In [31]:
import torch 
import torch.nn as nn
import torch.nn.functional as F 

a = torch.tensor([1,2,3])
torch.save(a,'a-file')

In [32]:
a2 = torch.load('a-file')

In [33]:
a2

tensor([1, 2, 3])

In [42]:
b = torch.tensor([2,3,4])
c = (a,b)
cc =[a,b]
ccc = {'a':a,'b':b}
torch.save(c,'c-file')
torch.save(cc,'cc-file')
torch.save(ccc,'ccc-file')

In [43]:
c2 = torch.load('c-file')
c3 = torch.load('cc-file')
c4 = torch.load('ccc-file')
print(c2)
print(c3)
print(c4)

(tensor([1, 2, 3]), tensor([2, 3, 4]))
[tensor([1, 2, 3]), tensor([2, 3, 4])]
{'a': tensor([1, 2, 3]), 'b': tensor([2, 3, 4])}


In [47]:
# 存储整个模型的参数
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(10,20)
        self.output = nn.Linear(20,10)
    def forward(self,X):
        return self.output(F.relu(self.hidden(X)))
net = MLP()
X = torch.rand(2,10)
print(net(X))
torch.save(net.state_dict(),'net_params')

tensor([[-6.5696e-02, -3.1831e-01, -2.2090e-01,  2.7626e-01,  9.6878e-02,
          1.5350e-01, -1.9059e-01,  4.5661e-01, -3.0889e-02,  1.1137e-01],
        [ 6.7214e-05, -3.8045e-01, -1.4539e-01,  2.6692e-01,  1.4782e-01,
          4.3826e-01, -2.1198e-02,  2.7450e-01, -8.1349e-02,  3.5255e-02]],
       grad_fn=<AddmmBackward0>)


In [48]:
net2 = MLP()
net2.load_state_dict(torch.load('net_params'))
net2.eval()
net2(X)

tensor([[-6.5696e-02, -3.1831e-01, -2.2090e-01,  2.7626e-01,  9.6878e-02,
          1.5350e-01, -1.9059e-01,  4.5661e-01, -3.0889e-02,  1.1137e-01],
        [ 6.7214e-05, -3.8045e-01, -1.4539e-01,  2.6692e-01,  1.4782e-01,
          4.3826e-01, -2.1198e-02,  2.7450e-01, -8.1349e-02,  3.5255e-02]],
       grad_fn=<AddmmBackward0>)

In [49]:
net2(X)==net(X)

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])

# GPU

In [50]:
torch.cuda.device_count()

0

In [51]:
def try_GPU(i=0):
    if(torch.cuda.device_count()>=i+1):
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')
def try_all_GPU():
    devices = [torch.device(f'cuda:{i}') for i in range(torch.cuda.device_count())]
    return devices if devices else [torch.device('cpu')]
try_GPU(),try_GPU(10),try_all_GPU()

(device(type='cpu'), device(type='cpu'), [device(type='cpu')])

In [53]:
x = torch.tensor([1,2,3])
x.device

device(type='cpu')

In [55]:
a = torch.tensor([1,2,3],device=try_GPU())
a

tensor([1, 2, 3])

In [56]:
b = torch.tensor([1,2,3],device=try_GPU(1))
b

tensor([1, 2, 3])

In [57]:
z = a.cuda(1)
b+z

AssertionError: Torch not compiled with CUDA enabled

In [59]:
net = nn.Sequential(nn.Linear(3,1))
net.to(try_GPU())

Sequential(
  (0): Linear(in_features=3, out_features=1, bias=True)
)

In [60]:
print(net)

Sequential(
  (0): Linear(in_features=3, out_features=1, bias=True)
)
