In [22]:
import torch
from torch import nn
from torch.nn import functional as F

In [23]:
net = nn.Sequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
x = torch.rand(100,20)
net(x).shape

torch.Size([100, 10])

In [24]:
class MLP(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20,256)
        self.out = nn.Linear(256,10)
        
    def forward(self,x):
        return self.out(F.relu(self.hidden(x)))

In [25]:
m = MLP()
m(x).shape

torch.Size([100, 10])

In [26]:
class FixedHiddenMLP(nn.Module):
    
    def __init__(self):
        super().__init__()
        
        self.rand_weight = torch.rand((20,20),requires_grad= False)
        self.linear = nn.Linear(20,20)
    
    def forward(self,x):
        
        X = self.linear(x)
        X = F.relu(torch.mm(self,rand_weight,X)+1)
        X= self.linear(X)

In [33]:
class MySequential(nn.Module):
    
    def __init__(self,*args):
        super().__init__()
        for inx,module in enumerate(args):
            self._modules[str(inx)] = module
            
    def forward(self,X):
        
        for block in self._modules.values():
            X = block(X)
        return X 
        

In [34]:
net = MySequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
x = torch.rand(100,20)
net(x).shape

torch.Size([100, 10])

In [39]:
def init_normal(m):
    if type(m) == nn.Linear:
        nn.init.normal(m.weight,mean = 0,std =0.01)
        nn.init.zeros_(m.bias)
        
net.apply(init_normal)


  nn.init.normal(m.weight,mean = 0,std =0.01)


MySequential(
  (0): Linear(in_features=20, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)

In [47]:
# 我们需要给共享层一个名称，以便可以引用它的参数
shared = nn.Linear(8, 8)
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(),
                    shared, nn.ReLU(),
                    nn.Linear(8,16),nn.ReLU(),
                    nn.Linear(16,8),nn.ReLU(),
                    shared, nn.ReLU(),
                    nn.Linear(8, 1))

X = torch.rand(10,4)
net(X)
# 检查参数是否相同
print(net[2].weight.data[0] == net[4].weight.data[0])
net[2].weight.data[0, 0] = 100
# 确保它们实际上是同一个对象，而不只是有相同的值
print(net[2].weight.data[0] == net[4].weight.data[0])

tensor([False, False, False, False, False, False, False, False])
tensor([False, False, False, False, False, False, False, False])


In [48]:
for i in net.parameters():
    print(i.shape)

torch.Size([8, 4])
torch.Size([8])
torch.Size([8, 8])
torch.Size([8])
torch.Size([16, 8])
torch.Size([16])
torch.Size([8, 16])
torch.Size([8])
torch.Size([1, 8])
torch.Size([1])


In [50]:
torch.cuda.device_count()

2

In [51]:
def try_gpu(i=0):  #@save
    """如果存在，则返回gpu(i)，否则返回cpu()"""
    if torch.cuda.device_count() >= i + 1:
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')

In [52]:
X = torch.ones(2, 3, device=try_gpu())
Y = torch.rand(2, 3, device=try_gpu(1))

In [53]:
X+Y

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:1!