<a href="https://colab.research.google.com/github/Voyageran/StartNN/blob/main/Pytorch_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import sys
from google.colab import drive
drive.mount('/content/gdrive')
sys.path.insert(0,"/content/content/notebooks/colabInstallPackage")

!cp -av '/content/gdrive/MyDrive/Colab Notebooks/d2l' '/content/'

Mounted at /content/gdrive
'/content/gdrive/MyDrive/Colab Notebooks/d2l' -> '/content/d2l'
'/content/gdrive/MyDrive/Colab Notebooks/d2l/paddle.py' -> '/content/d2l/paddle.py'
'/content/gdrive/MyDrive/Colab Notebooks/d2l/mxnet.py' -> '/content/d2l/mxnet.py'
'/content/gdrive/MyDrive/Colab Notebooks/d2l/tensorflow.py' -> '/content/d2l/tensorflow.py'
'/content/gdrive/MyDrive/Colab Notebooks/d2l/torch.py' -> '/content/d2l/torch.py'
'/content/gdrive/MyDrive/Colab Notebooks/d2l/__init__.py' -> '/content/d2l/__init__.py'
'/content/gdrive/MyDrive/Colab Notebooks/d2l/__pycache__' -> '/content/d2l/__pycache__'
'/content/gdrive/MyDrive/Colab Notebooks/d2l/__pycache__/mxnet.cpython-310.pyc' -> '/content/d2l/__pycache__/mxnet.cpython-310.pyc'
'/content/gdrive/MyDrive/Colab Notebooks/d2l/__pycache__/__init__.cpython-310.pyc' -> '/content/d2l/__pycache__/__init__.cpython-310.pyc'
'/content/gdrive/MyDrive/Colab Notebooks/d2l/__pycache__/torch.cpython-310.pyc' -> '/content/d2l/__pycache__/torch.cpython-

# **Layers and Modules**


## Model Framework

In [3]:
import torch
from torch import nn
from torch.nn import functional as F #

# Simple single NN
net = nn.Sequential(nn.Linear(20,256), nn.ReLU(), nn.Linear(256, 10))

X = torch.rand(2,20)
net(X).shape #outputs

torch.Size([2, 10])

**自定义块**

任何一个层和一个神经网络都是Module的一个subclass。

Define a class MLP which is the subclass of `nn.Module`

****Remark:***

model == nn.Linear(2, 1)

input = torch.Tensor([1, 2])
output = model(input)

print(output)

In [31]:
class MLP(nn.Module):
  def __init__(self):
    super().__init__() # Review inherent
    self.hidden = nn.Linear(20,256) #model==nn.Linear(20,256)
    self.out = nn.Linear(256, 10)

  def forward(self, X):
    return self.out(F.relu(self.hidden(X)))

In [32]:
#instantiate
net = MLP()
net(X)  # == net.forward(X)

tensor([[ 0.2917,  0.0283, -0.3037,  0.1345, -0.2363, -0.0971,  0.2100,  0.1017,
          0.1113, -0.3179],
        [ 0.2103,  0.0192, -0.2455,  0.1382, -0.1039,  0.0098, -0.0248,  0.1356,
          0.0627, -0.1701]], grad_fn=<AddmmBackward0>)

In [35]:
X.shape, net(X).shape, net.forward(X).shape

(torch.Size([2, 20]), torch.Size([2, 10]), torch.Size([2, 10]))

In [48]:
# 试一下nnSequential
class MySequential(nn.Module):
  def __init__(self, *args):
    super().__init__()
    for block in args: #add layers, as keys
      self._modules[str(block)] = block #ordered dict, insert by order

  def forward(self, X):
    for block in self._modules.values(): #values返回的list跟插入进去返回的顺序一样
      X = block(X) #values里面的keys
    return X

In [50]:
net = MySequential(nn.Linear(20,256), nn.ReLU(), nn.Linear(256, 10))

In [52]:
print(net.forward(X))

tensor([[-0.0211,  0.1789,  0.0505,  0.2012,  0.0369,  0.1904,  0.0514, -0.0115,
          0.1747, -0.0095],
        [-0.0809,  0.0909,  0.1927,  0.0850, -0.2080,  0.1496,  0.0335, -0.0416,
          0.2090, -0.0328]], grad_fn=<AddmmBackward0>)


In [42]:
net = MySequential(nn.Linear(20,256), nn.ReLU(), nn.Linear(256, 10))
net(X)

tensor([[ 0.0420, -0.0817,  0.0341, -0.0889,  0.1902, -0.1443,  0.0811, -0.0778,
          0.0302,  0.2398],
        [-0.0157, -0.1873,  0.0423,  0.0459,  0.0532,  0.0789,  0.0298, -0.1673,
         -0.1093,  0.1977]], grad_fn=<AddmmBackward0>)

In [4]:
# 展示一下我们可以在init和forward函数中可以多么爽地自定义（瞎搞）
class FixedHiddenMLP(nn.Module):
  def __init__(self): # review: self is a instance,可以直接用类调用，也可以创建实例再调用
    super().__init__()
    self.rand_weight = torch.rand((20,20), requires_grad = False) #不需要weight的梯度
    self.linear = nn.Linear(20,20)

  def forward(self, X):
    X = self.linear(X)
    X = F.relu(torch.mm(X, self.rand_weight) + 1)
    X = self.linear(X)
    while X.abs().sum()>1:
      return X.sum()

In [6]:
net = FixedHiddenMLP()
net(X)

tensor(-0.5235, grad_fn=<SumBackward0>)

In [7]:
#嵌套玩法
class NestMLP(nn.Module):
  def __init__(self):
    super().__init__()
    # sequential也是Module的一个subclass
    self.net = nn.Sequential(nn.Linear(20,64), nn.ReLU(),
                             nn.Linear(64,32), nn.ReLU())
    self.linear = nn.Linear(32,16)

  def forward(self, X):
    return self.linear(self.net(X))

In [9]:
chimera = nn.Sequential(NestMLP(), nn.Linear(16,20), FixedHiddenMLP())
chimera(X)

tensor(-3.2638, grad_fn=<SumBackward0>)

## **Visiting Parameters**

In [10]:
import torch
from torch import nn
from torch.nn import functional as F #

# Simple single NN
net = nn.Sequential(nn.Linear(4,8), nn.ReLU(), nn.Linear(8, 1))

X = torch.rand(size = (2,4))
net(X)

tensor([[-0.2717],
        [-0.2763]], grad_fn=<AddmmBackward0>)

In [12]:
# test nn.Linear(8, 1)
print(net[2].state_dict()) #_module, list

OrderedDict([('weight', tensor([[-0.1128, -0.3085, -0.1111,  0.0242,  0.1859, -0.1935, -0.0376, -0.2188]])), ('bias', tensor([-0.0815]))])


In [13]:
# Directly vist
print(type(net[2].bias))
print(net[2].bias)
print(net[2].bias.data) #data是本身，用data是因为还可以访问梯度

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.0815], requires_grad=True)
tensor([-0.0815])


In [14]:
net[2].weight.grad == None

True

**Visiting total parameters**

In [15]:
print(*[(name, param.shape) for name, param in net[0].named_parameters()])
print(*[(name, param.shape) for name, param in net.named_parameters()])   #ReLU:no parameters

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


***Remark:**

my_function(*[1, 2, 3]) == my_function(1, 2, 3)

In [16]:
net.state_dict()['2.bias'].data #从list里面拿最后一层的bias的data

tensor([-0.0815])

In [17]:
#从嵌套块收集params
def block1():
    return nn.Sequential(nn.Linear(4, 8), nn.ReLU(),
                         nn.Linear(8, 4), nn.ReLU())

# block2 嵌套4个block1
def block2():
  net = nn.Sequential()
  for i in range(4):
    net.add_module(f'block{i}', block1())
  return net

In [18]:
rgnet = nn.Sequential(block2(), nn.Linear(4,1))
rgnet(X)

tensor([[0.0024],
        [0.0024]], grad_fn=<AddmmBackward0>)

In [19]:
print(rgnet) #see the net

Sequential(
  (0): Sequential(
    (block0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [None]:
# init params
