In [1]:
#（1） 使用numpy 初始化
import numpy as np
import torch
from torch import nn


In [4]:
# 定义一个Sequential 模型
net1=nn.Sequential(
    nn.Linear(30,40),
    nn.ReLU(),
    nn.Linear(40,50),
    nn.ReLU(),
    nn.Linear(50,10)

)

In [5]:
# 访问第一层的参数
w1=net1[0].weight
b1=net1[0].bias
print(w1)

Parameter containing:
tensor([[-0.1759, -0.0534, -0.0315,  ..., -0.0820, -0.1613,  0.0855],
        [-0.1420,  0.1403, -0.0070,  ...,  0.1055, -0.0490, -0.1554],
        [-0.1236,  0.1171,  0.1662,  ..., -0.0687,  0.0305, -0.1281],
        ...,
        [ 0.0151,  0.0241,  0.0617,  ...,  0.1767,  0.1216,  0.1684],
        [-0.0849,  0.0360, -0.0462,  ...,  0.0710, -0.1191,  0.0485],
        [ 0.0582,  0.0103,  0.1186,  ..., -0.1641, -0.0785, -0.1579]],
       requires_grad=True)


In [6]:
# 定义一个tensor 直接对其进行替换
net1[0].weight.data=torch.from_numpy(np.random.uniform(3,5,size=(40,30)))
print(net1[0].weight)

Parameter containing:
tensor([[3.3215, 4.5167, 4.6777,  ..., 4.7180, 3.9787, 3.2890],
        [4.6107, 4.6168, 4.9732,  ..., 4.4668, 3.6412, 4.3926],
        [3.9327, 4.0463, 3.7557,  ..., 4.6944, 3.4947, 4.1673],
        ...,
        [4.7221, 4.6619, 3.6206,  ..., 3.4100, 3.3430, 3.3230],
        [4.5434, 3.7546, 3.0671,  ..., 3.8950, 3.9671, 4.8067],
        [3.4885, 4.6526, 4.4762,  ..., 3.0110, 4.4440, 3.3162]],
       dtype=torch.float64, requires_grad=True)


In [7]:
# 模型中相同类型的层都需要初始化成相同的方式。这个时候需要高效的方法循环访问

for layer in net1:
    if isinstance(layer,nn.Linear):# 判断是否为线性层
        param_shape=layer.weight.shape
        layer.weight.data=torch.from_numpy(np.random.normal(0,0.5,size=param_shape))
        # 定义为均值为0，方差为0.5 的正太分布
        

In [8]:
#Xavier 的初始化方法，可以使得每一层的输出的方差尽可能相等


In [9]:
class sim_net(nn.Module):
    def __init__(self):
        super(sim_net,self).__init__()
        self.l1=nn.Sequential(
            nn.Linear(30,40),
            nn.ReLU()
        )
        
        self.l1[0].weight.data=torch.randn(40,30) # 直接对某一层初始化
        
        self.l2=nn.Sequential(
            nn.Linear(40,50),
            nn.ReLU()
        )
        
        self.l3=nn.Sequential(
            nn.Linear(50,10),
            nn.ReLU()
        )
    def forward(self,x):
        x=self.l1(x)
        x=self.l2(x)
        x=self.l3(x)
        return x
    

In [10]:
net2=sim_net()

In [12]:
# 访问children 
for i in net2.children():
    print(i)

Sequential(
  (0): Linear(in_features=30, out_features=40, bias=True)
  (1): ReLU()
)
Sequential(
  (0): Linear(in_features=40, out_features=50, bias=True)
  (1): ReLU()
)
Sequential(
  (0): Linear(in_features=50, out_features=10, bias=True)
  (1): ReLU()
)


In [13]:
for i in net2.modules():
    print(i)

sim_net(
  (l1): Sequential(
    (0): Linear(in_features=30, out_features=40, bias=True)
    (1): ReLU()
  )
  (l2): Sequential(
    (0): Linear(in_features=40, out_features=50, bias=True)
    (1): ReLU()
  )
  (l3): Sequential(
    (0): Linear(in_features=50, out_features=10, bias=True)
    (1): ReLU()
  )
)
Sequential(
  (0): Linear(in_features=30, out_features=40, bias=True)
  (1): ReLU()
)
Linear(in_features=30, out_features=40, bias=True)
ReLU()
Sequential(
  (0): Linear(in_features=40, out_features=50, bias=True)
  (1): ReLU()
)
Linear(in_features=40, out_features=50, bias=True)
ReLU()
Sequential(
  (0): Linear(in_features=50, out_features=10, bias=True)
  (1): ReLU()
)
Linear(in_features=50, out_features=10, bias=True)
ReLU()


In [14]:
# children 会访问到模型定义中的第一层，只会访问到三个Sequential，module 可以访问到Sequential，
#也会访问Sequential 的里面
for layer in net2.modules():
    if isinstance(layer,nn.Linear):
        param_shape=layer.weight.shape
        layer.weight.data=torch.from_numpy(np.random.normal(0,0.5,size=param_shape))
        

In [15]:
# pytorch 中的torch.nn.init
from torch.nn import init
print(net1[0].weight)

Parameter containing:
tensor([[ 0.0362,  0.2540,  0.5150,  ..., -0.3860,  0.2401,  0.6048],
        [-0.8705,  0.4459, -0.3552,  ..., -0.5662, -0.4738, -0.5342],
        [-0.4372,  0.1200, -0.4754,  ...,  0.4852, -0.9524, -0.1624],
        ...,
        [-0.3028,  0.5826,  0.7155,  ...,  0.0737,  0.2575, -0.7277],
        [ 0.0914, -0.4864,  0.2795,  ..., -0.1814, -0.5568, -0.0240],
        [ 0.5167, -0.0557,  0.2035,  ..., -0.4044,  0.0760,  0.4382]],
       dtype=torch.float64, requires_grad=True)


In [16]:
#xaiver 初始化方法
init.xavier_uniform(net1[0].weight)

  from ipykernel import kernelapp as app


Parameter containing:
tensor([[ 0.2917, -0.0791,  0.0431,  ...,  0.1649, -0.0862, -0.1139],
        [ 0.0570, -0.2820, -0.1322,  ...,  0.0162, -0.1478, -0.2078],
        [ 0.0915,  0.0035,  0.0166,  ..., -0.1243,  0.2276, -0.2067],
        ...,
        [-0.0474, -0.0815, -0.1936,  ...,  0.0916,  0.1454,  0.0861],
        [ 0.1655, -0.1860,  0.2559,  ..., -0.2685, -0.0705,  0.1172],
        [ 0.0189,  0.0022, -0.2839,  ..., -0.2922, -0.1037,  0.1908]],
       dtype=torch.float64, requires_grad=True)

In [17]:
print(net1[0].weight)

Parameter containing:
tensor([[ 0.2917, -0.0791,  0.0431,  ...,  0.1649, -0.0862, -0.1139],
        [ 0.0570, -0.2820, -0.1322,  ...,  0.0162, -0.1478, -0.2078],
        [ 0.0915,  0.0035,  0.0166,  ..., -0.1243,  0.2276, -0.2067],
        ...,
        [-0.0474, -0.0815, -0.1936,  ...,  0.0916,  0.1454,  0.0861],
        [ 0.1655, -0.1860,  0.2559,  ..., -0.2685, -0.0705,  0.1172],
        [ 0.0189,  0.0022, -0.2839,  ..., -0.2922, -0.1037,  0.1908]],
       dtype=torch.float64, requires_grad=True)
