In [2]:
import torch
import torch.nn as nn
import torchvision.models as models

# 權重初始化定義在模型內部

## torch.nn.Parameter

In [3]:
class MyModel(nn.Module):
    """ResNet-18 wrapper whose ``fc`` parameters are replaced with constants at construction.

    The constructor prints the first entries of the first ``fc`` weight row and of the
    ``fc`` bias, assigns brand-new ``torch.nn.Parameter`` objects (weights all 0.9,
    bias all zeros), and prints the same slices again.
    """

    def __init__(self):
        super().__init__()
        self.net = models.resnet18()
        fc = self.net.fc
        print("weights: ", fc.weight[0][:10])
        print("bias: ", fc.bias[:10])
        print("=====================")

        # Assigning a Parameter to the attribute swaps out the one registered on the layer.
        constant_weight = torch.ones(fc.weight.shape) * 0.9
        zero_bias = torch.zeros(fc.bias.shape)
        fc.weight = torch.nn.Parameter(constant_weight, requires_grad=True)
        fc.bias = torch.nn.Parameter(zero_bias, requires_grad=True)
        print("weights: ", fc.weight[0][:10])
        print("bias: ", fc.bias[:10])

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        return self.net(x)

In [4]:
# Build the model; the constructor prints the fc weights/bias before and after they are replaced.
model = MyModel()

weights:  tensor([ 0.0426,  0.0268, -0.0006, -0.0057, -0.0088, -0.0295, -0.0015,  0.0246,
        -0.0041,  0.0095], grad_fn=<SliceBackward0>)
bias:  tensor([ 0.0128,  0.0003,  0.0330, -0.0188, -0.0301, -0.0290,  0.0102, -0.0316,
         0.0381, -0.0402], grad_fn=<SliceBackward0>)
weights:  tensor([0.9000, 0.9000, 0.9000, 0.9000, 0.9000, 0.9000, 0.9000, 0.9000, 0.9000,
        0.9000], grad_fn=<SliceBackward0>)
bias:  tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], grad_fn=<SliceBackward0>)


## data.normal_、data.zero_

In [5]:
class MyModel(nn.Module):
    """ResNet-18 wrapper that re-initialises ``fc`` in place through ``.data``.

    During construction the ``fc`` weights are redrawn from a normal distribution
    (mean 0, std 1) and the ``fc`` bias is zeroed; the leading entries are printed
    before and after.
    """

    def __init__(self):
        super().__init__()
        self.net = models.resnet18()
        self._show_fc()
        print("=====================")

        head = self.net.fc
        head.weight.data.normal_(mean=0.0, std=1.0)
        head.bias.data.zero_()

        self._show_fc()

    def _show_fc(self):
        """Print the first entries of the first ``fc`` weight row and of the ``fc`` bias."""
        print("weights: ", self.net.fc.weight[0][:10])
        print("bias: ", self.net.fc.bias[:10])

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        return self.net(x)

In [6]:
# Build the model; the constructor prints the fc weights/bias before and after the in-place re-init.
model = MyModel()

weights:  tensor([-0.0244, -0.0371, -0.0281,  0.0006,  0.0299, -0.0292, -0.0032, -0.0422,
         0.0313, -0.0164], grad_fn=<SliceBackward0>)
bias:  tensor([ 0.0148, -0.0264, -0.0145,  0.0291, -0.0015,  0.0304, -0.0170,  0.0353,
        -0.0319,  0.0003], grad_fn=<SliceBackward0>)
weights:  tensor([ 0.2429, -0.5703, -1.5922,  0.3605,  0.5135,  0.3904, -0.4094, -0.3470,
         0.2323,  0.1666], grad_fn=<SliceBackward0>)
bias:  tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], grad_fn=<SliceBackward0>)


## nn.init.normal_、nn.init.zeros_

In [7]:
class MyModel(nn.Module):
    """ResNet-18 wrapper that re-initialises ``fc`` with ``nn.init`` helpers.

    During construction ``nn.init.normal_`` (mean 0, std 1) is applied to the ``fc``
    weights and ``nn.init.zeros_`` to the ``fc`` bias; the leading entries are printed
    before and after.
    """

    def __init__(self):
        super().__init__()
        self.net = models.resnet18()
        classifier = self.net.fc

        def report():
            # Same two lines are shown before and after the re-initialisation.
            print("weights: ", classifier.weight[0][:10])
            print("bias: ", classifier.bias[:10])

        report()
        print("=====================")

        nn.init.normal_(classifier.weight.data, mean=0.0, std=1.0)
        nn.init.zeros_(classifier.bias.data)

        report()

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        return self.net(x)

In [8]:
# Build the model; the constructor prints the fc weights/bias before and after the nn.init calls.
model = MyModel()

weights:  tensor([ 0.0296,  0.0150,  0.0162,  0.0311,  0.0227,  0.0142,  0.0257,  0.0052,
         0.0267, -0.0273], grad_fn=<SliceBackward0>)
bias:  tensor([-0.0106, -0.0324,  0.0441, -0.0311, -0.0144, -0.0150,  0.0284,  0.0063,
         0.0424, -0.0204], grad_fn=<SliceBackward0>)
weights:  tensor([ 0.1584,  0.3998,  0.8412, -0.6938, -2.1568, -1.2821, -0.7416, -1.8385,
         1.6660, -0.3524], grad_fn=<SliceBackward0>)
bias:  tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], grad_fn=<SliceBackward0>)


## nn.init.constant_

In [9]:
class MyModel(nn.Module):
    """ResNet-18 wrapper that fills ``fc`` with constants via ``nn.init.constant_``.

    During construction the ``fc`` weights are set to 1 and the ``fc`` bias to 0;
    the leading entries are printed before and after.
    """

    def __init__(self):
        super().__init__()
        self.net = models.resnet18()
        fc = self.net.fc
        print("weights: ", fc.weight[0][:10])
        print("bias: ", fc.bias[:10])
        print("=====================")

        # (parameter, fill value) pairs, applied in the same order as listed.
        for parameter, fill_value in ((fc.weight, 1), (fc.bias, 0)):
            nn.init.constant_(parameter, fill_value)

        print("weights: ", fc.weight[0][:10])
        print("bias: ", fc.bias[:10])

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        return self.net(x)

In [10]:
# Build the model; the constructor prints the fc weights/bias before and after the constant fill.
model = MyModel()

weights:  tensor([ 0.0248,  0.0262,  0.0107,  0.0300, -0.0369,  0.0325,  0.0136, -0.0440,
         0.0023,  0.0258], grad_fn=<SliceBackward0>)
bias:  tensor([ 9.7215e-05, -4.1210e-03, -3.4068e-02,  2.0162e-02,  3.1296e-02,
        -3.7656e-03,  3.4689e-02, -2.4909e-02,  3.8025e-02,  1.4642e-02],
       grad_fn=<SliceBackward0>)
weights:  tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], grad_fn=<SliceBackward0>)
bias:  tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], grad_fn=<SliceBackward0>)


## apply
### nn.init 寫法

In [11]:
class MyModel(nn.Module):
    """ResNet-18 wrapper that re-initialises every ``nn.Linear`` via ``self.apply``.

    ``init_weights`` is passed to ``apply`` in the constructor; the leading ``fc``
    weight/bias entries are printed before and after.
    """

    def __init__(self):
        super().__init__()
        self.net = models.resnet18()
        fc = self.net.fc
        print("weights: ", fc.weight[0][:10])
        print("bias: ", fc.bias[:10])
        print("=====================")

        self.apply(self.init_weights)
        print("weights: ", fc.weight[0][:10])
        print("bias: ", fc.bias[:10])

    def init_weights(self, module):
        """Re-initialise ``module`` if it is an ``nn.Linear``; ignore anything else.

        Weights are drawn from a normal distribution (mean 0, std 1) and the bias,
        when the layer has one, is set to zero.
        """
        if not isinstance(module, nn.Linear):
            return

        nn.init.normal_(module.weight.data, mean=0.0, std=1.0)
        if module.bias is None:
            return
        nn.init.zeros_(module.bias.data)

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        return self.net(x)

In [12]:
# Build the model; the constructor runs self.apply(self.init_weights) and prints fc before/after.
model = MyModel()

weights:  tensor([ 0.0124, -0.0300,  0.0275, -0.0354, -0.0005,  0.0229,  0.0324, -0.0056,
         0.0028, -0.0215], grad_fn=<SliceBackward0>)
bias:  tensor([-0.0225, -0.0149,  0.0258,  0.0410,  0.0232, -0.0307, -0.0196,  0.0391,
        -0.0055, -0.0414], grad_fn=<SliceBackward0>)
weights:  tensor([ 0.3663,  1.0643,  0.0746,  0.0075,  0.8304, -0.5193, -0.0839,  0.1247,
         0.7318, -1.8451], grad_fn=<SliceBackward0>)
bias:  tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], grad_fn=<SliceBackward0>)


### data.normal_、data.zero_ 寫法

In [13]:
class MyModel(nn.Module):
    """ResNet-18 wrapper that re-initialises every ``nn.Linear`` via ``self.apply``.

    Same flow as the ``nn.init`` variant, but ``init_weights`` mutates the tensors
    directly through ``.data.normal_`` / ``.data.zero_``.
    """

    def __init__(self):
        super().__init__()
        self.net = models.resnet18()
        self._show_fc()
        print("=====================")

        self.apply(self.init_weights)
        self._show_fc()

    def _show_fc(self):
        """Print the first entries of the first ``fc`` weight row and of the ``fc`` bias."""
        print("weights: ", self.net.fc.weight[0][:10])
        print("bias: ", self.net.fc.bias[:10])

    def init_weights(self, module):
        """Re-initialise ``module`` in place if it is an ``nn.Linear``; ignore anything else.

        Weights are redrawn from a normal distribution (mean 0, std 1); the bias,
        when present, is zeroed.
        """
        if not isinstance(module, nn.Linear):
            return

        module.weight.data.normal_(mean=0.0, std=1.0)
        bias = module.bias
        if bias is not None:
            bias.data.zero_()

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        return self.net(x)

In [14]:
# Build the model; the constructor runs self.apply(self.init_weights) and prints fc before/after.
model = MyModel()

weights:  tensor([-0.0099,  0.0295, -0.0219,  0.0023, -0.0422, -0.0371,  0.0238, -0.0077,
         0.0160,  0.0006], grad_fn=<SliceBackward0>)
bias:  tensor([-0.0103,  0.0023, -0.0279,  0.0155, -0.0325,  0.0441,  0.0082,  0.0163,
         0.0407, -0.0351], grad_fn=<SliceBackward0>)
weights:  tensor([ 0.8214, -1.7671,  0.5230, -0.1822, -0.0327,  0.8022,  0.2237,  0.1050,
         0.5963, -0.1409], grad_fn=<SliceBackward0>)
bias:  tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], grad_fn=<SliceBackward0>)


## 將多個不同 layer 進行初始化設定

In [15]:
class MyModel(nn.Module):
    """ResNet-18 wrapper that re-initialises every ``nn.Linear`` and ``nn.Conv2d``.

    ``init_weights`` is passed to ``self.apply`` in the constructor; the first
    ``conv1`` filter is printed before and after.
    """

    def __init__(self):
        super().__init__()
        self.net = models.resnet18()
        stem = self.net.conv1
        print("weights: ", stem.weight[0][:10])
        print("=====================")

        self.apply(self.init_weights)
        print("weights: ", stem.weight[0][:10])

    def init_weights(self, module):
        """Re-initialise ``module`` if it is a linear or 2-D convolution layer.

        Weights are drawn from a normal distribution (mean 0, std 1); the bias,
        when the layer has one, is set to zero. Other module types are ignored.
        """
        handled_types = (nn.Linear, nn.Conv2d)
        if not isinstance(module, handled_types):
            return

        nn.init.normal_(module.weight.data, mean=0.0, std=1.0)
        if module.bias is None:
            return
        nn.init.zeros_(module.bias.data)

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        return self.net(x)

In [16]:
# Build the model; the constructor re-initialises Linear/Conv2d layers and prints conv1 before/after.
model = MyModel()

weights:  tensor([[[-0.0319, -0.0145, -0.0185,  0.0057,  0.0001,  0.0433, -0.0117],
         [ 0.0405, -0.0316, -0.0135,  0.0019,  0.0007,  0.0032,  0.0305],
         [ 0.0050, -0.0084, -0.0146,  0.0184, -0.0172, -0.0029,  0.0319],
         [ 0.0082,  0.0106,  0.0229,  0.0060,  0.0499, -0.0197,  0.0040],
         [ 0.0133,  0.0116, -0.0512,  0.0193, -0.0222, -0.0523,  0.0177],
         [-0.0311, -0.0272,  0.0080, -0.0590,  0.0138,  0.0333, -0.0300],
         [-0.0417, -0.0587,  0.0343,  0.0311,  0.0013,  0.0216,  0.0217]],

        [[ 0.0033,  0.0007,  0.0176, -0.0081, -0.0109, -0.0139,  0.0220],
         [-0.0155, -0.0324, -0.0097, -0.0308, -0.0033, -0.0112, -0.0142],
         [-0.0187,  0.0068, -0.0196, -0.0388, -0.0116,  0.0081, -0.0016],
         [ 0.0434, -0.0057, -0.0240, -0.0335,  0.0303, -0.0025, -0.0194],
         [ 0.0018,  0.0358, -0.0005,  0.0317, -0.0479,  0.0113,  0.0030],
         [ 0.0159, -0.0216, -0.0118, -0.0303,  0.0004, -0.0065,  0.0262],
         [-0.0066, -0.0389

# 權重初始化定義在模型外部

## 直接定義
### data.normal_、data.zero_

In [38]:
class MyModel(nn.Module):
    """Thin wrapper around ``models.resnet18()`` with no custom initialisation."""

    def __init__(self):
        super().__init__()
        self.net = models.resnet18()

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        return self.net(x)

In [39]:
# Plain model instance; its weights are re-initialised from outside the class in the next cell.
model = MyModel()

In [40]:
fc_layer = model.net.fc

# State of the fc layer before touching it.
print("weights: ", fc_layer.weight[0][:10])
print("bias: ", fc_layer.bias[:10])
print("=====================")

# In-place re-initialisation through .data: normal(0, 1) weights, zero bias.
fc_layer.weight.data.normal_(mean=0.0, std=1.0)
fc_layer.bias.data.zero_()

# Same slices again to show the effect.
print("weights: ", fc_layer.weight[0][:10])
print("bias: ", fc_layer.bias[:10])

weights:  tensor([-0.0177, -0.0383,  0.0188,  0.0390,  0.0256, -0.0224, -0.0312,  0.0102,
         0.0342,  0.0323], grad_fn=<SliceBackward0>)
bias:  tensor([ 0.0340,  0.0321,  0.0181,  0.0400,  0.0119,  0.0009, -0.0185, -0.0041,
         0.0240,  0.0040], grad_fn=<SliceBackward0>)
weights:  tensor([-0.2647, -0.7330,  0.1211, -0.1159, -0.1106,  0.3854,  0.4741,  1.1223,
        -0.5903, -0.0706], grad_fn=<SliceBackward0>)
bias:  tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], grad_fn=<SliceBackward0>)


## apply
### torch.nn.Parameter

In [20]:
# Fresh model instance to be re-initialised with model.apply(init_weights) below.
model = MyModel()

In [21]:
def init_weights(module):
    """Replace the parameters of an ``nn.Linear`` with constants (weight 0.9, bias 0).

    Meant to be passed to ``Module.apply``; any module that is not an ``nn.Linear``
    is left untouched. The new ``torch.nn.Parameter`` objects are created with the
    dtype and device of the parameters they replace, so a layer that was cast or
    moved before this call (for example float64 or CUDA) is not silently reset to
    float32 on the CPU.

    Note that the parameters are replaced, not modified in place: anything that
    still holds the old Parameter objects (such as an optimizer built earlier)
    will not see the new values.

    Args:
        module: The module currently visited by ``apply``.
    """
    if not isinstance(module, nn.Linear):
        return

    old_weight = module.weight
    module.weight = torch.nn.Parameter(
        torch.full(old_weight.shape, 0.9, dtype=old_weight.dtype, device=old_weight.device),
        requires_grad=True,
    )

    old_bias = module.bias
    if old_bias is not None:
        module.bias = torch.nn.Parameter(
            torch.zeros(old_bias.shape, dtype=old_bias.dtype, device=old_bias.device),
            requires_grad=True,
        )

In [22]:
# Alias the layer (not its tensors): init_weights assigns new Parameter objects to it.
fc_layer = model.net.fc

for label, values in (("weights: ", fc_layer.weight[0][:10]), ("bias: ", fc_layer.bias[:10])):
    print(label, values)
print("=====================")

model.apply(init_weights)
for label, values in (("weights: ", fc_layer.weight[0][:10]), ("bias: ", fc_layer.bias[:10])):
    print(label, values)

weights:  tensor([ 0.0149,  0.0048,  0.0107, -0.0305, -0.0032, -0.0392, -0.0277,  0.0231,
         0.0438,  0.0220], grad_fn=<SliceBackward0>)
bias:  tensor([-0.0240,  0.0434,  0.0303, -0.0400,  0.0099,  0.0414, -0.0095, -0.0247,
         0.0179, -0.0266], grad_fn=<SliceBackward0>)
weights:  tensor([0.9000, 0.9000, 0.9000, 0.9000, 0.9000, 0.9000, 0.9000, 0.9000, 0.9000,
        0.9000], grad_fn=<SliceBackward0>)
bias:  tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], grad_fn=<SliceBackward0>)


### data.normal_、data.zero_

In [23]:
# Fresh model instance to be re-initialised with model.apply(init_weights) below.
model = MyModel()

In [24]:
def init_weights(module):
    """Re-initialise an ``nn.Linear`` in place; leave every other module untouched.

    Meant to be passed to ``Module.apply``. Weights are redrawn from a normal
    distribution (mean 0, std 1) and the bias, when the layer has one, is zeroed.

    Args:
        module: The module currently visited by ``apply``.
    """
    if not isinstance(module, nn.Linear):
        return

    module.weight.data.normal_(mean=0.0, std=1.0)

    bias = module.bias
    if bias is None:
        return
    bias.data.zero_()

In [25]:
fc_layer = model.net.fc

# Before: values left by the model's constructor.
print("weights: ", fc_layer.weight[0][:10])
print("bias: ", fc_layer.bias[:10])
print("=====================")

# Visit every sub-module with init_weights, then show the same slices again.
model.apply(init_weights)
print("weights: ", fc_layer.weight[0][:10])
print("bias: ", fc_layer.bias[:10])

weights:  tensor([ 0.0011,  0.0332,  0.0402, -0.0004, -0.0139,  0.0054, -0.0275, -0.0018,
        -0.0239,  0.0356], grad_fn=<SliceBackward0>)
bias:  tensor([ 0.0101,  0.0019,  0.0201,  0.0248, -0.0377, -0.0390, -0.0401, -0.0009,
        -0.0267, -0.0100], grad_fn=<SliceBackward0>)
weights:  tensor([ 0.0938, -0.6255,  1.1889,  0.6034,  1.6854,  0.1906,  1.3271,  1.3844,
        -1.7708, -1.0778], grad_fn=<SliceBackward0>)
bias:  tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], grad_fn=<SliceBackward0>)


# 模型權重複製

## 複製一個模型權重至另一個模型

In [26]:
# Load the saved state dict onto the CPU so the notebook also runs on machines that lack
# the device the checkpoint was saved from; load_state_dict copies the values into the
# model's own parameters afterwards.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted
# source (recent PyTorch versions accept weights_only=True to restrict unpickling).
checkpoint = torch.load('resnet_weights.pth', map_location='cpu')

In [27]:
class MyModel(nn.Module):
    """ResNet-18 backbone with a 512->256 ``fc`` layer followed by a 256->10 linear head.

    Sub-modules are registered as ``resnet`` and ``linear``, so state-dict keys are
    prefixed with ``resnet.`` and ``linear.`` respectively.
    """

    def __init__(self):
        super(MyModel, self).__init__()
        self.resnet = models.resnet18()
        # Replace the backbone's classifier so it emits 256 features for the head below.
        self.resnet.fc = nn.Linear(512, 256)
        self.linear = nn.Linear(256, 10)

    def forward(self, x):
        """Run ``x`` through the backbone, then through the linear head, and return the result."""
        # Fixed: the backbone is stored as ``self.resnet``; this class has no ``self.net``,
        # so the previous ``self.net(x)`` raised AttributeError on every forward pass.
        x = self.resnet(x)
        output = self.linear(x)

        return output

In [28]:
# Freshly initialised model that will receive the checkpoint weights below.
model = MyModel()

In [29]:
# Show conv1.weight[0][0] before any checkpoint values are loaded, for later comparison.
print("weights: ", model.resnet.conv1.weight[0][0])

weights:  tensor([[-0.0006, -0.0150,  0.0463, -0.0079, -0.0152,  0.0115,  0.0249],
        [-0.0154,  0.0046,  0.0252,  0.0479,  0.0018, -0.0133, -0.0048],
        [ 0.0047,  0.0077,  0.0154,  0.0016, -0.0147, -0.0068, -0.0362],
        [ 0.0425, -0.0370,  0.0107,  0.0094, -0.0246,  0.0161, -0.0203],
        [ 0.0256, -0.0016, -0.0147,  0.0002,  0.0113,  0.0191,  0.0252],
        [ 0.0187, -0.0308,  0.0402,  0.0105, -0.0355, -0.0450,  0.0135],
        [ 0.0285,  0.0155, -0.0073, -0.0131, -0.0070,  0.0490,  0.0098]],
       grad_fn=<SelectBackward0>)


In [30]:
# Keep every checkpoint entry except the two belonging to resnet.fc.
# NOTE(review): presumably excluded because the model above replaces resnet.fc with a
# new nn.Linear(512, 256) whose shape may differ from the checkpoint's — confirm.
fc_keys = ['resnet.fc.weight', 'resnet.fc.bias']
pretrained_dict = {
    name: tensor
    for name, tensor in checkpoint.items()
    if name not in fc_keys
}

In [31]:
# Display the checkpoint's conv1.weight[0][0] (bare expression, shown as the cell output).
pretrained_dict['resnet.conv1.weight'][0][0]

tensor([[-0.0322, -0.0509, -0.0117, -0.0062,  0.0003, -0.0347,  0.0073],
        [-0.0072, -0.0488, -0.0295, -0.0035, -0.0362, -0.0497, -0.0226],
        [ 0.0087,  0.0136,  0.0176,  0.0150, -0.0127,  0.0358,  0.0585],
        [-0.0243,  0.0452,  0.0083,  0.0163, -0.0355,  0.0162, -0.0159],
        [-0.0291,  0.0263,  0.0014,  0.0211, -0.0300,  0.0307,  0.0133],
        [ 0.0156, -0.0002,  0.0679,  0.0492, -0.0200, -0.0276,  0.0333],
        [-0.0059, -0.0139,  0.0266, -0.0367, -0.0117,  0.0113, -0.0111]])

In [32]:
# Start from the model's own state dict so every key the model expects is present,
# then overwrite the entries that also appear in the filtered checkpoint.
model_state = model.state_dict()
model_state.update(pretrained_dict)
# Kept as the last expression of the cell so the value returned by load_state_dict is displayed.
# NOTE(review): strict=False also ignores any checkpoint keys the model does not have — confirm
# that silently skipping such keys is intended.
model.load_state_dict(model_state, strict=False)

<All keys matched successfully>

In [33]:
# Show conv1.weight[0][0] again after load_state_dict, to compare with the checkpoint tensor.
print("weights: ", model.resnet.conv1.weight[0][0])

weights:  tensor([[-0.0322, -0.0509, -0.0117, -0.0062,  0.0003, -0.0347,  0.0073],
        [-0.0072, -0.0488, -0.0295, -0.0035, -0.0362, -0.0497, -0.0226],
        [ 0.0087,  0.0136,  0.0176,  0.0150, -0.0127,  0.0358,  0.0585],
        [-0.0243,  0.0452,  0.0083,  0.0163, -0.0355,  0.0162, -0.0159],
        [-0.0291,  0.0263,  0.0014,  0.0211, -0.0300,  0.0307,  0.0133],
        [ 0.0156, -0.0002,  0.0679,  0.0492, -0.0200, -0.0276,  0.0333],
        [-0.0059, -0.0139,  0.0266, -0.0367, -0.0117,  0.0113, -0.0111]],
       grad_fn=<SelectBackward0>)


## 複製某些層權重至其他層

In [34]:
from collections import OrderedDict

class MyModel(nn.Module):
    """Two structurally identical convolutional branches fed with the same input.

    Each branch is an ``nn.Sequential`` with named sub-layers
    ``conv1`` (256->128, kernel 3), ``relu1``, ``conv2`` (128->10, kernel 3), ``relu2``.
    The branches are registered as ``net1`` and ``net2`` and have independent parameters.
    """

    def __init__(self):
        super().__init__()

        def build_branch():
            # Insertion order of the OrderedDict defines both layer order and layer names.
            layers = OrderedDict()
            layers['conv1'] = nn.Conv2d(256, 128, 3)
            layers['relu1'] = nn.ReLU()
            layers['conv2'] = nn.Conv2d(128, 10, 3)
            layers['relu2'] = nn.ReLU()
            return nn.Sequential(layers)

        self.net1 = build_branch()
        self.net2 = build_branch()

    def forward(self, x):
        """Return the pair ``(net1(x), net2(x))``."""
        return self.net1(x), self.net2(x)

In [35]:
# Instance with two independent branches (net1, net2) used for the layer-to-layer copy below.
model_1 = MyModel()

In [36]:
# List the qualified name of every parameter registered on the model.
for entry in model_1.named_parameters():
    print("name: ", entry[0])

name:  net1.conv1.weight
name:  net1.conv1.bias
name:  net1.conv2.weight
name:  net1.conv2.bias
name:  net2.conv1.weight
name:  net2.conv1.bias
name:  net2.conv2.weight
name:  net2.conv2.bias


In [37]:
# Copy the weights of every Conv2d in net1 into the layer at the same index in net2.
# no_grad is needed because copy_ writes in place into parameters that require grad.
# NOTE(review): only .weight is copied; the .bias of each net2 layer is left as-is —
# confirm this is intended.
with torch.no_grad():
    for i, src_layer in enumerate(model_1.net1):
        if not isinstance(src_layer, nn.Conv2d):
            continue
        dst_layer = model_1.net2[i]

        print("========= {} =========".format(i))
        print("org:")
        print("net1 weights:", src_layer.weight[0][0])
        print("net2 weights:", dst_layer.weight[0][0])

        dst_layer.weight.copy_(src_layer.weight)

        print("=====================")
        print("new:")
        print("net1 weights:", src_layer.weight[0][0])
        print("net2 weights:", dst_layer.weight[0][0])

org:
net1 weights: tensor([[-0.0167,  0.0080, -0.0142],
        [-0.0078, -0.0056, -0.0023],
        [ 0.0091, -0.0146, -0.0120]], requires_grad=True)
net2 weights: tensor([[-0.0045,  0.0046,  0.0058],
        [-0.0091,  0.0138, -0.0193],
        [ 0.0091,  0.0119, -0.0138]], requires_grad=True)
new:
net1 weights: tensor([[-0.0167,  0.0080, -0.0142],
        [-0.0078, -0.0056, -0.0023],
        [ 0.0091, -0.0146, -0.0120]], requires_grad=True)
net2 weights: tensor([[-0.0167,  0.0080, -0.0142],
        [-0.0078, -0.0056, -0.0023],
        [ 0.0091, -0.0146, -0.0120]], requires_grad=True)
org:
net1 weights: tensor([[-0.0119,  0.0113, -0.0167],
        [-0.0195,  0.0121, -0.0249],
        [-0.0240,  0.0281, -0.0243]], requires_grad=True)
net2 weights: tensor([[ 0.0269, -0.0043,  0.0169],
        [ 0.0139,  0.0185, -0.0151],
        [ 0.0261, -0.0009, -0.0192]], requires_grad=True)
new:
net1 weights: tensor([[-0.0119,  0.0113, -0.0167],
        [-0.0195,  0.0121, -0.0249],
        [-0.0240