In [1]:
import torch
import torch.nn as nn
import torchvision.models as models

# 凍結 pre-trained model weight
可使用 pytorch 的 torchvision.models 中所提供的模型權重，也可以使用自己訓練或下載的模型權重檔。

## 使用 pytorch 提供的 pre-trained model weight

In [2]:
model_1 = models.resnet18(pretrained=True)

In [3]:
cnt = 0
for name, param in model_1.named_parameters():
    print("name: ", name)
    print("requires_grad: ", param.requires_grad)
    
    cnt += 1
    if cnt == 5:
        break

print("name: fc.weight")
print("requires_grad: ", model_1.fc.weight.requires_grad)

print("name: fc.bias")
print("requires_grad: ", model_1.fc.bias.requires_grad)

name:  conv1.weight
requires_grad:  True
name:  bn1.weight
requires_grad:  True
name:  bn1.bias
requires_grad:  True
name:  layer1.0.conv1.weight
requires_grad:  True
name:  layer1.0.bn1.weight
requires_grad:  True
name: fc.weight
requires_grad:  True
name: fc.bias
requires_grad:  True


### 將除了全連接層以外的網路層都凍結

In [4]:
for name, param in model_1.named_parameters():
    if name not in ['fc.weight', 'fc.bias']:
        param.requires_grad = False

In [5]:
cnt = 0
for name, param in model_1.named_parameters():
    print("name: ", name)
    print("requires_grad: ", param.requires_grad)
    
    cnt += 1
    if cnt == 5:
        break

print("name: fc.weight")
print("requires_grad: ", model_1.fc.weight.requires_grad)

print("name: fc.bias")
print("requires_grad: ", model_1.fc.bias.requires_grad)

name:  conv1.weight
requires_grad:  False
name:  bn1.weight
requires_grad:  False
name:  bn1.bias
requires_grad:  False
name:  layer1.0.conv1.weight
requires_grad:  False
name:  layer1.0.bn1.weight
requires_grad:  False
name: fc.weight
requires_grad:  True
name: fc.bias
requires_grad:  True


### 在優化器中加入 filter 進行過濾

filter() 用於過濾不符合條件的元素，其語法為 filter(function, iterable)，其中 function 為判斷條件、iterable 為元素列表，返回值為一個迭代器。進行過濾的過程是將 iterable 的值傳遞給 function 進行判斷，返回值為 True 的元素才會放入要回傳的迭代器中。

In [6]:
### filter example

def is_odd(n):
    return n % 2 == 1
 
newlist = filter(is_odd, [i for i in range(10)])

list(newlist)

[1, 3, 5, 7, 9]

In [7]:
# 進行過濾
parameters_1 = filter(lambda p: p.requires_grad, model_1.parameters())
optimizer_1 = torch.optim.Adam(parameters_1, lr=0.001, weight_decay=1e-5)

## 使用自己訓練或下載的權重檔

In [None]:
import gdown

resnet_model = 'https://download.pytorch.org/models/resnet18-5c106cde.pth'
gdown.download(resnet_model, "resnet-5c106cde.pth")

In [8]:
checkpoint = torch.load('resnet-5c106cde.pth')

In [9]:
cnt = 0
for k, v in checkpoint.items():
    print("name: ", k)
    print("requires_grad: ", v.requires_grad)
    
    cnt += 1
    if cnt == 5:
        break
        
print("name: fc.weight")
print("requires_grad: ", checkpoint['fc.weight'].requires_grad)

print("name: fc.bias")
print("requires_grad: ", checkpoint['fc.bias'].requires_grad)

name:  conv1.weight
requires_grad:  True
name:  bn1.running_mean
requires_grad:  False
name:  bn1.running_var
requires_grad:  False
name:  bn1.weight
requires_grad:  True
name:  bn1.bias
requires_grad:  True
name: fc.weight
requires_grad:  True
name: fc.bias
requires_grad:  True


### 將除了全連接層以外的網路層都凍結

In [10]:
for k, v in checkpoint.items():
    if k not in ['fc.weight', 'fc.bias']:
        v.requires_grad = False

In [11]:
cnt = 0
for k, v in checkpoint.items():
    print("name: ", k)
    print("requires_grad: ", v.requires_grad)
    
    cnt += 1
    if cnt == 5:
        break
        
print("name: fc.weight")
print("requires_grad: ", checkpoint['fc.weight'].requires_grad)

print("name: fc.bias")
print("requires_grad: ", checkpoint['fc.bias'].requires_grad)

name:  conv1.weight
requires_grad:  False
name:  bn1.running_mean
requires_grad:  False
name:  bn1.running_var
requires_grad:  False
name:  bn1.weight
requires_grad:  False
name:  bn1.bias
requires_grad:  False
name: fc.weight
requires_grad:  True
name: fc.bias
requires_grad:  True


### 在優化器中加入 filter 進行過濾

In [12]:
model_2 = models.resnet18()
model_state = model_2.state_dict()

pretrained_dict = {k: v for k, v in checkpoint.items() if k in model_state}
model_state.update(pretrained_dict)
model_2.load_state_dict(model_state)

<All keys matched successfully>

In [13]:
parameters_2 = filter(lambda p: p.requires_grad, model_2.parameters())
optimizer_2 = torch.optim.Adam(parameters_2, lr=0.001, weight_decay=1e-5)

### 將 50 層內的 layer 凍結

In [14]:
checkpoint = torch.load('resnet-5c106cde.pth')
for i, (k, v) in enumerate(checkpoint.items()):
    if i < 50:
        v.requires_grad = False

In [15]:
model_3 = models.resnet18()
model_state = model_3.state_dict()

pretrained_dict = {k: v for k, v in checkpoint.items() if k in model_state}
model_state.update(pretrained_dict)
model_3.load_state_dict(model_state)

<All keys matched successfully>

In [16]:
parameters_3 = filter(lambda p: p.requires_grad, model_3.parameters())
optimizer_3 = torch.optim.Adam(parameters_3, lr=0.001, weight_decay=1e-5)

# 解除凍結
### 使用 pytorch 提供的 pre-trained model weight

In [17]:
cnt = 0
for name, param in model_1.named_parameters():
    print("name: ", name)
    print("requires_grad: ", param.requires_grad)
    
    cnt += 1
    if cnt == 5:
        break

name:  conv1.weight
requires_grad:  False
name:  bn1.weight
requires_grad:  False
name:  bn1.bias
requires_grad:  False
name:  layer1.0.conv1.weight
requires_grad:  False
name:  layer1.0.bn1.weight
requires_grad:  False


In [18]:
for name, param in model_1.named_parameters():
    if param.requires_grad == False:
        param.requires_grad = True
        optimizer_1.add_param_group({'params': param})

In [19]:
cnt = 0
for name, param in model_1.named_parameters():
    print("name: ", name)
    print("requires_grad: ", param.requires_grad)
    
    cnt += 1
    if cnt == 5:
        break

name:  conv1.weight
requires_grad:  True
name:  bn1.weight
requires_grad:  True
name:  bn1.bias
requires_grad:  True
name:  layer1.0.conv1.weight
requires_grad:  True
name:  layer1.0.bn1.weight
requires_grad:  True


### 使用自己訓練或下載的權重檔


In [20]:
cnt = 0
for name, param in model_2.named_parameters():
    print("name: ", name)
    print("requires_grad: ", param.requires_grad)
    
    cnt += 1
    if cnt == 5:
        break

name:  conv1.weight
requires_grad:  True
name:  bn1.weight
requires_grad:  True
name:  bn1.bias
requires_grad:  True
name:  layer1.0.conv1.weight
requires_grad:  True
name:  layer1.0.bn1.weight
requires_grad:  True


In [21]:
for name, param in model_2.named_parameters():
    if param.requires_grad == False:
        param.requires_grad = True
        optimizer_2.add_param_group({'params': param})

In [22]:
cnt = 0
for name, param in model_2.named_parameters():
    print("name: ", name)
    print("requires_grad: ", param.requires_grad)
    
    cnt += 1
    if cnt == 5:
        break

name:  conv1.weight
requires_grad:  True
name:  bn1.weight
requires_grad:  True
name:  bn1.bias
requires_grad:  True
name:  layer1.0.conv1.weight
requires_grad:  True
name:  layer1.0.bn1.weight
requires_grad:  True


# 設置不同 learning rate

針對不同層的 layer 設定各自的 learning rate，有三種方法: 直接設定參數名稱、設定某一層的全部參數、將所有 layer 拆分成幾部分後再設定

In [None]:
# 查看 layer 名稱
# for name, param in model_1.named_parameters():
#     print(name)

### 直接設定參數名稱

In [25]:
optimizer_4 = torch.optim.Adam([
    {'params': model_1.fc.weight, 'lr': 1e-5},
    {'params': model_1.fc.bias, 'lr': 1e-3}],
    lr=0.001) # 默認 learning rate 值

### 設定某一層的全部參數

In [26]:
optimizer_5 = torch.optim.Adam([
    {'params': model_1.layer1.parameters(), 'lr': 1e-5},
    {'params': model_1.fc.parameters(), 'lr': 1e-3}],
    lr=0.001) # 默認 learning rate 值

### 將所有layer拆分成幾部分後再設定

In [27]:
fc_layer = list(map(id, model_1.fc.parameters()))
other_layer = filter(lambda p: id(p) not in fc_layer, model_1.parameters())

In [28]:
optimizer_6 = torch.optim.Adam([
    {'params': other_layer, 'lr': 1e-5},
    {'params': model_1.fc.parameters(), 'lr': 1e-3}],
    lr=0.001) # 默認 learning rate 值