### Get Layerwise Params
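See also the [PyTorch forum thread on adaptive learning rates](https://discuss.pytorch.org/t/adaptive-learning-rate/320/22) and the [Stack Overflow question on applying layer-wise learning rates in PyTorch](https://stackoverflow.com/questions/51801648/how-to-apply-layer-wise-learning-rate-in-pytorch).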

In [1]:
import torch.nn as nn
import torch.nn.functional as F
from torch import optim

class LeNet(nn.Module):
    """LeNet-style CNN: two conv/pool stages followed by two linear layers.

    The layer sizes assume single-channel 28x28 inputs (such as MNIST
    digits): two 5x5 convolutions and two 2x2 poolings reduce each sample
    to a 50 x 4 x 4 feature map, which is what ``fc1`` expects.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, 10)``.

        Args:
            x: Input batch of shape ``(batch, 1, 28, 28)``.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce the
                800 features per sample that ``fc1`` expects.
        """
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        # Flatten each sample separately and keep the batch dimension fixed.
        # Reshaping with view(-1, 800) would let the batch dimension absorb a
        # wrong input size (e.g. 16 samples of 32x32 become 25 rows) and
        # silently mix samples; this way fc1 reports the mismatch instead.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

    def name(self):
        """Return the model's display name."""
        return "LeNet"


model = LeNet()

In [4]:
# Print the name of every parameter that will receive gradient updates.
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue  # frozen parameter: leave it out of the listing
    print(name)

conv1.weight
conv1.bias
conv2.weight
conv2.bias
fc1.weight
fc1.bias
fc2.weight
fc2.bias


In [5]:
# Display the model together with `model.parameters`. The method is referenced
# without being called, so the second tuple element is the bound method itself
# (its repr embeds the module), not the parameter tensors.
model, model.parameters

(LeNet(
   (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
   (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
   (fc1): Linear(in_features=800, out_features=500, bias=True)
   (fc2): Linear(in_features=500, out_features=10, bias=True)
 ), <bound method Module.parameters of LeNet(
   (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
   (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
   (fc1): Linear(in_features=800, out_features=500, bias=True)
   (fc2): Linear(in_features=500, out_features=10, bias=True)
 )>)

### Parameter groups

In [18]:
# Base learning rate. Each layer trains at a fraction of it, so earlier layers
# are updated more slowly than later ones.
lr = 0.003
param_groups = [
    {"params": model.conv1.parameters(), "lr": lr / 4},
    {"params": model.conv2.parameters(), "lr": lr / 3},
    {"params": model.fc1.parameters(), "lr": lr / 2},
    {"params": model.fc2.parameters(), "lr": lr},
]
# The optimizer-level `lr` is only the default for groups that do not set their
# own; it reuses the base rate so the two cannot drift apart when `lr` changes.
# Pass `model.parameters()` instead of `param_groups` to train every layer at
# a single learning rate.
# weight_decay (L2 regularization) is left at 0.0: the author found that
# enabling it made things worse.
optimizer = optim.Adam(param_groups, lr=lr, weight_decay=0.0)

In [19]:
# Report the learning rate currently configured for each parameter group.
for param_group in optimizer.param_groups:
    group_lr = param_group["lr"]
    print(group_lr)
    # A group's rate can also be overridden in place at this point, e.g.:
    #     param_group["lr"] = 0.1

0.00075
0.001
0.0015
0.003


### Children

In [2]:
# Show the class of each direct submodule, in registration order.
for layer_type in map(type, model.children()):
    print(layer_type)

<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.linear.Linear'>
