# Different weight regularizations in a sandbox

In [1]:
from models.simple import SimpleCNN
import numpy as np
import torch
import matplotlib.pyplot as plt
from torch_ext import WeightRegularization
from torch_optimizer import SGDW
from optimizer.sgd import SGD
from optimizer.sgdw import SGDW as mySGDW

In [2]:
def reset():
    """Build a tiny, fully deterministic SimpleCNN together with one sample.

    The input batch is a single 1x1 one-channel image filled with ones and
    the target is class index 1. All learnable tensors of ``conv1`` and
    ``fc`` are overwritten with constants so that every experiment starts
    from exactly the same state:

    * ``conv1.weight`` entries are ``1 / numel`` (they sum to 1),
    * ``fc.weight`` entries are ``-3 / numel`` (they sum to -3),
    * both biases are zero.

    Returns:
        tuple: ``(model, _imgs, _true)`` - the freshly initialised model,
        the input batch and the target labels.
    """
    _imgs = torch.ones(size=(1, 1, 1, 1))
    _true = torch.ones(_imgs.shape[0], dtype=torch.long)
    model = SimpleCNN(
        img_size=_imgs.shape[-2:],
        in_channels=_imgs.shape[1],
        num_classes=2,
        num_layers=1,
        num_channels=1,
        kernel_size=1,
    )

    fc_shape = model.fc.weight.shape
    conv_shape = model.conv1.weight.shape

    # One bias entry per output unit. The previous shape-(1,) bias only
    # worked through broadcasting and tied both classes to a single scalar.
    # NOTE(review): assumes fc.weight is laid out (out_features, in_features).
    model.fc.bias = torch.nn.Parameter(torch.zeros(fc_shape[0]))
    # nn.Parameter already enables requires_grad, so torch.full needs no flag.
    model.fc.weight = torch.nn.Parameter(
        torch.full(size=fc_shape, fill_value=-3 / np.prod(fc_shape))
    )
    model.conv1.bias = torch.nn.Parameter(torch.zeros(model.num_channels))
    model.conv1.weight = torch.nn.Parameter(
        torch.full(size=conv_shape, fill_value=1.0 / np.prod(conv_shape))
    )
    return model, _imgs, _true

In [3]:
# Criterion shared by the experiment cells below.
loss = torch.nn.CrossEntropyLoss()

# Build the deterministic toy model and display its starting weights.
model, _imgs, _true = reset()
for initial_weight in (model.conv1.weight, model.fc.weight):
    print(initial_weight)

Parameter containing:
tensor([[[[1.]]]], requires_grad=True)
Parameter containing:
tensor([[-1.5000],
        [-1.5000]], requires_grad=True)


## L2
### Weight decay in PyTorch SGD

In [15]:
# Start from a fresh model so earlier cells cannot influence the result.
model, _imgs, _true = reset()

# Scaling the loss by 0 makes every data gradient zero, so whatever changes
# after step() is caused by the optimizer's weight decay alone.
outputs = model(_imgs)
zeroed_loss = loss(outputs, _true) * 0
zeroed_loss.backward()
for weight in (model.conv1.weight, model.fc.weight):
    print(weight.grad)

# NOTE: `SGD` is the project-local optimizer.sgd.SGD, not torch.optim.SGD.
optimizer = SGD(model.parameters(), lr=0.1, weight_decay=0.1)
optimizer.step()
for weight in (model.conv1.weight, model.fc.weight):
    print(weight)

tensor([[[[0.]]]])
tensor([[0.],
        [0.]])
Parameter containing:
tensor([[[[0.9900]]]], requires_grad=True)
Parameter containing:
tensor([[-1.4850],
        [-1.4850]], requires_grad=True)


### Decoupled weight decay / L2 norm

In [14]:
# Start from a fresh model so earlier cells cannot influence the result.
model, _imgs, _true = reset()

# Zero out the data gradient; only the optimizer's decay term can then
# move the weights.
outputs = model(_imgs)
zeroed_loss = loss(outputs, _true) * 0
zeroed_loss.backward()
for weight in (model.conv1.weight, model.fc.weight):
    print(weight.grad)

# Project-local SGDW (optimizer.sgdw.SGDW), used with its default settings
# apart from lr and weight_decay.
optimizer = mySGDW(model.parameters(), lr=0.1, weight_decay=0.1)
optimizer.step()
for weight in (model.conv1.weight, model.fc.weight):
    print(weight)

tensor([[[[0.]]]])
tensor([[0.],
        [0.]])
Parameter containing:
tensor([[[[0.9000]]]], requires_grad=True)
Parameter containing:
tensor([[-1.3500],
        [-1.3500]], requires_grad=True)


### External weight regularization

In [6]:
# Start from a fresh model and attach the external penalty to it.
model, _imgs, _true = reset()
regularizer = WeightRegularization(model, weight_decay=0.1)

# The data term is multiplied by 0, so the printed loss and the gradients
# come from the penalty only.
outputs = model(_imgs)
data_term = loss(outputs, _true) * 0
total_loss = data_term + regularizer.penalty()
print(total_loss)
total_loss.backward()
for weight in (model.conv1.weight, model.fc.weight):
    print(weight.grad)

# Plain torch SGD without weight decay: the regularization reaches the
# weights solely through the gradients computed above.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
optimizer.step()
for weight in (model.conv1.weight, model.fc.weight):
    print(weight)

tensor(0.5500, grad_fn=<AddBackward0>)
tensor([[[[0.2000]]]])
tensor([[-0.3000],
        [-0.3000]])
Parameter containing:
tensor([[[[0.9800]]]], requires_grad=True)
Parameter containing:
tensor([[-1.4700],
        [-1.4700]], requires_grad=True)


## L1
### `torch_optimizer` "SGDW" -> implementation is effectively L1

In [7]:
# Start from a fresh model so earlier cells cannot influence the result.
model, _imgs, _true = reset()

# Zero out the data gradient; only the optimizer's decay term can then
# move the weights.
outputs = model(_imgs)
zeroed_loss = loss(outputs, _true) * 0
zeroed_loss.backward()
for weight in (model.conv1.weight, model.fc.weight):
    print(weight.grad)

# Third-party SGDW imported from torch_optimizer.
# NOTE(review): the recorded output shows both weights shrinking by 10%
# (1 -> 0.9, -1.5 -> -1.35), i.e. proportionally to their magnitude, whereas
# an L1 step would move every weight by the same absolute amount - confirm
# that the recorded output is current.
optimizer = SGDW(model.parameters(), lr=0.1, weight_decay=0.1)
optimizer.step()
for weight in (model.conv1.weight, model.fc.weight):
    print(weight)

tensor([[[[0.]]]])
tensor([[0.],
        [0.]])
Parameter containing:
tensor([[[[0.9000]]]], requires_grad=True)
Parameter containing:
tensor([[-1.3500],
        [-1.3500]], requires_grad=True)


### Proper L1 (decoupled, via `mySGDW`)

In [8]:
# Start from a fresh model so earlier cells cannot influence the result.
model, _imgs, _true = reset()

# Zero out the data gradient; only the optimizer's decay term can then
# move the weights.
outputs = model(_imgs)
zeroed_loss = loss(outputs, _true) * 0
zeroed_loss.backward()
for weight in (model.conv1.weight, model.fc.weight):
    print(weight.grad)

# Project-local SGDW with weight_norm=1 to request the L1 variant of the
# decay step.
optimizer = mySGDW(model.parameters(), lr=0.1, weight_decay=0.1, weight_norm=1)
optimizer.step()
for weight in (model.conv1.weight, model.fc.weight):
    print(weight)

tensor([[[[0.]]]])
tensor([[0.],
        [0.]])
Parameter containing:
tensor([[[[0.9000]]]], requires_grad=True)
Parameter containing:
tensor([[-1.4000],
        [-1.4000]], requires_grad=True)


### External L1 weight regularization

In [13]:
# Start from a fresh model and attach the external L1 penalty (p=1) to it.
model, _imgs, _true = reset()
regularizer = WeightRegularization(model, weight_decay=0.1, p=1)

# The data term is multiplied by 0, so the printed loss and the gradients
# come from the penalty only.
outputs = model(_imgs)
data_term = loss(outputs, _true) * 0
total_loss = data_term + regularizer.penalty()
print(total_loss)
total_loss.backward()
for weight in (model.conv1.weight, model.fc.weight):
    print(weight.grad)

# Plain torch SGD without weight decay: the regularization reaches the
# weights solely through the gradients computed above.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
optimizer.step()
for weight in (model.conv1.weight, model.fc.weight):
    print(weight)

tensor(0.4000, grad_fn=<AddBackward0>)
tensor([[[[0.1000]]]])
tensor([[-0.1000],
        [-0.1000]])
Parameter containing:
tensor([[[[0.9900]]]], requires_grad=True)
Parameter containing:
tensor([[-1.4900],
        [-1.4900]], requires_grad=True)
