## Weight Regularization(정형화)

정형화는 제약조건을 추가로 걸어줌으로써 오버피팅을 최소화하는 방법이다.  
제약조건은 주로 손실함수에 추가되며, 대표적으로 가중치의 L1 노름(절댓값의 합)을 손실함수에 더하는 L1 정형화와 L2 노름(제곱의 합)을 더하는 L2 정형화가 있다.  
오버피팅된 회귀모델 변수는 주어진 값을 정확히 맞추기 위해 파라미터값을 계속 조정하여 그 값이 매우 들쭉날쭉하다.  
따라서 변수들이 매우 비정상적으로 지정되어 있는 것을 볼 수 있다.  
정형화의 정도(λ)를 높일수록 이 변수값들이 작아지므로 오버피팅을 막을 수 있다.

#### module

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [2]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


#### parameters

In [3]:
# Training hyperparameters shared by the cells below.
num_epoch: int = 10  # number of passes over the training loader
batch_size: int = 256  # samples per mini-batch handed to the DataLoaders
learning_rate: float = 2e-4  # step size passed to the optimizer

#### data

[pytorch dataset download error solution](https://github.com/pytorch/vision/issues/1938)

In [4]:
from six.moves import urllib

# Workaround for the dataset download error linked above: install a global
# urllib opener that sends a browser-like User-agent header with each request.
browser_headers = [('User-agent', 'Mozilla/5.0')]
opener = urllib.request.build_opener()
opener.addheaders = browser_headers
urllib.request.install_opener(opener)

In [5]:
# Download MNIST into the current directory (if needed) and build the
# train / test datasets; ToTensor turns every image into a tensor.
mnist_train = dset.MNIST(
    root="./",
    train=True,
    transform=transforms.ToTensor(),
    target_transform=None,
    download=True,
)
mnist_test = dset.MNIST(
    root="./",
    train=False,
    transform=transforms.ToTensor(),
    target_transform=None,
    download=True,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz



HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [6]:
# Shape of the first training image and the number of training samples.
mnist_train[0][0].size(), len(mnist_train)

(torch.Size([1, 28, 28]), 60000)

In [7]:
# Shape of the first test image and the number of test samples.
mnist_test[0][0].size(), len(mnist_test)

(torch.Size([1, 28, 28]), 10000)

In [8]:
# Mini-batch loaders. drop_last=True discards the final incomplete batch so
# every batch holds exactly batch_size samples; with 10000 test images and
# batch_size=256 this leaves the last 16 test samples out of evaluation.
train_loader = DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,  # reshuffle the training set every epoch
    num_workers=2,
    drop_last=True,
)
test_loader = DataLoader(
    mnist_test,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
    drop_last=True,
)

#### model

In [9]:
class CNN(nn.Module):
    """Small convolutional classifier for 1x28x28 images with 10 classes.

    ``layer`` is the convolutional feature extractor: three 3x3 convolutions
    (padding=1, so spatial size is preserved) with ReLU activations and two
    2x2 max-pools, taking a 1x28x28 input to a 64x7x7 feature map.
    ``fc_layer`` maps the flattened 64*7*7 features to 10 class scores.
    """

    def __init__(self):
        super().__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 28x28 -> 14x14
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 14x14 -> 7x7
        )
        self.fc_layer = nn.Sequential(
            nn.Linear(64 * 7 * 7, 100),
            nn.ReLU(),
            nn.Linear(100, 10),
        )

    def forward(self, x):
        """Return class scores of shape (N, 10) for input of shape (N, 1, 28, 28)."""
        out = self.layer(x)
        # Flatten using the actual batch dimension instead of the global
        # batch_size, so partial batches and single samples also work and the
        # model does not depend on a module-level variable.
        out = out.view(out.size(0), -1)
        out = self.fc_layer(out)
        return out




In [10]:
# Build the network, then move its parameters to the selected device.
model = CNN()
model = model.to(device)

#### loss

In [11]:
# Cross-entropy between the raw class scores and the integer labels,
# averaged over the batch (the default reduction, spelled out here).
loss_func = nn.CrossEntropyLoss(reduction="mean")

#### optimizer

In [12]:
# Regularization can be applied through the optimizer's weight_decay argument.
optimizer = optim.SGD(
    model.parameters(),
    lr=learning_rate,
    weight_decay=0.1,
)

#### train

In [13]:
# Train with mini-batch SGD for num_epoch passes over train_loader.
model.train()  # make sure the model is in training mode
for i in range(num_epoch):
    for image, label in train_loader:
        x = image.to(device)
        y = label.to(device)

        optimizer.zero_grad()
        # Call the module itself rather than .forward() so that any
        # registered hooks are run as well.
        output = model(x)
        loss = loss_func(output, y)
        loss.backward()
        optimizer.step()

    # Report the loss of the last mini-batch on every 10th epoch (0, 10, ...).
    if i % 10 == 0:
        print(loss)

tensor(2.2990, device='cuda:0', grad_fn=<NllLossBackward>)


#### test

In [16]:
# Measure classification accuracy over the batches yielded by test_loader.
correct = 0
total = 0

model.eval()  # switch to evaluation mode before measuring accuracy
with torch.no_grad():  # no gradients are needed for evaluation
    for image, label in test_loader:
        x = image.to(device)
        y = label.to(device)

        # Call the module itself rather than .forward() so that any
        # registered hooks are run as well.
        output = model(x)
        # Predicted class = index of the largest score in each row.
        output_index = output.argmax(dim=1)

        total += label.size(0)
        correct += (output_index == y).sum().float()

    print(f"Accuracy of Test Data: {correct/total*100}")

Accuracy of Test Data: 11.348156929016113
