# Maximum Likelihood Estimation (MLE)
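- Likelihood를 최대화하는 파라미터를 찾는 방법이며, 그 최적화에 Gradient Ascent를 사용함 (최대를 찾는 것이기 때문임)
- 실제 구현에서는 negative log-likelihood(= cross entropy)를 Gradient Descent로 최소화하는 것과 동일함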

# Overfitting
- Maximum Likelihood Estimation은 숙명적으로 Overfitting이 됨
- 훈련 세트와 테스트 세트로 나누어 확인
- 훈련 세트의 cost는 계속 줄어드는데 테스트(검증) 세트의 cost가 커지기 시작하면 overfitting으로 판단하고, 이를 통해 overfitting이 되는 것을 방지함

<방지>
1. 더 많은 데이터
2. 특징(feature)의 수 **줄이기** (데이터 분포를 잘 설명하는 특징만 남기기)
3. 정규화 (Regularization)

# Regularization
- Early Stopping: Validation Loss가 더 이상 낮아지지 않을 때 학습을 멈춤
- Reducing Network Size
- Weight Decay
- Dropout ★
- Batch Normalization ★

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [2]:
# Seed PyTorch's global random number generator so repeated runs are repeatable.
torch.manual_seed(1)

<torch._C.Generator at 0x1c0fcbd0ab0>

In [4]:
# Training set: eight samples with three features each, labelled with classes 0-2.
x_train = torch.tensor(
    [
        [1, 2, 3],
        [1, 3, 2],
        [1, 3, 4],
        [1, 5, 5],
        [1, 7, 5],
        [1, 2, 5],
        [1, 6, 6],
        [1, 7, 7],
    ],
    dtype=torch.float32,
)
y_train = torch.tensor([2, 2, 2, 1, 1, 1, 0, 0], dtype=torch.long)

# Held-out test set: three samples, all labelled class 2.
x_test = torch.tensor(
    [
        [2, 1, 1],
        [3, 1, 2],
        [3, 3, 4],
    ],
    dtype=torch.float32,
)
y_test = torch.tensor([2, 2, 2], dtype=torch.long)

In [8]:
class SoftmaxClassifierModel(nn.Module):
    """Linear classifier mapping 3 input features to 3 class scores.

    ``forward`` returns the raw output of the linear layer; no softmax is
    applied inside the model itself.
    """

    def __init__(self) -> None:
        super().__init__()
        # Single fully connected layer: 3 inputs -> 3 outputs.
        self.linear = nn.Linear(in_features=3, out_features=3)

    def forward(self, x):
        """Return the linear layer's output for ``x``."""
        scores = self.linear(x)
        return scores

model = SoftmaxClassifierModel()

# Set up the optimizer: SGD over all of the model's parameters with lr=0.1.
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [11]:
def train(model, optimizer, x_train, y_train):
    """Run 20 full-batch training steps with a cross-entropy cost.

    Each epoch feeds all of ``x_train`` through ``model``, computes
    ``F.cross_entropy`` against ``y_train``, takes one optimizer step and
    prints the cost measured before that step.

    Args:
        model: callable producing per-class scores for ``x_train``.
        optimizer: object exposing ``zero_grad()`` and ``step()`` for the
            model's parameters.
        x_train: input features.
        y_train: class-index targets.
    """
    total_epochs = 20
    log_line = 'Epoch {:4d}/{} Cost: {:.6f}'

    epoch_idx = 0
    while epoch_idx < total_epochs:
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass: H(x), then the cost.
        logits = model(x_train)
        loss = F.cross_entropy(logits, y_train)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        print(log_line.format(epoch_idx, total_epochs, loss.item()))
        epoch_idx += 1

In [12]:
def test(model, optimizer, x_test, y_test):
    prediction = model(x_test)
    predicted_classes = prediction.max(1)[1]
    correct_count = (predicted_classes == y_test).sum().item()
    cost = F.cross_entropy(prediction, y_test)

    print('Accuracy: {}% Cost: {:.6f}'.format(
        correct_count / len(y_test) * 100, cost.item()
    ))

In [13]:
train(model, optimizer, x_train, y_train)
# Trained by maximum likelihood estimation (MLE)

Epoch    0/20 Cost: 2.214913
Epoch    1/20 Cost: 1.277324
Epoch    2/20 Cost: 1.213158
Epoch    3/20 Cost: 1.184269
Epoch    4/20 Cost: 1.167165
Epoch    5/20 Cost: 1.154966
Epoch    6/20 Cost: 1.144420
Epoch    7/20 Cost: 1.134938
Epoch    8/20 Cost: 1.126035
Epoch    9/20 Cost: 1.117556
Epoch   10/20 Cost: 1.109393
Epoch   11/20 Cost: 1.101498
Epoch   12/20 Cost: 1.093841
Epoch   13/20 Cost: 1.086401
Epoch   14/20 Cost: 1.079168
Epoch   15/20 Cost: 1.072130
Epoch   16/20 Cost: 1.065279
Epoch   17/20 Cost: 1.058609
Epoch   18/20 Cost: 1.052113
Epoch   19/20 Cost: 1.045784


In [14]:
test(model, optimizer, x_test, y_test)
# The test cost is higher than the training cost at epoch 19

Accuracy: 0.0% Cost: 1.278216


learning rate가 너무 크면 diverge 하면서 cost가 점점 늘어난다 (=overshooting)

In [15]:
# Build a new model instance for the too-large learning-rate experiment.
model = SoftmaxClassifierModel()

In [16]:
# Deliberately oversized learning rate (1e5) to demonstrate overshooting.
optimizer = optim.SGD(model.parameters(), lr=1e5)

In [17]:
# With lr=1e5 the printed cost jumps to around 1e6 after the first step and never settles.
train(model, optimizer, x_train, y_train)

Epoch    0/20 Cost: 1.308120
Epoch    1/20 Cost: 1013819.562500
Epoch    2/20 Cost: 1282156.125000
Epoch    3/20 Cost: 1562634.750000
Epoch    4/20 Cost: 1107569.500000
Epoch    5/20 Cost: 1961843.500000
Epoch    6/20 Cost: 240174.937500
Epoch    7/20 Cost: 1161843.500000
Epoch    8/20 Cost: 990382.062500
Epoch    9/20 Cost: 1340759.750000
Epoch   10/20 Cost: 1841531.000000
Epoch   11/20 Cost: 1084132.000000
Epoch   12/20 Cost: 1261843.500000
Epoch   13/20 Cost: 1254822.250000
Epoch   14/20 Cost: 1177882.000000
Epoch   15/20 Cost: 1941531.125000
Epoch   16/20 Cost: 262122.656250
Epoch   17/20 Cost: 1254031.125000
Epoch   18/20 Cost: 1089197.250000
Epoch   19/20 Cost: 1229444.500000


learning rate가 너무 작으면 cost가 거의 줄어들지 않는다.

In [18]:
# Build a new model instance for the too-small learning-rate experiment.
model = SoftmaxClassifierModel()

In [19]:
optimizer = optim.SGD(model.parameters(), lr=1e-10)
# Choosing an appropriate learning rate is essential

In [20]:
# With lr=1e-10 the printed cost does not change at six decimal places over all 20 epochs.
train(model, optimizer, x_train, y_train)

Epoch    0/20 Cost: 3.185592
Epoch    1/20 Cost: 3.185592
Epoch    2/20 Cost: 3.185592
Epoch    3/20 Cost: 3.185592
Epoch    4/20 Cost: 3.185592
Epoch    5/20 Cost: 3.185592
Epoch    6/20 Cost: 3.185592
Epoch    7/20 Cost: 3.185592
Epoch    8/20 Cost: 3.185592
Epoch    9/20 Cost: 3.185592
Epoch   10/20 Cost: 3.185592
Epoch   11/20 Cost: 3.185592
Epoch   12/20 Cost: 3.185592
Epoch   13/20 Cost: 3.185592
Epoch   14/20 Cost: 3.185592
Epoch   15/20 Cost: 3.185592
Epoch   16/20 Cost: 3.185592
Epoch   17/20 Cost: 3.185592
Epoch   18/20 Cost: 3.185592
Epoch   19/20 Cost: 3.185592


# Data Preprocessing

In [21]:
# Regression inputs: five samples with three features each.
x_train = torch.tensor(
    [
        [73, 80, 75],
        [93, 88, 93],
        [89, 91, 90],
        [96, 98, 100],
        [73, 66, 70],
    ],
    dtype=torch.float32,
)
# Regression targets: one value per sample, kept as a column.
y_train = torch.tensor(
    [[152], [185], [180], [196], [142]],
    dtype=torch.float32,
)

In [23]:
# 표준화
# Standardization: centre each feature column on its mean and scale it by
# its standard deviation, both computed along dim 0 (across samples).
mu = torch.mean(x_train, dim=0)
sigma = torch.std(x_train, dim=0)
norm_x_train = (x_train - mu) / sigma
print(norm_x_train)

tensor([[-1.0674, -0.3758, -0.8398],
        [ 0.7418,  0.2778,  0.5863],
        [ 0.3799,  0.5229,  0.3486],
        [ 1.0132,  1.0948,  1.1409],
        [-1.0674, -1.5197, -1.2360]])


In [27]:
class MultivariateLinearRegressionModel(nn.Module):
    """Linear regression model mapping 3 input features to 1 output value."""

    def __init__(self) -> None:
        super().__init__()
        # Single fully connected layer: 3 inputs -> 1 output.
        self.linear = nn.Linear(in_features=3, out_features=1)

    def forward(self, x):
        """Return the linear layer's output for ``x``."""
        output = self.linear(x)
        return output

In [31]:
# Instantiate the 3-input, 1-output linear regression model.
model = MultivariateLinearRegressionModel()

In [32]:
# SGD over all of the regression model's parameters with lr=0.1.
optimizer = optim.SGD(model.parameters(), lr=1e-1)

In [33]:
def train(model, optimizer, x_train, y_train):
    """Run 20 full-batch training steps with a mean-squared-error cost.

    Each epoch feeds all of ``x_train`` through ``model``, computes
    ``F.mse_loss`` against ``y_train``, takes one optimizer step and prints
    the cost measured before that step.

    Args:
        model: callable producing predictions for ``x_train``.
        optimizer: object exposing ``zero_grad()`` and ``step()`` for the
            model's parameters.
        x_train: input features.
        y_train: regression targets.
    """
    n_epochs = 20
    report = 'Epoch {:4d}/{} Cost: {:.6f}'.format

    for step in range(n_epochs):
        # Forward pass: H(x) and its MSE against the targets.
        mse = F.mse_loss(model(x_train), y_train)
        cost_value = mse.item()

        # Improve H(x): reset gradients, backpropagate, update parameters.
        optimizer.zero_grad()
        mse.backward()
        optimizer.step()

        print(report(step, n_epochs, cost_value))

In [34]:
train(model, optimizer, norm_x_train, y_train)
# y_train is a single value per sample here, but if it had several dimensions, preprocessing would be essential

Epoch    0/20 Cost: 29729.949219
Epoch    1/20 Cost: 18889.082031
Epoch    2/20 Cost: 12048.978516
Epoch    3/20 Cost: 7699.844727
Epoch    4/20 Cost: 4924.700195
Epoch    5/20 Cost: 3151.020264
Epoch    6/20 Cost: 2016.562866
Epoch    7/20 Cost: 1290.709229
Epoch    8/20 Cost: 826.216003
Epoch    9/20 Cost: 528.952271
Epoch   10/20 Cost: 338.703308
Epoch   11/20 Cost: 216.940033
Epoch   12/20 Cost: 139.007050
Epoch   13/20 Cost: 89.125130
Epoch   14/20 Cost: 57.196125
Epoch   15/20 Cost: 36.757317
Epoch   16/20 Cost: 23.672049
Epoch   17/20 Cost: 15.293421
Epoch   18/20 Cost: 9.927165
Epoch   19/20 Cost: 6.488914
