# Regression

## Data creation

In [None]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import torch

In [None]:
# Synthetic 1-D data set: x is drawn uniformly from [0, 2) and
# y = 4 + 3x plus standard-normal noise.
m = 100  # number of samples
X = torch.rand(m, 1) * 2
y = 3 * X + 4 + torch.randn(m, 1)

In [None]:
# Scatter plot of every generated sample (marker size 10).
plt.scatter(X, y, s=10)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples as a test set. No random_state is passed,
# so the split is different on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [None]:
# Overlay both subsets; each scatter carries its own legend label.
plt.scatter(X_train, y_train, s=10, label='Training set')
plt.scatter(X_test, y_test, s=10, label='Test set')
plt.legend()

## Regression model

In [None]:
# Tiny subset (first three training rows) used for the step-by-step walkthrough.
X_train_3, y_train_3 = X_train[:3], y_train[:3]

In [None]:
X_train_3, y_train_3

In [None]:
# Scatter plot of the three-sample subset only.
plt.scatter(X_train_3, y_train_3)

### Hypothesis  
$H(x) = Wx + b$

In [None]:
# Trainable slope and intercept, both starting at zero and tracked by autograd.
W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
# H(x) = W * x + b; the one-element parameters broadcast over the rows.
hypothesis = W * X_train_3 + b

In [None]:
hypothesis

### Compute loss

$\mathrm{cost}(W, b) = \frac{1}{m}\sum_{i=1}^{m}\left(H(x^{(i)}) - y^{(i)}\right)^2$

In [None]:
# Mean squared error between the predictions and the targets.
cost = (hypothesis - y_train_3).pow(2).mean()

In [None]:
cost

### Gradient descent

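#### 미분으로 계산

샘플 수를 $n$이라 하면 $\frac{\partial C}{\partial W} = \frac{2}{n}\sum_{i}(Wx_i + b - y_i)\,x_i$, $\frac{\partial C}{\partial b} = \frac{2}{n}\sum_{i}(Wx_i + b - y_i)$ 이다. 아래 셀에서는 $n = 3$.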

In [None]:
y_train_3

In [None]:
## dC/dW = (2 / n) * sum_i((W * x_i + b - y_i) * x_i), evaluated by hand.
# n is read from the data instead of being hard-coded as 3, and the rows are
# reduced with torch.sum over the sample axis rather than Python's built-in
# sum, which adds the rows one at a time in the interpreter.
torch.sum((2 / len(X_train_3)) * ((W * X_train_3 + b) - y_train_3) * X_train_3, dim=0)

In [None]:
## dC/db = (2 / n) * sum_i(W * x_i + b - y_i), evaluated by hand.
# n is read from the data instead of being hard-coded as 3, and the rows are
# reduced with torch.sum over the sample axis rather than Python's built-in
# sum, which adds the rows one at a time in the interpreter.
torch.sum((2 / len(X_train_3)) * ((W * X_train_3 + b) - y_train_3), dim=0)

#### torch.optim 라이브러리 활용

In [None]:
import torch.optim as optim

Optimizer 설정 - Stochastic gradient descent(SGD)를 활용하여 W와 b를 최적화.  
learning rate = 0.01

In [None]:
# SGD over the two parameters W and b with a learning rate of 0.01.
optimizer = optim.SGD([W, b], lr=0.01)

최적화 과정 - `optimizer.zero_grad()`, `cost.backward()`, `optimizer.step()` 세 가지 호출이 항상 함께 쓰임.

In [None]:
# Forward pass on the three-sample subset, then its mean squared error.
hypothesis = W * X_train_3 + b
cost = (hypothesis - y_train_3).pow(2).mean()

In [None]:
optimizer.zero_grad() # reset every parameter gradient to zero
# NOTE(review): retain_graph=True keeps the autograd graph alive after this
# call; presumably so the cell can be re-run without rebuilding cost.
# A single backward pass does not need it - confirm before removing.
cost.backward(retain_graph=True) # compute gradients and store them in each parameter's .grad
optimizer.step() # update the parameters with one optimizer step

gradient 확인

In [None]:
W.grad, b.grad

In [None]:
print(W, b)

#### 1 step 이후 확인

In [None]:
# Re-evaluate the hypothesis with the current W and b, then display it.
hypothesis = W * X_train_3 + b
hypothesis

In [None]:
# Three-sample subset together with the current predictions drawn as a line.
plt.scatter(X_train_3, y_train_3)
# detach() drops autograd tracking so the tensor can be converted to NumPy.
plt.plot(X_train_3, hypothesis.detach().numpy())

### Training with Full code

In [None]:
# Data setup: draw a fresh random 80/20 split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Model initialize: slope and intercept both start from zero
W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# Set optimizer
optimizer = optim.SGD([W, b], lr=0.01)

nb_epochs = 1000
# range(nb_epochs + 1) so that the last epoch (a multiple of 20) is logged too
for epoch in range(nb_epochs + 1):
    # Forward pass: H(X) = W * X + b on the full training set
    hypothesis = W * X_train + b

    # Mean squared error
    cost = (hypothesis - y_train).pow(2).mean()

    # Clear old gradients, backpropagate, then take one SGD step
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Report only every 20th epoch
    if epoch % 20:
        continue
    # cost was computed before this epoch's step; W and b are read after it
    print('Epoch {:4d}/{} W: {:.3f}, b: {:.3f} Cost: {:.6f}'.format(
        epoch, nb_epochs, W.item(), b.item(), cost.item()
    ))

In [None]:
# Predictions for the training inputs as a NumPy array; no_grad skips
# autograd tracking so the result converts without an explicit detach().
with torch.no_grad():
    hx = (W * X_train + b).numpy()

In [None]:
# Training targets (size-10 markers) against the fitted values (size-20, red).
plt.figure(figsize=(6, 6))
plt.scatter(X_train, y_train, s=10)
plt.scatter(X_train, hx, c='r', s=20)

## High level implementation with nn.Module

`nn.Module`을 활용하여 모델 구축  
`nn.Module`: 신경망 모듈. 각종 레이어(linear, conv, ...)를 지원하며 output을 return하는 forward(input) 메서드를 포함함

In [None]:
from torch import nn as nn
from torch.nn import functional as F

nn.Linear 레이어의 활용

In [None]:
class my_LinearRegression(nn.Module):
    """Univariate linear regression, H(x) = W * x + b, as one linear layer."""

    def __init__(self):
        super(my_LinearRegression, self).__init__()
        # One input feature mapped to one output value.
        self.linear = nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        prediction = self.linear(x)
        return prediction

In [None]:
# Fresh model instance wrapping a single nn.Linear(1, 1) layer.
model = my_LinearRegression()

In [None]:
model

In [None]:
# Forward pass on the first three training rows only.
hypothesis = model(X_train[:3])

In [None]:
hypothesis

In [None]:
# Forward pass on the whole training set, then the mean squared error
# between the predictions and y_train.
hypothesis = model(X_train)
cost = F.mse_loss(hypothesis, y_train)

In [None]:
cost

In [None]:
# SGD over every parameter registered on the model, learning rate 0.01.
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [None]:
# One optimisation step: clear old gradients, backpropagate the cost,
# then let the optimizer update the parameters.
optimizer.zero_grad()
cost.backward()
optimizer.step()

### Training with Full code

In [None]:
# Data setup: draw a fresh random 80/20 split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Model initialize
model = my_LinearRegression()

# Set optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01)

nb_epochs = 1000
# range(nb_epochs + 1) so that the last epoch (a multiple of 20) is logged too
for epoch in range(nb_epochs + 1):
    # Calculate H(X)
    hypothesis = model(X_train)

    # Calculate cost
    cost = F.mse_loss(hypothesis, y_train)

    # Parameter gradient descent
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 20 == 0:
        # Read the scalars straight from the layer. They are deliberately not
        # bound to the names W / b: those hold the trainable tensors of the
        # manual implementation, and rebinding them to floats would break
        # re-running those cells.
        print('Epoch {:4d}/{} W: {:.3f}, b: {:.3f} Cost: {:.6f}'.format(
            epoch, nb_epochs,
            model.linear.weight.item(), model.linear.bias.item(), cost.item()
        ))

### 결과 확인

In [None]:
# Model predictions for the training inputs as a NumPy array; no_grad skips
# autograd tracking so the output converts without an explicit detach().
with torch.no_grad():
    hx = model(X_train).numpy()

In [None]:
# Training targets (size-10 markers) against the model output (size-20, red).
plt.figure(figsize=(6, 6))
plt.scatter(X_train, y_train, s=10)
plt.scatter(X_train, hx, c='r', s=20)

## Multivariate Linear Regression

In [None]:
# Three features with different ranges: x1 in [0, 1), x2 in [0, 2), x3 in [0, 3).
m = 100  # number of samples
x1 = torch.rand(m, 1)
x2 = torch.rand(m, 1) * 2
x3 = torch.rand(m, 1) * 3
# Stack the feature columns side by side.
X = torch.cat([x1, x2, x3], dim=1)
# Target: y = 4 + 3*x1 + 2*x2 + 5*x3 plus standard-normal noise.
y = 4 + x1 * 3 + x2 * 2 + x3 * 5 + torch.randn(m, 1)

In [None]:
X.shape, y.shape

In [None]:
class MultivariateLinearRegressionModel(nn.Module):
    """Linear regression over several input features, as one linear layer.

    Args:
        in_features: Number of input features per sample. Defaults to 3,
            so calling the constructor without arguments builds the same
            ``nn.Linear(3, 1)`` layer as before.
    """

    def __init__(self, in_features: int = 3):
        super().__init__()
        # A single output value is produced for each sample.
        self.linear = nn.Linear(in_features, 1)

    def forward(self, x):
        """Apply the linear layer to ``x``.

        ``x`` must have ``in_features`` entries along its last dimension.
        """
        return self.linear(x)

In [None]:
# Data setup: random 80/20 split of the multivariate samples
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = MultivariateLinearRegressionModel()

# Set optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01)

nb_epochs = 2000
# range(nb_epochs + 1) so that the last epoch (a multiple of 20) is logged too
for epoch in range(nb_epochs + 1):
    # Calculate H(X)
    hypothesis = model(X_train)

    # Calculate cost
    cost = F.mse_loss(hypothesis, y_train)

    # Parameter gradient descent
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 20 == 0:
        # Build the parameter list once and reuse it for the log line.
        params = list(model.parameters())
        print('Epoch {:4d}/{} {} Cost: {:.6f}'.format(
            epoch, nb_epochs, params, cost.item()
        ))

### 결과 확인

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Reduce the training inputs to a single principal component so that the
# targets and predictions can be drawn against one axis.
pca = PCA(n_components=1)
X_pca = pca.fit_transform(X_train)

In [None]:
# Model predictions for the training inputs as a NumPy array; no_grad skips
# autograd tracking so the output converts without an explicit detach().
with torch.no_grad():
    hx = model(X_train).numpy()

In [None]:
# Targets and predictions, both plotted against the 1-D PCA projection.
plt.scatter(X_pca, y_train, s=20)
plt.scatter(X_pca, hx, s=20)