In [1]:
import numpy as np
import pandas as pd

from sklearn import datasets

import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# data_set = datasets.load_boston()
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]

X, y = data, target

In [3]:
X = torch.FloatTensor(X)
# y = torch.FloatTensor(y).unsqueeze(-1)
y = torch.FloatTensor(y).view(-1, 1) # reshape 기능

In [4]:
# standardization - (X - 평균) / 표준편차
X = (X - torch.mean(X)) / torch.std(X)

### 선형 회귀

In [42]:
model = nn.Linear(13, 1) # 선형 회귀 모델

criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr = 0.01)

In [44]:
def train(model, criterion, optimizer, X, y):
    # 초기화
    optimizer.zero_grad()
    
    hypothesis = model(X) # 가설...?
    
    loss = criterion(hypothesis, y)
    loss.backward()
    
    optimizer.step()
    
    return loss.item()

In [45]:
n_epochs = 100

for epoch in range(1, n_epochs+1):
    loss = train(model, criterion, optimizer, X, y)
    
    if epoch % 10 == 0:
        print('epoch: {}, loss: {:.4f}'.format(epoch, loss))

epoch: 10, loss: 114.2711
epoch: 20, loss: 97.4869
epoch: 30, loss: 88.4511
epoch: 40, loss: 82.6769
epoch: 50, loss: 78.9274
epoch: 60, loss: 76.4395
epoch: 70, loss: 74.7396
epoch: 80, loss: 73.5334
epoch: 90, loss: 72.6382
epoch: 100, loss: 71.9405


### 로지스틱 회귀

In [74]:
data_set = datasets.load_breast_cancer()

X, y = data_set['data'], data_set['target']
X = torch.FloatTensor(X)
y = torch.FloatTensor(y).view(-1, 1)

X = (X - torch.mean(X)) / torch.std(X)

In [81]:
model = nn.Sequential(nn.Linear(30, 1),
                      nn.Sigmoid())

criterion = nn.BCELoss() # binary cross entropy
optimizer = optim.SGD(model.parameters(), lr = 0.1)

In [82]:
def train(model, criterion, optimizer, X, y):
    optimizer.zero_grad()
    
    hypothesis = model(X)
    
    loss = criterion(hypothesis, y)
    loss.backward()
    
    optimizer.step()
    
    return loss.item()

In [83]:
n_epochs = 100

for epoch in range(1, n_epochs+1):
    loss = train(model, criterion, optimizer, X, y)
    
    if epoch % 10 == 0:
        print('epoch: {}, loss: {:.4f}'.format(epoch, loss))

epoch: 10, loss: 0.5725
epoch: 20, loss: 0.5093
epoch: 30, loss: 0.4635
epoch: 40, loss: 0.4289
epoch: 50, loss: 0.4019
epoch: 60, loss: 0.3804
epoch: 70, loss: 0.3627
epoch: 80, loss: 0.3481
epoch: 90, loss: 0.3357
epoch: 100, loss: 0.3250


In [84]:
y_predicted = (model(X) >= 0.5).float()

score = (y_predicted == y).float().mean()
print('accuracy: {:.2f}'.format(score))

accuracy: 0.91


### 클래스를 이용한 모델 정의

In [99]:
# 기본적인 선형 회귀 모델
class LinearRegression(nn.Module):
    def __init__(self, num_features):
        # 부모 노드 상속
        super().__init__()
        # 함수를 변수에 저장하는 방식 - self.linear 변수에 nn.Linear 함수를 할당
        self.linear = nn.Linear(num_features, 1)
        
    # 모델의 순전파 정의
    def forward(self, X):
        out = self.linear(X) # 선형 회귀 모델 사용
        
        return out
    
model = LinearRegression(13)

In [None]:
# 로지스틱 회귀 모델
class LogisticRegression(nn.Module):
    def __init__(self, num_features):
        super().__init__()
        
        self.linear = nn.Linear(num_features, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, X):
        # keras에서 레이어를 쌓아가듯이 함수를 쌓아주는 방식
        out = self.linear(X)
        out = self.Sigmoid(out)
        
        return out
    
model = LogisticRregression(30)

### 배치 학습

In [5]:
from sklearn import datasets

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [6]:
data_set = datasets.load_breast_cancer()

X, y = data_set['data'], data_set['target']
X = torch.FloatTensor(X)
y = torch.FloatTensor(y).view(-1, 1)

X = (X - torch.mean(X)) / torch.std(X)

In [7]:
# 파이토치의 데이터로더를 사용하기 위해서는 데이터세트 자료구조로 바꿔주어야 한다.
dset = TensorDataset(X, y)

loader = DataLoader(dset, batch_size = 256, shuffle = True)

In [27]:
# https://velog.io/@seoyeonmmn/PyTorch-Tutorial-01.-Linear-Layer-nn.Module

In [28]:
class NeuralNetwork(nn.Module):
    def __init__(self, num_features):
        super().__init__()
        
        self.linear1 = nn.Linear(num_features, 4) # self.linear1 은 클래스에 대한 객체
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(4, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, X):
        out = self.linear1(X) # 선형 회귀
        out = self.relu(out) # 활성화 함수(relu)
        out = self.linear2(out) # 선형 회귀
        out = self.sigmoid(out) # 활성화 함수(sigmoid)
        
        return out

model = NeuralNetwork(30)
criterion = nn.BCELoss() # 이진 크로스 엔트로피 손실함수(Binary Cross Entropy Loss Function)
optimizer = optim.SGD(model.parameters(), lr = 0.1) # 확률적 경사 하강법 (Stochastic Gradient Descent)

In [29]:
optimizer.zero_grad()

In [30]:
optimizer

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.1
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [33]:
def train(model, criterion, optimizer, loader):
    epoch_loss = 0
    
    for X_batch, y_batch in loader:
        optimizer.zero_grad() # 학습 시작시에 기울기를 초기화하는 작업
        hypothesis = model(X_batch) # 1차적인 모델 학습 결과
        loss = criterion(hypothesis, y_batch) # 1차적인 모델 학습의 결과와 라벨 값을 비교하여 손실값 계산
        loss.backward()
        
        optimizer.step()
        epoch_loss += loss.item()
        
    return epoch_loss / len(loader) # 평균 손실값 리턴

In [34]:
n_epochs = 100

for epoch in range(1, n_epochs + 1):
    loss = train(model, criterion, optimizer, loader)
    
    if epoch % 10 == 0:
        print('epoch: {}, loss: {:.4f}'.format(epoch, loss))

epoch: 10, loss: 0.5859
epoch: 20, loss: 0.4699
epoch: 30, loss: 0.3755
epoch: 40, loss: 0.3269
epoch: 50, loss: 0.2931
epoch: 60, loss: 0.2639
epoch: 70, loss: 0.2439
epoch: 80, loss: 0.2469
epoch: 90, loss: 0.2305
epoch: 100, loss: 0.2109


In [35]:
y_predicted = (model(X) >= 0.5).float()

score = (y_predicted == y).float().mean()
print('accuracy: {:.2f}'.format(score))

accuracy: 0.92


### 모델 저장하고 불러오기

In [41]:
y_predicted_1 = (model(X) >= 0.5).float()

score_of_trained_model = (y_predicted_1 == y).float().mean()
print('accuracy of trained model: {:.2f}'.format(score_of_trained_model))

# 학습한 모델의 가중치 저장
torch.save(model.state_dict(), './data/trained_model.pt')

accuracy of trained model: 0.92


In [45]:
load_model = NeuralNetwork(30)
load_model.load_state_dict(torch.load('./data/trained_model.pt'))

y_predicted_2 = (load_model(X) >= 0.5).float()

score_of_load_model = (y_predicted_2 == y).float().mean()
print('accuracy of loaded model: {:.2f}'.format(score_of_load_model))

# 결국 학습의 결과물은 '가중치'.
# 위에서 학습한 모델의 가중치를 저장한 후에, 해당 가중치를 그대로 읽어와 새로운 모델에 적용했으므로, 둘의 결과는 동일하게 나온다.

accuracy of loaded model: 0.92
