# PyTorch Tutorial 2

## Logistic Regression
Sigmoid 함수를 활용한 학습모형 만들기

In [1]:
import torch
from torch.autograd import Variable
import numpy as np
# Seed PyTorch's random number generator so that runs are repeatable.
torch.manual_seed(777)

<torch._C.Generator at 0x7fbd8c0b90d8>

In [2]:
# Toy training set: six samples with two features each ...
x_data = np.array(
    [
        [1, 2],
        [2, 3],
        [3, 1],
        [4, 3],
        [5, 3],
        [6, 2],
    ],
    dtype=np.float32,
)
# ... and one binary label per sample, stored as a (6, 1) column.
y_data = np.array([[label] for label in (0, 0, 0, 1, 1, 1)], dtype=np.float32)
# Wrap both arrays as torch Variables (from_numpy shares memory with the array).
X, Y = (Variable(torch.from_numpy(array)) for array in (x_data, y_data))
# Trailing expression: lets the notebook display the underlying tensor.
X.data


 1  2
 2  3
 3  1
 4  3
 5  3
 6  2
[torch.FloatTensor of size 6x2]

In [3]:
# Display the label tensor (one 0/1 value per sample) as the cell's output.
Y.data


 0
 0
 0
 1
 1
 1
[torch.FloatTensor of size 6x1]

In [4]:
# Hypothesis using sigmoid
# [TensorFlow] tf.div(1., 1. + tf.exp(tf.matmul(X, W))) : there the matrix
# product has to be typed out by hand in the middle of the expression.
# (As quoted, that formula is missing the minus sign: a sigmoid is
# 1 / (1 + exp(-z)).)  Here the hypothesis sigmoid(X @ W + b) is assembled
# from two stock modules instead.
sigmoid = torch.nn.Sigmoid()
linear = torch.nn.Linear(in_features=2, out_features=1, bias=True)
model = torch.nn.Sequential(linear, sigmoid)
# Plain SGD over the linear layer's weight and bias.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)

In [5]:
# Train with full-batch gradient descent: every step feeds all of X to the model.
for step in range(10001):
    # Gradients accumulate across backward() calls, so clear them first.
    optimizer.zero_grad()
    hypothesis = model(X)
    # cost/loss function: mean binary cross-entropy,
    #   -mean(Y * log(h) + (1 - Y) * log(1 - h)).
    # The library implementation is used instead of spelling the formula out
    # with torch.log: if the sigmoid saturates (h == 0 or h == 1) the
    # hand-written version evaluates log(0) and the loss becomes inf/NaN.
    cost = torch.nn.functional.binary_cross_entropy(hypothesis, Y)
    cost.backward()
    optimizer.step()

    # Report progress every 2000 steps (0, 2000, ..., 10000).
    if step % 2000 == 0:
        print(step, cost.data.numpy())

0 [ 1.34154904]
2000 [ 0.33371782]
4000 [ 0.25374085]
6000 [ 0.20293365]
8000 [ 0.1686267]
10000 [ 0.14417094]


In [6]:
# Accuracy computation
# Re-run the forward pass with the final weights. The `hypothesis` left over
# from the training loop was computed *before* the last optimizer.step(), so
# printing it next to `predicted` would mix outputs of two different models.
hypothesis = model(X)
# Threshold the sigmoid output at 0.5 to obtain hard 0/1 predictions.
predicted = (hypothesis.data > 0.5).float()
# Fraction of samples whose prediction equals the label.
accuracy = (predicted == Y.data).float().mean()
# NOTE: the "Correct (Y)" column shows the thresholded predictions, not the labels in Y.
print("\nHypothesis: ", hypothesis.data.numpy(), "\nCorrect (Y): ", predicted.numpy(), "\nAccuracy: ", accuracy)


Hypothesis:  [[ 0.02853693]
 [ 0.15574944]
 [ 0.2941438 ]
 [ 0.78632414]
 [ 0.94264686]
 [ 0.98120463]] 
Correct (Y):  [[ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]] 
Accuracy:  1.0


## CSV를 활용한 Sigmoid 학습모형

In [7]:
import torch
from torch.autograd import Variable
import numpy as np
# Reset the seed so this section starts from the same RNG state as the first one.
torch.manual_seed(777)

<torch._C.Generator at 0x7fbd8c0b90d8>

In [8]:
# Read the comma-separated file into a single float32 array.
xy = np.loadtxt(
    fname='./Lecture/data-03-diabetes.csv',
    dtype=np.float32,
    delimiter=',',
)
# Peek at every 200th row instead of dumping the whole array.
print(xy[0::200])

[[-0.29411799  0.48743701  0.180328   -0.29292899  0.          0.00149028
  -0.53117001 -0.0333333   0.        ]
 [ 0.          0.0854271   0.114754   -0.59596002  0.         -0.186289
  -0.39453501 -0.63333303  1.        ]
 [-0.52941197  0.155779    0.180328    0.          0.         -0.13859899
  -0.74551702 -0.166667    0.        ]
 [ 0.          0.52763802  0.344262   -0.21212099 -0.35697401  0.23695999
  -0.83603799 -0.80000001  1.        ]]


In [9]:
# Every column except the last is used as model input ...
x_data = xy[:, :-1]
# ... and the last column is the target; indexing with the list [-1]
# (rather than the scalar -1) keeps the result two-dimensional.
y_data = xy[:, [-1]]
# Sanity checks: first feature row, first three labels, the distinct label
# values, and both shapes.
print('\nx_data :\n', x_data[:1])
print('\ny_data :\n', y_data[:3], '\n', {row[0] for row in y_data})
print('\nx_data : ', x_data.shape, '\ny_data : ', y_data.shape)


x_data :
 [[-0.29411799  0.48743701  0.180328   -0.29292899  0.          0.00149028
  -0.53117001 -0.0333333 ]]

y_data :
 [[ 0.]
 [ 1.]
 [ 0.]] 
 {0.0, 1.0}

x_data :  (759, 8) 
y_data :  (759, 1)


In [10]:
# Convert the NumPy arrays into a form PyTorch can work with.
# torch.from_numpy shares memory with the source array rather than copying it.
X, Y = (Variable(torch.from_numpy(array)) for array in (x_data, y_data))

In [11]:
# Build the hypothesis with the sigmoid function: a linear layer over the
# eight input features followed by a sigmoid, i.e. sigmoid(X @ W + b).
sigmoid = torch.nn.Sigmoid()
linear = torch.nn.Linear(in_features=8, out_features=1, bias=True)
model = torch.nn.Sequential(linear, sigmoid)
# Plain SGD over the linear layer's weight and bias.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)

In [12]:
# Train with full-batch gradient descent: every step feeds all of X to the model.
for step in range(10001):
    # Gradients accumulate across backward() calls, so clear them first.
    optimizer.zero_grad()
    hypothesis = model(X)

    # cost/loss function: mean binary cross-entropy,
    #   -mean(Y * log(h) + (1 - Y) * log(1 - h)).
    # The library implementation is used instead of spelling the formula out
    # with torch.log: if the sigmoid saturates (h == 0 or h == 1) the
    # hand-written version evaluates log(0) and the loss becomes inf/NaN.
    cost = torch.nn.functional.binary_cross_entropy(hypothesis, Y)
    cost.backward()
    optimizer.step()

    # Report progress every 2000 steps (0, 2000, ..., 10000).
    if step % 2000 == 0:
        print(step, cost.data.numpy())

0 [ 0.63136256]
2000 [ 0.54290104]
4000 [ 0.50976342]
6000 [ 0.49440202]
8000 [ 0.48628604]
10000 [ 0.48161238]


In [13]:
# Measure the trained model's accuracy: roughly 76% (still fairly low).
# [Difference from TensorFlow] prediction and accuracy do not have to be run
# inside the training loop; they can be computed once training has finished.
# TODO (open question): how is memory managed with large datasets in that
# case? Does it keep accumulating?
#
# Re-run the forward pass with the final weights. The `hypothesis` left over
# from the training loop was computed *before* the last optimizer.step(), so
# printing it next to `predicted` would mix outputs of two different models.
hypothesis = model(X)
# Threshold the sigmoid output at 0.5 to obtain hard 0/1 predictions.
predicted = (hypothesis.data > 0.5).float()
# Fraction of samples whose prediction equals the label.
accuracy = (predicted == Y.data).float().mean()
# Only every 100th row is printed to keep the output short.
# NOTE: the "Correct" column shows the thresholded predictions, not the labels in Y.
print("\nHypothesis (", len(hypothesis.data.numpy()), ") :", hypothesis.data.numpy()[::100],
      "\nCorrect (", len(predicted.numpy()), ")  :", predicted.numpy()[::100],
      "\nAccuracy : ", accuracy)


Hypothesis ( 759 ) : [[ 0.41512308]
 [ 0.61203277]
 [ 0.7081036 ]
 [ 0.75963616]
 [ 0.72033864]
 [ 0.7712003 ]
 [ 0.3413842 ]
 [ 0.8460204 ]] 
Correct ( 759 )  : [[ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 1.]] 
Accuracy :  0.764163372859025
