### Logistic Regression : 2진 분류

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
tf.random.set_seed(5)

In [2]:
# x_data : [6,2]
x_data = [[1,2],
          [2,3],
          [3,1],
          [4,3],
          [5,3],
          [6,2]]

# y_data : [6,1]
y_data = [[0],
          [0],
          [0],
          [1],
          [1],
          [1]]
x_train = np.array(x_data,dtype=np.float32)
y_train = np.array(y_data,dtype=np.float32)

In [3]:
# 변수 초기화 : weight, bias
# (m,n) * (n,L) = (m,L)
# (25,3) * (3,1) = (25,1)
W = tf.Variable(tf.random.normal([2,1]), name='weight')  # 중요
b = tf.Variable(tf.random.normal([1]), name='bias')

In [4]:
# 예측 함수(hypothesis) : H(X) = sigmoid(W*X + b)
def hypothesis(X):
    return tf.sigmoid(tf.matmul(X,W)) + b  # 내적 곱셈

In [14]:
# 비용함수 : logloss, 2진 분류 모델
def cost_func():
    # cost = tf.reduce_mean(tf.square(hypothesis(x_train) - y_train)) # 회귀모델
    
    cost = -tf.reduce_mean(y_train*tf.math.log(hypothesis(x_train)) +
                         (1-y_train)*tf.math.log(1-hypothesis(x_train)))
    return cost

In [15]:
# 경사 하강법
# Learning_rate(학습율)을 0.01 로 설정하여 optimizer객체를 생성
optimizer = tf.keras.optimizers.Adam(lr=0.01)

In [16]:
# 학습 시작
print('***** Start learning!!')
for step in range(10001):
    # cost를 minimize 한다
    optimizer.minimize(cost_func,var_list=[W,b])
    
    if step % 1000 == 0:
        print('%04d'%step,'cost:[]',cost_func().numpy(),']',
              ' W:',W.numpy(),'b:',b.numpy())    # 몇번째, 코스트, w,b
        
print('***** Learning Finished!!')

***** Start learning!!
0000 cost:[] nan ]  W: [[10.034181]
 [-9.966332]] b: [-0.33174622]
1000 cost:[] nan ]  W: [[ 13.426198]
 [-13.351825]] b: [-0.33333346]
2000 cost:[] nan ]  W: [[ 14.550961]
 [-14.49787 ]] b: [-0.33334312]
3000 cost:[] nan ]  W: [[ 15.329802]
 [-15.261077]] b: [-0.33333412]
4000 cost:[] nan ]  W: [[ 15.965197]
 [-15.813433]] b: [-0.33329475]
5000 cost:[] nan ]  W: [[ 16.487532]
 [-16.437475]] b: [-0.3332533]
6000 cost:[] nan ]  W: [[ 16.81782 ]
 [-16.877998]] b: [-0.3333275]
7000 cost:[] nan ]  W: [[ 17.073895]
 [-17.216143]] b: [-0.3331747]
8000 cost:[] nan ]  W: [[ 17.279501]
 [-17.49194 ]] b: [-0.33333734]
9000 cost:[] nan ]  W: [[ 17.393406]
 [-17.642014]] b: [-0.3334219]
10000 cost:[] nan ]  W: [[ 17.46021 ]
 [-17.733086]] b: [-0.3333604]
***** Learning Finished!!


In [17]:
# 회귀 계수, weight과 bias 출력
print('Weight:',W.numpy())
print('Bias:',b.numpy())

Weight: [[ 17.46021 ]
 [-17.733086]]
Bias: [-0.3333604]


In [19]:
# 정확도 측정 : accuracy computation
def predict(X):
    return tf.cast(hypothesis(x_train) > 0.5,dtype=tf.float32)

# 학습 데이터를 그대로 검증 데이터로 사용하여 예측한 경우
x_test = x_train
y_test = y_train

preds = predict(x_test)
accuracy = tf.reduce_mean(tf.cast(tf.equal(preds,y_test),dtype=tf.float32))

print('Accuracy:',accuracy.numpy()) # Accuracy: 1.0
print('Hypothesis:\n',hypothesis(x_test).numpy())
print('Predict:\n',preds.numpy())

Accuracy: 0.8333333
Hypothesis:
 [[-0.3333604 ]
 [-0.3333604 ]
 [ 0.66663957]
 [ 0.66663957]
 [ 0.66663957]
 [ 0.66663957]]
Predict:
 [[0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [12]:
# 예측
print('*****Predict')
x_data = [[1,1],
          [2,5],
          [3,2],
          [4,4],
          [5,2],
          [6,6]] 
x_test = np.array(x_data,dtype=np.float32)
preds = predict(x_test)
print(preds.numpy())

*****Predict
[[0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]]
