In [23]:
import numpy as np

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed in a numerically stable form: ``exp`` is only ever
    evaluated on non-positive numbers, so very negative inputs no longer
    trigger an overflow ``RuntimeWarning`` the way ``np.exp(-x)`` does.

    Args:
        x: A scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray with the same
        shape as ``x``; every value lies in the closed interval [0, 1].
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # Integer/bool input: promote so the arithmetic below is done in float.
        x = x.astype(float)

    # e = exp(-|x|) is always in (0, 1], so it can underflow to 0 but never
    # overflow.
    e = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + exp(-x)).  For x < 0 the algebraically identical
    # form exp(x) / (1 + exp(x)) is used instead.
    out = np.where(x >= 0, 1. / (1. + e), e / (1. + e))
    # Indexing with an empty tuple turns a 0-d result back into a scalar and
    # leaves n-d arrays unchanged.
    return out[()]

def numerical_derivative(f, x):
    """Estimate the gradient of ``f`` at ``x`` by central differences.

    Each element of ``x`` is perturbed IN PLACE by ``+delta_x`` and
    ``-delta_x`` in turn, ``f(x)`` is evaluated for both, and the element is
    restored afterwards. Because the perturbation is written into ``x``
    itself, ``f`` may either use its argument or read the same array through
    another reference.

    Args:
        f: Callable taking ``x`` and returning a scalar.
        x: Floating-point ``numpy.ndarray`` of any shape (including empty).

    Returns:
        ndarray with the same shape and dtype as ``x`` holding
        ``(f(x + d) - f(x - d)) / (2 * d)`` for every element.

    Raises:
        TypeError: If ``x`` is not a floating-point ndarray. A perturbation
            of 1e-4 written into an integer array would be truncated and the
            resulting "gradient" would be silently wrong.
    """
    if not isinstance(x, np.ndarray) or not np.issubdtype(x.dtype, np.floating):
        raise TypeError(
            "numerical_derivative requires x to be a floating-point numpy.ndarray"
        )

    delta_x = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    # np.ndindex yields every index tuple of x and simply yields nothing for
    # a zero-size array, so an empty x returns an empty gradient.
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = float(tmp_val) - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Always restore the original value, even if f raised, so the
            # caller's array is never left perturbed.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad


In [59]:
class diabetes:
    """Binary classifier: one sigmoid hidden layer and one sigmoid output node.

    The parameters (W2, b2, W3, b3) are fitted by gradient descent on the
    summed binary cross-entropy, using numerically estimated gradients.

    NOTE(review): the training log recorded with this notebook shows the loss
    oscillating instead of decreasing. Presumably the unscaled input features
    saturate the sigmoid at this learning rate; consider standardising the
    features before training. Confirm before changing.
    """

    def __init__(self, xdata, tdata, hidden_nodes=4, learning_rate=1e-2):
        """Store the training set and randomly initialise the parameters.

        Args:
            xdata: Training inputs; reshaped to (n_samples, n_features).
            tdata: Training targets (0 or 1); reshaped to (n_samples, 1).
            hidden_nodes: Number of hidden-layer nodes.
            learning_rate: Step size used by ``train``.

        Raises:
            ValueError: If ``tdata`` is empty, or ``xdata`` cannot be split
                evenly into one row per target.
        """
        # The sample count comes from the targets and the feature count is
        # inferred, rather than hard-coding a 768 x 8 dataset.
        self.tdata = np.asarray(tdata).reshape(-1, 1)  # target data
        n_samples = self.tdata.shape[0]
        if n_samples == 0:
            raise ValueError("tdata must contain at least one sample")
        self.xdata = np.asarray(xdata).reshape(n_samples, -1)  # input data
        n_features = self.xdata.shape[1]

        # Input layer -> hidden layer weights and bias.
        self.W2 = np.random.rand(n_features, hidden_nodes)
        self.b2 = np.random.rand(hidden_nodes)
        # Hidden layer -> single output node weights and bias.
        self.W3 = np.random.rand(hidden_nodes, 1)
        self.b3 = np.random.rand(1)
        # Learning rate for gradient descent.
        self.learning_rate = learning_rate

    def _forward(self, x):
        """Return the network output (sigmoid activations) for inputs ``x``."""
        z2 = np.dot(x, self.W2) + self.b2  # hidden layer linear value
        a2 = sigmoid(z2)                   # hidden layer output
        z3 = np.dot(a2, self.W3) + self.b3  # output layer linear value
        return sigmoid(z3)                 # output layer output

    def _cross_entropy(self):
        """Return the summed binary cross-entropy over the training set."""
        delta = 1e-7  # keeps log() away from log(0) = -inf
        y = self._forward(self.xdata)
        return -np.sum(self.tdata * np.log(y + delta)
                       + (1 - self.tdata) * np.log((1 - y) + delta))

    def feed_forward(self):
        """Run a forward pass on the training data and return the loss."""
        return self._cross_entropy()

    def loss_val(self):
        """Return the current loss on the training data."""
        return self._cross_entropy()

    def train(self, num_steps=90001, log_interval=1000):
        """Update W2, b2, W3, b3 by gradient descent.

        Args:
            num_steps: Number of update steps to run.
            log_interval: Print the loss every this many steps; a falsy value
                disables the per-step printing.
        """
        # numerical_derivative perturbs the parameter array it is given in
        # place, and the loss reads that same array through self, so the
        # objective can ignore its argument.
        def f(_):
            return self.feed_forward()

        print("Initial loss value = ", self.loss_val())
        for step in range(num_steps):
            # In-place "-=" keeps each parameter the same array object.
            self.W2 -= self.learning_rate * numerical_derivative(f, self.W2)
            self.b2 -= self.learning_rate * numerical_derivative(f, self.b2)
            self.W3 -= self.learning_rate * numerical_derivative(f, self.W3)
            self.b3 -= self.learning_rate * numerical_derivative(f, self.b3)
            if log_interval and step % log_interval == 0:
                print("step = ", step, "loss value = ", self.loss_val())

    def predict(self, input_data):
        """Predict the class for one sample or a batch of samples.

        The stored training data is left untouched, so ``loss_val`` and
        ``train`` still work after predicting.

        Args:
            input_data: One sample of n_features values, or a 2-D batch with
                one sample per row.

        Returns:
            Tuple ``(y, result)``: ``y`` is the sigmoid output of the network;
            ``result`` is 1 if ``y > 0.5`` else 0 for a single sample, or an
            integer array of 0/1 with the shape of ``y`` for a batch.
        """
        y = self._forward(np.asarray(input_data))

        if y.size == 1:
            result = 1 if y.item() > 0.5 else 0
        else:
            result = (y > 0.5).astype(int)

        return y, result


In [None]:
import pandas as pd
import numpy as np

# Load the dataset: the 'Outcome' column is the label, every other column is
# an input feature.
df = pd.read_csv('../diabetes.csv')

tdata = df['Outcome']
xdata = df.drop(columns=['Outcome'])

# Hand plain ndarrays to the network, then fit it.
diab = diabetes(xdata.to_numpy(), tdata.to_numpy())
diab.train()


Initial loss value =  1560.783651896429
step =  0 loss value =  3687.150153694536
step =  1000 loss value =  638.351628318341
step =  2000 loss value =  501.87314518047816
step =  3000 loss value =  1095.7122659129498
step =  4000 loss value =  500.0786605048306
step =  5000 loss value =  498.54714989507687
step =  6000 loss value =  559.0554117765681
step =  7000 loss value =  578.9943077464796
step =  8000 loss value =  498.48440413998253
step =  9000 loss value =  976.0562677266755


  after removing the cwd from sys.path.


step =  10000 loss value =  848.3592691600546
step =  11000 loss value =  549.165063673961
step =  12000 loss value =  580.7293204687135
step =  13000 loss value =  1446.160494606427
step =  14000 loss value =  974.2204735623646
step =  15000 loss value =  833.2621233408315
step =  16000 loss value =  508.6428620633256
step =  17000 loss value =  1005.8870469394171
step =  18000 loss value =  550.4386841306454
step =  19000 loss value =  624.4660730185694
step =  20000 loss value =  504.9741602001114
step =  21000 loss value =  837.1284760218067
step =  22000 loss value =  614.741063337668
step =  23000 loss value =  606.708536525325
step =  24000 loss value =  825.3024630068988
step =  25000 loss value =  778.3759622936713
step =  26000 loss value =  577.2716186195114
step =  27000 loss value =  508.91877962859684
step =  28000 loss value =  838.5029278787895
step =  29000 loss value =  812.5009064665086
step =  30000 loss value =  625.3222858467184
step =  31000 loss value =  2582.95

In [58]:
# Validation: run the trained network on a few hand-picked samples
# (8 feature values per row) and print the predicted class for each.
test_data = np.array([
    [8, 154, 78, 32, 0, 32.4, 0.443, 45],
    [1, 128, 88, 39, 110, 36.5, 1.057, 37],
    [7, 137, 90, 41, 0, 32, 0.391, 39],
    [0, 123, 72, 0, 0, 36.3, 0.258, 52],
    [1, 106, 76, 0, 0, 37.5, 0.197, 26],
])

for data in test_data:
    # predict returns (sigmoid output, 0/1 class); only the class is shown.
    sigmoid_val, logical_val = diab.predict(data)
    print(data, " = ", logical_val)


[  8.    154.     78.     32.      0.     32.4     0.443  45.   ]  =  1
[  1.    128.     88.     39.    110.     36.5     1.057  37.   ]  =  1
[  7.    137.     90.     41.      0.     32.      0.391  39.   ]  =  1
[  0.    123.     72.      0.      0.     36.3     0.258  52.   ]  =  1
[  1.    106.     76.      0.      0.     37.5     0.197  26.   ]  =  1


In [None]:
8,154,78,32,0,32.4,0.443,45,1
1,128,88,39,110,36.5,1.057,37,1
7,137,90,41,0,32,0.391,39,0
0,123,72,0,0,36.3,0.258,52,1
1,106,76,0,0,37.5,0.197,26,0