## ver 2의 변화

### 1. ver 1의 경우 training data 전체 행에 대한 오차의 평균을 줄이기 위해 w, b를 업데이트 하는 것이 1 번의 수행이다.
### 그러나 ver 2의 경우 각각의 행에 대해서 오차를 줄이기 위해 한 행에서 w, b를 업데이트 하고 다음 행에 바로 적용하는 방법을 적용한다. 그렇기 때문에 더 빠르게 오차가 줄어들 수 있다.
### 두 방법 모두 결국엔 오차를 줄이는 것이기 때문에 더 빠른 방법을 사용하여 학습 시간을 줄인다.

In [1]:
import numpy as np
from datetime import datetime

In [2]:
def numerical_derivative(f, x):
    delta_x = 1e-4 # 0.0001
    grad = np.zeros_like(x)
    
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    
    while not it.finished:
        idx = it.multi_index        
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + delta_x
        fx1 = f(x) # f(x+delta_x)
        
        x[idx] = tmp_val - delta_x 
        fx2 = f(x) # f(x-delta_x)
        grad[idx] = (fx1 - fx2) / (2*delta_x)
        
        x[idx] = tmp_val 
        it.iternext()   
        
    return grad

def sigmoid(x):
    return 1 / (1+np.exp(-x))

In [3]:
class Diabetes:
    
    def __init__(self, gate_name, i_node, h1_node, o_node, learning_rate):  
        self.name = gate_name
        
        self.W2 = np.random.rand(i_node, h1_node)             
        self.b2 = np.random.rand(h1_node)
        
        self.W3 = np.random.rand(h1_node, o_node) 
        self.b3 = np.random.rand(o_node)
                        
        self.learning_rate = learning_rate
        
        print(self.name + " object is created")

    def feed_forward(self):
        delta = 1e-7   
        
        Z2 = np.dot(self.xdata, self.W2) + self.b2
        A2 = sigmoid(Z2)
        
        Z3 = np.dot(A2, self.W3) + self.b3
        y = sigmoid(Z3)
        
        return  -np.sum( self.tdata*np.log(y + delta) + (1-self.tdata)*np.log((1 - y)+delta ) )

    def loss_val(self):
        delta = 1e-7   
    
        Z2 = np.dot(self.xdata, self.W2) + self.b2    
        A2 = sigmoid(Z2)    
    
        Z3 = np.dot(A2, self.W3) + self.b3    
        y = sigmoid(Z3)    
        
        return  -np.sum( self.tdata*np.log(y + delta) + (1-self.tdata)*np.log((1 - y)+delta ) )
                       
    def train(self, xdata, tdata):
        self.xdata = xdata
        self.tdata = tdata
        
        f = lambda x : self.feed_forward()
                       
        self.W2 -= self.learning_rate * numerical_derivative( f, self.W2 )
        self.b2 -= self.learning_rate * numerical_derivative( f, self.b2 )

        self.W3 -= self.learning_rate * numerical_derivative( f, self.W3 )
        self.b3 -= self.learning_rate * numerical_derivative( f, self.b3 )
                       
    def predict(self, x):
        z2 = np.dot(x, self.W2) + self.b2
        a2 = sigmoid(z2)
    
        z3 = np.dot(a2, self.W3) + self.b3
        y = a3 = sigmoid(z3)
    
        if y > 0.5:
            result = 1  # True
        else:
            result = 0  # False
    
        return y, result
    
    # 정확도 예측 함수
    def accuracy(self, test_xdata, test_tdata):
        matched_list = []
        not_matched_list = []
        index_label_prediction_list = []
        temp_list = []
        
        for index in range(len(test_xdata)):            
            (real_val, logical_val) = self.predict(test_xdata[index])
            
            if logical_val == test_tdata[index]:
                matched_list.append(index)
            else:
                not_matched_list.append(index)
                
                temp_list.append(index)
                temp_list.append(test_tdata[index])
                temp_list.append(logical_val)
                
                index_label_prediction_list.append(temp_list)
                
                temp_list = []
        
        accuracy_result = len(matched_list) / len(test_xdata)
        
        print("\nAccuracy =>", accuracy_result)
        
        return matched_list, not_matched_list, index_label_prediction_list

## Diabetes ver 2

In [8]:
training_data = np.loadtxt('./(191103)diabetes_training.csv', delimiter=',', dtype=np.float32)

xdata = training_data[:, 0:-1]
tdata = training_data[:, [-1]]

i_node = 8
h1_node = 30  # 30
o_node = 1
learning_rate = 1e-2
epochs = 431 # x 행의 개수(500) 번 반복

obj = Diabetes("Diabetes", i_node, h1_node, o_node, learning_rate)

start_time = datetime.now()  
        
for count in  range(epochs):  # 전체 반복 횟수
    for index in range(len(xdata)):  # 행의 길이 만큼 반복
        obj.train(xdata[index], tdata[index])
        
    mid_time = datetime.now()
    if (count % 5 == 0):
        print("count =", count, "loss value =", obj.loss_val(), "time =", (mid_time - start_time))
                
end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Diabetes object is created
count = 0 loss value = 1.1158664925728112 time = 0:00:05.700258
count = 5 loss value = 0.9434087165285368 time = 0:00:34.472035
count = 10 loss value = 0.7861309320139981 time = 0:01:01.937394
count = 15 loss value = 0.6363027829080186 time = 0:01:29.418192
count = 20 loss value = 0.5198366828927962 time = 0:01:56.966835
count = 25 loss value = 0.4392122908360574 time = 0:02:24.509240
count = 30 loss value = 0.3864925732129897 time = 0:02:52.041690
count = 35 loss value = 0.3528944356530858 time = 0:03:19.590740
count = 40 loss value = 0.3317736903505637 time = 0:03:47.038659
count = 45 loss value = 0.3186677682022413 time = 0:04:14.586831
count = 50 loss value = 0.31069033105208776 time = 0:04:42.138603
count = 55 loss value = 0.3059875628652079 time = 0:05:09.614127
count = 60 loss value = 0.30336395383690284 time = 0:05:37.055668
count = 65 loss value = 0.3020432545621866 time = 0:06:04.647446
count = 70 loss value = 0.3015184437503539 time = 0:06:32.07825

In [9]:
print(obj.name)

test_data = np.loadtxt('./(191103)diabetes_test.csv', delimiter=',', dtype=np.float32)

test_xdata = test_data[:, 0:-1]
test_tdata = test_data[:, -1]

accuracy_ret = obj.accuracy(test_xdata, test_tdata)

print("\nmatched(%d)\n"%len(accuracy_ret[0]), accuracy_ret[0], "\n\nnot matched(%d)\n"%len(accuracy_ret[1]), accuracy_ret[1], "\n\nnot matched list\n", accuracy_ret[2])

Diabetes

Accuracy => 0.803088803088803

matched(208)
 [0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 31, 35, 36, 37, 38, 39, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81, 83, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 101, 102, 103, 104, 105, 106, 107, 108, 109, 112, 113, 115, 116, 117, 118, 119, 120, 121, 123, 124, 125, 126, 128, 129, 131, 132, 133, 135, 136, 137, 139, 141, 142, 143, 144, 145, 146, 148, 151, 152, 153, 154, 156, 159, 160, 162, 163, 164, 165, 166, 168, 170, 171, 172, 173, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 188, 189, 190, 191, 193, 194, 195, 196, 198, 199, 201, 202, 203, 204, 205, 206, 207, 208, 209, 211, 212, 214, 215, 216, 217, 218, 219, 220, 223, 224, 225, 226, 227, 228, 229, 231, 232, 233, 236, 238, 239, 242, 243, 244, 245, 246, 247, 249, 250, 251, 252, 253, 254, 255, 256, 258