## ver 1의 경우 다음과 같은 문제들이 있다.

### 1. training data만 있고 test data가 없다. diabetes.csv 하나만 있기 때문에 학습을 마친 후 테스트할 데이터가 없다. 따라서 데이터 파일을 열어 training 용과 test 용으로 임의로 나눠 주어야 한다.

### 2. training 시간을 확인해 보면 너무 오래 걸린다(6만 번 수행 시 3시간 이상). -> 나누기 연산이 포함되어 있기 때문이므로, 모두 곱하기로 바꿔 주어야 한다.

In [5]:
import numpy as np
from datetime import datetime

In [6]:
def numerical_derivative(f, x):
    """Estimate the gradient of ``f`` with respect to ``x`` by central differences.

    Every element of ``x`` is shifted in place by +1e-4 and then by -1e-4,
    ``f(x)`` is evaluated for both shifts, and the element is put back to
    its saved value before the next element is visited.

    Args:
        f: Callable invoked as ``f(x)``; its result is treated as a scalar.
        x: NumPy array to differentiate with respect to. It is modified in
            place temporarily while the estimate is being computed.

    Returns:
        An array created with ``np.zeros_like(x)`` that holds one
        central-difference estimate per element of ``x``.
    """
    step = 1e-4  # 0.0001
    gradient = np.zeros_like(x)

    cursor = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])

    while not cursor.finished:
        position = cursor.multi_index
        saved = x[position]

        # Forward evaluation: f(x + step) for this single element.
        x[position] = float(saved) + step
        forward = f(x)

        # Backward evaluation: f(x - step) for the same element.
        x[position] = saved - step
        backward = f(x)

        gradient[position] = (forward - backward) / (2*step)

        # Put the saved value back before moving to the next element.
        x[position] = saved
        cursor.iternext()

    return gradient

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    ``np.exp`` is applied to ``-x``, so ``x`` may be a scalar or a NumPy
    array; for arrays the function is evaluated element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [7]:
class Diabetes:
    """Two-layer sigmoid network trained by numerical-gradient descent.

    The network computes ``sigmoid(sigmoid(x . W2 + b2) . W3 + b3)`` and is
    trained by minimising a summed binary cross-entropy loss over the
    training data given to the constructor.
    """

    def __init__(self, gate_name, xdata, tdata, i_node, h1_node, o_node, learning_rate, it_count):
        """Store the training data and randomly initialise the parameters.

        Args:
            gate_name: Label stored as ``self.name`` and printed on creation.
            xdata: Training inputs; multiplied with ``W2`` via ``np.dot``.
            tdata: Training targets used in the cross-entropy loss.
            i_node: Number of rows of ``W2`` (input width).
            h1_node: Number of hidden units.
            o_node: Number of output units.
            learning_rate: Step size applied to every gradient update.
            it_count: Number of update steps performed by ``train``.
        """
        self.name = gate_name

        self.xdata = xdata
        self.tdata = tdata

        # Hidden-layer parameters, drawn uniformly from [0, 1).
        self.W2 = np.random.rand(i_node, h1_node)
        self.b2 = np.random.rand(h1_node)

        # Output-layer parameters, drawn uniformly from [0, 1).
        self.W3 = np.random.rand(h1_node, o_node)
        self.b3 = np.random.rand(o_node)

        self.learning_rate = learning_rate
        self.it_count = it_count

        print(self.name + " object is created")

    def _forward(self, x):
        """Return the network output for input ``x`` using the current parameters."""
        z2 = np.dot(x, self.W2) + self.b2
        a2 = sigmoid(z2)

        z3 = np.dot(a2, self.W3) + self.b3
        return sigmoid(z3)

    def feed_forward(self):
        """Return the summed binary cross-entropy loss over the training data."""
        delta = 1e-7  # keeps the argument of log() away from zero

        y = self._forward(self.xdata)

        return  -np.sum( self.tdata*np.log(y + delta) + (1-self.tdata)*np.log((1 - y)+delta ) )

    def loss_val(self):
        """Return the current training loss (same value as ``feed_forward``)."""
        return self.feed_forward()

    def train(self):
        """Run ``it_count`` gradient-descent steps and print progress.

        The loss is printed before training, every 1000 steps together with
        the elapsed time, and once more after the final step.
        """
        # The argument is ignored on purpose: numerical_derivative perturbs
        # the parameter array in place, and feed_forward reads the parameters
        # from self, so the perturbation is already visible to the loss.
        f = lambda x : self.feed_forward()

        print('\n=================================================')
        print("Initial loss value =", self.loss_val())
        print('=================================================\n')

        start_time = datetime.now()

        for step in range(self.it_count):
            self.W2 -= self.learning_rate * numerical_derivative( f, self.W2 )
            self.b2 -= self.learning_rate * numerical_derivative( f, self.b2 )

            self.W3 -= self.learning_rate * numerical_derivative( f, self.W3 )
            self.b3 -= self.learning_rate * numerical_derivative( f, self.b3 )

            if step % 1000 == 0:
                # The clock is only read when a progress line is printed.
                mid_time = datetime.now()
                print("step =", step, "loss value =", self.loss_val(), "time =", (mid_time - start_time))

        end_time = datetime.now()
        print('\n=================================================')
        print("updated loss value =", self.loss_val())
        print("Elapsed Time =>", end_time - start_time)
        print('=================================================\n')

    def predict(self, x):
        """Classify a single sample.

        Args:
            x: One input sample. The ``y > 0.5`` test below needs the
                network output to hold a single value.

        Returns:
            Tuple ``(y, result)`` where ``y`` is the network output and
            ``result`` is 1 if ``y > 0.5`` and 0 otherwise.
        """
        y = self._forward(x)

        result = 1 if y > 0.5 else 0  # 1: True, 0: False

        return y, result

    # Accuracy evaluation
    def accuracy(self, test_xdata, test_tdata):
        """Print the classification accuracy and return the sample indices.

        Args:
            test_xdata: Sequence of test samples, classified one at a time.
            test_tdata: Expected labels, indexed in step with ``test_xdata``.

        Returns:
            Tuple ``(matched_list, not_matched_list)`` with the indices of
            correctly and incorrectly classified samples. For an empty test
            set both lists are empty and the reported accuracy is 0.0.
        """
        matched_list = []
        not_matched_list = []

        for index, sample in enumerate(test_xdata):
            (real_val, logical_val) = self.predict(sample)

            if logical_val == test_tdata[index]:
                matched_list.append(index)
            else:
                not_matched_list.append(index)

        total = len(test_xdata)
        # Guard against an empty test set instead of dividing by zero.
        accuracy_result = len(matched_list) / total if total else 0.0
        print("\nAccuracy =>", accuracy_result)

        return matched_list, not_matched_list

## Diabetes ver 1

In [8]:
# Load the training set from CSV as float32.
training_data = np.loadtxt('./(191103)diabetes_training.csv', delimiter=',', dtype=np.float32)

# The last column is the target; every column before it is an input.
xdata = training_data[:, :-1]
tdata = training_data[:, [-1]]

# Layer sizes and optimisation settings for this run.
i_node, h1_node, o_node = 8, 30, 1
learning_rate = 1e-2
it_count = 60001

obj = Diabetes(
    "Diabetes",
    xdata,
    tdata,
    i_node,
    h1_node,
    o_node,
    learning_rate,
    it_count,
)

obj.train()

Diabetes object is created

Initial loss value = 1214.6855954663142

step = 0 loss value = 289.44219078160666 time = 0:00:00.058870
step = 1000 loss value = 215.41741507469075 time = 0:01:04.663868
step = 2000 loss value = 202.94548299447598 time = 0:02:10.020683
step = 3000 loss value = 181.39415070097886 time = 0:03:15.625479
step = 4000 loss value = 153.40499110880023 time = 0:04:20.958732
step = 5000 loss value = 140.52252488531514 time = 0:05:27.710505
step = 6000 loss value = 126.55278512943586 time = 0:06:34.068145
step = 7000 loss value = 114.93152010129393 time = 0:07:40.651283
step = 8000 loss value = 104.94708525189876 time = 0:08:46.367531
step = 9000 loss value = 95.05334033449752 time = 0:09:53.470593
step = 10000 loss value = 86.28364514065572 time = 0:10:59.339785
step = 11000 loss value = 70.2675061733357 time = 0:12:06.309037
step = 12000 loss value = 56.89053419280073 time = 0:13:13.583786
step = 13000 loss value = 47.31674717864989 time = 0:14:19.614126
step = 14000

In [9]:
print(obj.name)

# Load the held-out test set from CSV as float32.
test_data = np.loadtxt('./(191103)diabetes_test.csv', delimiter=',', dtype=np.float32)

# The last column is the target; every column before it is an input.
test_xdata = test_data[:, :-1]
test_tdata = test_data[:, [-1]]

# Per-sample inspection, left disabled:
# for data in test_xdata:
#     (real_val, logical_val) = obj.predict(data)
#     print("real_val =", real_val, ", logical_val =", logical_val)

# accuracy() prints the accuracy and returns (matched, not matched) index lists.
accuracy_ret = obj.accuracy(test_xdata, test_tdata)
matched_indices, not_matched_indices = accuracy_ret

print("\n", matched_indices, "\n\n", not_matched_indices)

Diabetes

Accuracy => 0.6911196911196911

 [0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 29, 31, 35, 36, 37, 38, 39, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 56, 58, 59, 62, 64, 65, 66, 68, 69, 70, 71, 72, 74, 77, 78, 79, 81, 82, 84, 85, 87, 88, 89, 90, 91, 92, 94, 95, 98, 99, 100, 101, 102, 104, 105, 106, 107, 108, 109, 111, 112, 113, 116, 117, 118, 119, 121, 122, 124, 125, 126, 128, 129, 131, 133, 135, 139, 142, 143, 145, 146, 147, 148, 149, 152, 154, 155, 156, 159, 160, 162, 163, 165, 166, 168, 169, 170, 171, 172, 173, 175, 176, 177, 178, 179, 180, 182, 183, 185, 187, 188, 189, 190, 191, 193, 194, 195, 196, 197, 198, 201, 205, 206, 207, 208, 209, 211, 213, 215, 217, 218, 220, 221, 223, 225, 227, 228, 229, 231, 232, 233, 236, 238, 239, 242, 243, 244, 245, 246, 250, 251, 252, 254, 256, 257, 258] 

 [3, 8, 15, 21, 22, 23, 30, 32, 33, 34, 40, 48, 55, 57, 60, 61, 63, 67, 73, 75, 76, 80, 83, 86, 93, 96, 97, 103, 110, 114, 115, 120, 123, 12