In [17]:
import numpy as np

loaded_data = np.loadtxt('data/diabetes.csv', delimiter=',')

# 학습데이터 분리
seperation_rate=0.3 #분리 비율
test_data_num = int(len(loaded_data)*seperation_rate)

np.random.shuffle(loaded_data)

test_data = loaded_data[0:test_data_num]
training_data = loaded_data[test_data_num:]

#학습할 x 데이터, 학습할 t 데이터
training_x_data = training_data[:,0:-1] #맨 마지막 필드 빼고 전부다
training_t_data = training_data[:,[-1]] #맨 마지막 필드

# 테스트할 x 데이터, 테스트할 t 데이터
test_x_data = test_data[:,0:-1]
test_t_data = test_data[:,[-1]]


print('loaded_data.shape=',loaded_data.shape)
print('training_x_data.shape=',training_x_data.shape)
print('training_t_data.shape=',training_t_data.shape)

print('test_x_data.shape=',test_x_data.shape)
print('test_t_data.shape=',test_t_data.shape)

loaded_data.shape= (759, 9)
training_x_data.shape= (532, 8)
training_t_data.shape= (532, 1)
test_x_data.shape= (227, 8)
test_t_data.shape= (227, 1)


In [18]:
W=np.random.rand(8,1)
b= np.random.rand(1)

In [8]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [9]:
def loss_func(x,t):
    delta = 1e-7
    z=np.dot(x,W)+b
    y=sigmoid(z)
    
    #cross-entropy
    return -np.sum(t*np.log(y-delta)+(1-t)*np.log((1-y)+delta))

In [10]:
# 미분함수 공식
def numerical_derivative(f,x):
    delta_x=1e-4
    grad=np.zeros_like(x)
    
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    
    while not it.finished:
        idx=it.multi_index
        
        tmp_val=x[idx]
        x[idx]=float(tmp_val)+delta_x
        fx1=f(x) #f(x+delta_x)
        
        x[idx]=tmp_val-delta_x
        fx2=f(x) #f(x-detal_x)
        grad[idx]=(fx1-fx2)/(2*delta_x)
        
        x[idx]=tmp_val
        it.iternext()
        
    return grad

In [11]:
def error_val(x,t):
    delta =1e-7
    
    z=np.dot(x,W)+b
    y=sigmoid(z) 
    #cross-entropy
    return -np.sum(t*np.log(y-delta)+(1-t)*np.log((1-y)+delta))

In [12]:
#학습을 마친 후 임의의 데이터에 대해 미래 값 예측 함수
# 입력 변수 x:  numpy type

def predict(x):
    z=np.dot(x,W)+b
    y=sigmoid(z)
    
    if y>=0.5:
        result=1 #True
    else:
        result=0 #False
        
    return y,result

In [24]:
learning_rate = 1e-2

f=lambda x: loss_func(training_x_data,training_t_data) #f(x) = loss_func(x_data,t_data)
print('Initial error value =', error_val(training_x_data,training_t_data),'Initial W=',W, '\nb=',b)

for step in range(20001):
    W-=learning_rate*numerical_derivative(f,W)
    b-=learning_rate*numerical_derivative(f,b)
    
    if(step%1000==0):
        print('step=',step, 'error value=',error_val(training_x_data,training_t_data),'W=',W,'b=',b)

Initial error value = 242.5699028715818 Initial W= [[-1.02492256]
 [-4.0454218 ]
 [ 0.41814158]
 [ 0.0461035 ]
 [-0.42429879]
 [-2.70041118]
 [-0.78581796]
 [-0.00659473]] 
b= [0.60627796]
step= 0 error value= 242.5699028715818 W= [[-1.02492256]
 [-4.0454218 ]
 [ 0.41814158]
 [ 0.0461035 ]
 [-0.42429879]
 [-2.70041118]
 [-0.78581796]
 [-0.00659473]] b= [0.60627796]
step= 1000 error value= 242.5699028715818 W= [[-1.02492256]
 [-4.0454218 ]
 [ 0.41814158]
 [ 0.0461035 ]
 [-0.42429879]
 [-2.70041118]
 [-0.78581796]
 [-0.00659473]] b= [0.60627796]
step= 2000 error value= 242.5699028715818 W= [[-1.02492256]
 [-4.0454218 ]
 [ 0.41814158]
 [ 0.0461035 ]
 [-0.42429879]
 [-2.70041118]
 [-0.78581796]
 [-0.00659473]] b= [0.60627796]
step= 3000 error value= 242.5699028715818 W= [[-1.02492256]
 [-4.0454218 ]
 [ 0.41814158]
 [ 0.0461035 ]
 [-0.42429879]
 [-2.70041118]
 [-0.78581796]
 [-0.00659473]] b= [0.60627796]
step= 4000 error value= 242.5699028715818 W= [[-1.02492256]
 [-4.0454218 ]
 [ 0.418141

In [25]:
count=0

for idx, val in enumerate(test_x_data):
    (real_val, logical_val) = predict(val)
    print('real_t_data=',test_t_data[idx], 'real_val=',real_val,'logical_val=',logical_val, test_t_data[idx]==logical_val)
    if test_t_data[idx]==logical_val:
        count+=1

        
print('정확도 : ',count/len(test_x_data))

real_t_data= [1.] real_val= [0.74997203] logical_val= 1 [ True]
real_t_data= [1.] real_val= [0.95961503] logical_val= 1 [ True]
real_t_data= [1.] real_val= [0.95112689] logical_val= 1 [ True]
real_t_data= [0.] real_val= [0.19586875] logical_val= 0 [ True]
real_t_data= [1.] real_val= [0.83185734] logical_val= 1 [ True]
real_t_data= [0.] real_val= [0.20601483] logical_val= 0 [ True]
real_t_data= [0.] real_val= [0.09305113] logical_val= 0 [ True]
real_t_data= [1.] real_val= [0.7231487] logical_val= 1 [ True]
real_t_data= [0.] real_val= [0.05236285] logical_val= 0 [ True]
real_t_data= [1.] real_val= [0.96422423] logical_val= 1 [ True]
real_t_data= [0.] real_val= [0.82654952] logical_val= 1 [False]
real_t_data= [0.] real_val= [0.69589295] logical_val= 1 [False]
real_t_data= [0.] real_val= [0.03621003] logical_val= 0 [ True]
real_t_data= [1.] real_val= [0.70659471] logical_val= 1 [ True]
real_t_data= [1.] real_val= [0.60047284] logical_val= 1 [ True]
real_t_data= [1.] real_val= [0.81810976] 