In [1]:
import numpy as np
# Normalize 않되어 있는 ThoracicSurgery.csv 읽어옴
load_data = np.loadtxt('./ThoraricSurgery.csv',delimiter = ',')
print("load_data.shape = " ,load_data.shape)
# 각 열의 최대값을 찾기 위해 행과 열을 바꾸어 줌. 즉 전치향렬을 만들어줌
transpose_load_data = load_data.T

print("transpose_load_data.shape = ", transpose_load_data.shape)
# 전치행렬을 위한 리스트
transpose_normalize_data_list = []
for index in range(len(transpose_load_data)):
    max_value = np.max(transpose_load_data[index, : ])
    
    if max_value > 1.0:
        
        transpose_normalize_data_list.append(transpose_load_data[index, :] / max_value)
    else:
        transpose_normalize_data_list.append(transpose_load_data[index, :])

transpose_normalize_data = np.array(transpose_normalize_data_list)

print(transpose_normalize_data.shape)

normalize_data = transpose_normalize_data.T

print(normalize_data.shape)

np.savetxt('./Normalize_ThoracicSurgery_data.csv', normalize_data, delimiter=',')

load_data = np.loadtxt('./Normalize_ThoracicSurgery_data.csv',delimiter = ',',dtype=np.float32)
print("load_data.shape = ", load_data.shape)
#임시저장 리스트
training_data_list = []
test_data_list = []
#전체 30%라는 것은 전체 길이에 대해 3으로 나누어주면 됨
seperate_rate = 3

for index in range(len(load_data)):
    if (index % seperate_rate) == 0:
        test_data_list.append(load_data[index])
    else:
        training_data_list.append(load_data[index])

training_data = np.array(training_data_list)

test_data = np.array(test_data_list)

print("training_data.shape = ", training_data.shape)
print("test_data.shape = ", test_data.shape)

np.savetxt('./ThoraricSurgery_training_data.csv', training_data,delimiter = ',')
np.savetxt('./ThoraricSurgery_test_data.csv', test_data,delimiter = ',')


load_data.shape =  (470, 18)
transpose_load_data.shape =  (18, 470)
(18, 470)
(470, 18)
load_data.shape =  (470, 18)
training_data.shape =  (313, 18)
test_data.shape =  (157, 18)


In [2]:
import numpy as np
from datetime import datetime

# 수치미분 함수

def numerical_derivative(f, x):
    delta_x = 1e-4 # 0.0001
    grad = np.zeros_like(x)
    
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    
    while not it.finished:
        idx = it.multi_index        
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + delta_x
        fx1 = f(x) # f(x+delta_x)
        
        x[idx] = tmp_val - delta_x 
        fx2 = f(x) # f(x-delta_x)
        grad[idx] = (fx1 - fx2) / (2*delta_x)
        
        x[idx] = tmp_val 
        it.iternext()   
        
    return grad

# sigmoid 함수

def sigmoid(x):
    return 1 / (1+np.exp(-x))

In [6]:
import numpy as np
from datetime import datetime
class ThoracicSurgery:
    def __init__(self,training_data, i_nodes, h_nodes, o_nodes, lr, epochs):
        self.training_data = training_data
        self.x_data = self.training_data[:,0:-1]
        self.t_data = self.training_data[:,[-1]]
        self.i_nodes = i_nodes
        self.h_nodes = h_nodes
        self.o_nodes = o_nodes
        self.lr = lr
        self.epochs = epochs
        self.W2 = np.random.rand(self.i_nodes,self.h_nodes)
        self.b2 = np.random.rand(self.h_nodes)
        
        self.W3 = np.random.rand(self.h_nodes,self.o_nodes)
        self.b3 = np.random.rand(self.o_nodes)
    
    def loss_func(self):
        delta = 1e-7
        z2 = np.dot(self.input_data, self.W2) + self.b2
        a2 = sigmoid(z2)
        
        z3 = np.dot(a2,self.W3) + self.b3
        y = a3 = sigmoid(z3)
        
        return -np.sum(self.target_data * np.log(y + delta) + (1-self.target_data)*np.log((1-y)+delta) )
    
    def predict(self,test_data):
        z2 = np.dot(test_data, self.W2) + self.b2
        a2 = sigmoid(z2)
        
        z3 = np.dot(a2,self.W3) + self.b3
        y = a3 = sigmoid(z3)
        
        if y >= 0.5:
            result = 1
        else:
            result = 0
        return y,result
    
    def accuracy(self, input_data, target_data):
        
        matched_list = []
        not_matched_list = []
        index_label_prediction_list = []
        
        for index in range(len(input_data)):
            (real_val,logical_val) = self.predict(input_data[index])
            if logical_val == target_data[index]:
                matched_list.append(index)
            else:
                not_matched_list.append(index)
            index_label_prediction_list.append([index,target_data[index],logical_val])
            
        accuracy_rate = len(matched_list) / len(input_data)
        print("accuracy_rate : ",accuracy_rate)
        return matched_list, not_matched_list, index_label_prediction_list
    
    def train(self):
        
        f = lambda x : self.loss_func()
        start_time = datetime.now()
        
        for step in range(self.epochs):
            
            for index in range(len(self.training_data)):
                
                self.input_data = self.training_data[index,0:-1]
                self.target_data = self.training_data[index,[-1]]
                
                self.W2 -= self.lr * numerical_derivative(f, self.W2)
                self.b2 -= self.lr * numerical_derivative(f, self.b2)

                self.W3 -= self.lr * numerical_derivative(f, self.W3)
                self.b3 -= self.lr * numerical_derivative(f, self.b3)
            
            if (step % 2) ==0:
                print("epochs = ", step, "error_rate = ",self.loss_func())
        end_time = datetime.now()
        print("")
        print("elapsed time : ",end_time - start_time)

In [7]:
#hyper-parameter
i_nodes = training_data.shape[1] - 1    # input nodes 개수
h1_nodes = 20  # hidden nodes 개수. 
o_nodes = 1    # output nodes 개수
lr = 1e-3      # learning rate. hi_node = 30, 
epochs = 10

# ThoraricSurgery 객체 생성
obj = ThoracicSurgery(training_data, i_nodes, h1_nodes, o_nodes, lr, epochs)

print("Neural Network Learning using Numerical Derivative...")

obj.train()

Neural Network Learning using Numerical Derivative...
epochs =  0 error_rate =  3.0066244540100313
epochs =  2 error_rate =  0.2662648579575356
epochs =  4 error_rate =  0.17763872686446538
epochs =  6 error_rate =  0.16317824064293482
epochs =  8 error_rate =  0.15987452611002803

elapsed time :  0:00:38.784555


In [8]:
test_data = np.loadtxt('./ThoraricSurgery_test_data.csv', delimiter=',', dtype=np.float32)
print("test_data.shape = ", test_data.shape)


test_input_data = test_data[ :, 0:-1 ]
test_target_data = test_data[ :, -1 ]

(true_list_1, false_list_1, index_label_prediction_list) = obj.accuracy(test_input_data, test_target_data) 
print(index_label_prediction_list)

test_data.shape =  (157, 18)
accuracy_rate :  0.8280254777070064
[[0, 0.0, 0], [1, 1.0, 0], [2, 0.0, 0], [3, 0.0, 0], [4, 0.0, 0], [5, 0.0, 0], [6, 0.0, 0], [7, 0.0, 0], [8, 0.0, 0], [9, 0.0, 0], [10, 1.0, 0], [11, 0.0, 0], [12, 1.0, 0], [13, 1.0, 0], [14, 0.0, 0], [15, 1.0, 0], [16, 1.0, 0], [17, 0.0, 0], [18, 0.0, 0], [19, 0.0, 0], [20, 0.0, 0], [21, 0.0, 0], [22, 0.0, 0], [23, 0.0, 0], [24, 1.0, 0], [25, 0.0, 0], [26, 0.0, 0], [27, 0.0, 0], [28, 0.0, 0], [29, 0.0, 0], [30, 0.0, 0], [31, 0.0, 0], [32, 1.0, 0], [33, 0.0, 0], [34, 0.0, 0], [35, 0.0, 0], [36, 1.0, 0], [37, 0.0, 0], [38, 0.0, 0], [39, 0.0, 0], [40, 0.0, 0], [41, 1.0, 0], [42, 0.0, 0], [43, 0.0, 0], [44, 0.0, 0], [45, 0.0, 0], [46, 0.0, 0], [47, 0.0, 0], [48, 0.0, 0], [49, 1.0, 0], [50, 0.0, 0], [51, 0.0, 0], [52, 0.0, 0], [53, 0.0, 0], [54, 0.0, 0], [55, 0.0, 0], [56, 0.0, 0], [57, 0.0, 0], [58, 0.0, 0], [59, 0.0, 0], [60, 1.0, 0], [61, 0.0, 0], [62, 0.0, 0], [63, 0.0, 0], [64, 0.0, 0], [65, 1.0, 0], [66, 1.0, 0], [67, 0