# 예제 2 은닉층 2개

In [1]:
import tensorflow as tf
import numpy as np
import random
from datetime import datetime

### 데이터 분리

In [2]:
class DataGeneration:
    """Load a comma-separated data file and split it randomly into training and test sets.

    The target (label) column is either the first column (``target_position=0``)
    or the last column (``target_position=-1``).  The distribution report printed
    by this class counts the target values ``0.0`` and ``1.0``.
    """

    def __init__(self, name, file_path, seperation_rate, target_position=-1):
        """Store the split configuration.

        Args:
            name: Prefix used for the saved ``<name>_training_data.csv`` and
                ``<name>_test_data.csv`` files.
            file_path: Path of the comma-separated input file.
            seperation_rate: Fraction of the rows that goes to the test set;
                ``int(row_count * seperation_rate)`` rows are used.
            target_position: ``0`` if the first column is the target,
                ``-1`` if the last column is the target.

        Raises:
            ValueError: If ``target_position`` is neither ``-1`` nor ``0``.
        """
        if target_position not in (-1, 0):
            raise ValueError('target_position must be -1 or 0')

        self.name = name
        self.file_path = file_path
        self.seperation_rate = seperation_rate
        self.target_position = target_position

    def print_target_distribution(self, data, str_of_kind='original data'):
        """Print how many targets are 0.0 / 1.0 in ``data`` and their percentage.

        Args:
            data: 2-D array whose ``target_position`` column holds the targets.
            str_of_kind: Label used in the output, e.g. 'original data',
                'training data' or 'test data'.
        """
        separator = '======================================================================================================='

        print(separator)

        target_data = data[:, self.target_position]

        # np.unique gives every distinct target value together with its count.
        unique, counts = np.unique(target_data, return_counts=True)
        distribution = dict(zip(unique, counts))

        print('[DataGeneration]  ', str_of_kind, ' target value = ', distribution.items())

        # A class may be absent from a (small) split, so fall back to a count of 0
        # instead of failing with KeyError.
        num_zeros = distribution.get(0.0, 0)
        num_ones = distribution.get(1.0, 0)

        # Guard against an empty split so the ratio never divides by zero.
        total = data.shape[0]
        zeros_ratio = 100 * num_zeros / total if total else 0.0
        ones_ratio = 100 * num_ones / total if total else 0.0

        print('[DataGeneration]  ', str_of_kind, ' zeros numbers = ', num_zeros, ', ratio = ', zeros_ratio, ' %')
        print('[DataGeneration]  ', str_of_kind, ' ones numbers = ', num_ones, ', ratio = ', ones_ratio, '%')

        print(separator)

    def generate(self):
        """Load the file, shuffle the rows and split them into training / test data.

        The target distribution of the loaded data and of both splits is
        printed, and both splits are written to ``./<name>_training_data.csv``
        and ``./<name>_test_data.csv``.

        Returns:
            Tuple ``(training_data, test_data)`` of 2-D float32 arrays.

        Raises:
            Exception: If the input file cannot be loaded or the output files
                cannot be written; the original error is attached as the cause.
        """
        # Load the data; a missing or malformed file raises here.
        try:
            # ndmin=2 keeps a single-row file two-dimensional so the column
            # slicing below still works.
            loaded_data = np.loadtxt(self.file_path, delimiter=',', dtype=np.float32, ndmin=2)

        except Exception as err:
            print('[DataGeneration::generate()]  ', str(err))
            raise Exception(str(err)) from err

        print("[DataGeneration]  loaded_data.shape = ", loaded_data.shape)

        self.print_target_distribution(loaded_data, 'original data')

        # Number of rows that goes to the test set according to the split rate.
        total_data_num = len(loaded_data)
        test_data_num = int(total_data_num * self.seperation_rate)

        # Shuffle the row indices with random.shuffle (in place).
        total_data_index_list = list(range(total_data_num))
        random.shuffle(total_data_index_list)

        # The first test_data_num shuffled indices form the test set,
        # the remaining ones form the training set.
        test_data_index_list = total_data_index_list[:test_data_num]
        training_data_index_list = total_data_index_list[test_data_num:]

        # Index arrays pick all rows at once; the explicit integer dtype keeps
        # an empty split a valid (0, n_columns) array.
        training_data = loaded_data[np.asarray(training_data_index_list, dtype=np.intp)]
        test_data = loaded_data[np.asarray(test_data_index_list, dtype=np.intp)]

        self.print_target_distribution(training_data, 'training data')
        self.print_target_distribution(test_data, 'test data')

        # Save both splits as .csv files in the current directory.
        training_data_save_path = f'./{self.name}_training_data.csv'
        test_data_save_path = f'./{self.name}_test_data.csv'

        # Writing fails e.g. when there is no disk space or no write permission.
        try:
            np.savetxt(training_data_save_path, training_data, delimiter=',')
            np.savetxt(test_data_save_path, test_data, delimiter=',')

        except Exception as err:
            print('[DataGeneration::generate()]  ', str(err))
            raise Exception(str(err)) from err

        return training_data, test_data

In [3]:
# Create the DataGeneration object and split the data set.
# generate() prints the target distribution of the resulting training / test data.

seperation_rate = 0.4  # split rate passed to DataGeneration
target_position = -1   # the last column holds the target

try:
    data_obj = DataGeneration(
        name='Diabetes',
        file_path='./diabetes.csv',
        seperation_rate=seperation_rate,
        target_position=target_position,
    )
    training_data, test_data = data_obj.generate()

except Exception as err:
    # Report the failure; the message goes on its own line after the banner.
    print('Exception Occur !!', str(err), sep='\n')

[DataGeneration]  loaded_data.shape =  (759, 9)
[DataGeneration]   original data  target value =  dict_items([(0.0, 263), (1.0, 496)])
[DataGeneration]   original data  zeros numbers =  263 , ratio =  34.65085638998683  %
[DataGeneration]   original data  ones numbers =  496 , ratio =  65.34914361001317 %
[DataGeneration]   training data  target value =  dict_items([(0.0, 157), (1.0, 299)])
[DataGeneration]   training data  zeros numbers =  157 , ratio =  34.42982456140351  %
[DataGeneration]   training data  ones numbers =  299 , ratio =  65.5701754385965 %
[DataGeneration]   test data  target value =  dict_items([(0.0, 106), (1.0, 197)])
[DataGeneration]   test data  zeros numbers =  106 , ratio =  34.98349834983498  %
[DataGeneration]   test data  ones numbers =  197 , ratio =  65.01650165016501 %


### TensorFlow - 노드 / 연산 정의

In [4]:
# Separate each data set into input features (every column except the last)
# and the target (the last column, kept two-dimensional via the list index).
training_x_data, training_t_data = training_data[:, :-1], training_data[:, [-1]]
test_x_data, test_t_data = test_data[:, :-1], test_data[:, [-1]]

# Show the resulting shapes as a sanity check.
print(
    "training_x_data.shape =", training_x_data.shape,
    ", training_t_data.shape =", training_t_data.shape,
)
print(
    "test_x_data.shape =", test_x_data.shape,
    ", test_t_data.shape =", test_t_data.shape,
)

training_x_data.shape = (456, 8) , training_t_data.shape = (456, 1)
test_x_data.shape = (303, 8) , test_t_data.shape = (303, 1)


##### 여기부터 달라짐

In [19]:
# Hyper-parameter definition
learning_rate = 0.1                       # step size handed to the optimizer
input_nodes = training_x_data.shape[-1]   # one input node per feature column
hidden_1_nodes = hidden_2_nodes = 4       # both hidden layers use four nodes
output_nodes = 1                          # single output node
epochs = 1                                # number of passes of the outer training loop

In [14]:
################## previous version ##################
# W = tf.Variable(tf.random_normal([8, 1]))
# b = tf.Variable(tf.random_normal([1]))

# X = tf.placeholder(tf.float32, [None, 8])
# T = tf.placeholder(tf.float32, [None, 1])
######################################################

# Placeholders for the input features and the target values
X_DATA = tf.placeholder(dtype=tf.float32, shape=[None, input_nodes])
T_DATA = tf.placeholder(dtype=tf.float32, shape=[None, output_nodes])

# Hidden layer 1: weight and bias nodes
W2 = tf.Variable(tf.random_normal(shape=[input_nodes, hidden_1_nodes]))
b2 = tf.Variable(tf.random_normal(shape=[hidden_1_nodes]))

# Hidden layer 2: weight and bias nodes
W3 = tf.Variable(tf.random_normal(shape=[hidden_1_nodes, hidden_2_nodes]))
b3 = tf.Variable(tf.random_normal(shape=[hidden_2_nodes]))

# Output layer: weight and bias nodes
W4 = tf.Variable(tf.random_normal(shape=[hidden_2_nodes, output_nodes]))
b4 = tf.Variable(tf.random_normal(shape=[output_nodes]))

In [15]:
###################### previous version #######################
# z = tf.matmul(X, W) + b  # linear regression value z

# y = tf.sigmoid(z)  # value after the sigmoid

# # the loss function is cross-entropy
# loss = -tf.reduce_mean(T*tf.log(y) + (1-T)*tf.log(1-y))
###############################################################

# Feed forward: input -> hidden layer 1 -> hidden layer 2 -> output
Z2 = tf.matmul(X_DATA, W2) + b2
A2 = tf.sigmoid(Z2)

Z3 = tf.matmul(A2, W3) + b3
A3 = tf.sigmoid(Z3)

Z4 = tf.matmul(A3, W4) + b4
y = A4 = tf.sigmoid(Z4)    # final computed value

# The loss is the mean sigmoid cross-entropy.  It is computed from the logits
# Z4 rather than from log(y) / log(1 - y): when the sigmoid saturates, y becomes
# exactly 0 or 1 in float32 and the explicit logarithms yield -inf / NaN.
# The logits-based op is the same formula evaluated in a numerically stable way.
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=T_DATA, logits=Z4)
)

In [16]:
# learning_rate = 1e-2  (disabled; the value from the hyper-parameter cell is used)

# Gradient descent -- from here on only this part is expected to change
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

# Operation that performs one update step minimising the loss
train = optimizer.minimize(loss)

In [17]:
# Accuracy check: the prediction is 1.0 where y > 0.5 and 0.0 otherwise
predicted = tf.cast(tf.greater(y, 0.5), dtype=tf.float32)

# Accuracy is the mean of the element-wise matches between prediction and target
is_correct = tf.equal(predicted, T_DATA)
accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))

### TensorFlow - 노드 / 연산 실행

In [20]:
with tf.Session() as sess:  # the with-block closes the session, no explicit close needed
    sess.run(tf.global_variables_initializer())  # initialise the variable nodes (tf.Variable)

    # Feed dictionaries for the training run and for the final evaluation
    training_feed = {X_DATA: training_x_data, T_DATA: training_t_data}
    test_feed = {X_DATA: test_x_data, T_DATA: test_t_data}

    start_time = datetime.now()

    for epoch_idx in range(epochs):
        for step in range(10001):
            # One update step on the whole training set
            loss_val, _ = sess.run([loss, train], feed_dict=training_feed)

            # Report the loss only on every 1000th step
            if step % 1000 != 0:
                continue

            print("epochs = ", epoch_idx, ", step = ", step, ", loss_val = ", loss_val)

    end_time = datetime.now()

    print("")
    print("Elapsed Time => ", end_time-start_time)

    # Check the accuracy on the test data
    y_val, predicted_val, accuracy_val = sess.run(
        [y, predicted, accuracy], feed_dict=test_feed
    )

    print("\ny_val.shape", y_val.shape, ", predicted_val =", predicted_val.shape)
    print("\nAccuracy =", accuracy_val)

epochs =  0 , step =  0 , loss_val =  0.72214365
epochs =  0 , step =  1000 , loss_val =  0.6462125
epochs =  0 , step =  2000 , loss_val =  0.64389163
epochs =  0 , step =  3000 , loss_val =  0.6418403
epochs =  0 , step =  4000 , loss_val =  0.6399084
epochs =  0 , step =  5000 , loss_val =  0.6379881
epochs =  0 , step =  6000 , loss_val =  0.6359804
epochs =  0 , step =  7000 , loss_val =  0.6337837
epochs =  0 , step =  8000 , loss_val =  0.6312836
epochs =  0 , step =  9000 , loss_val =  0.6283421
epochs =  0 , step =  10000 , loss_val =  0.62478507

Elapsed Time =>  0:00:08.046284

y_val.shape (303, 1) , predicted_val = (303, 1)

Accuracy = 0.650165
