In [1]:
import tensorflow as tf
import numpy as np
import random
from datetime import datetime

In [23]:
class DataGeneration:
    
    def __init__(self, name, file_path, seperation_rate, is_normalized = False):
        self.name = name
        self.file_path = file_path
        self.seperation_rate = seperation_rate
        self.is_normalized = is_normalized
        print("DataGeneration object is created !!")
        
    def data_normalize_using_min_max(self, loaded_data):
        transpose_loaded_data = loaded_data.T
        
        print("transpose_loaded_data.shape = ,", transpose_loaded_data.shape)
        
        transpose_normalize_data_list = []
        
        for index in range(len(transpose_loaded_data)):
            max_value = np.max(transpose_loaded_data[index, :]) # 각 행의 최대값을 찾음
            min_value = np.min(transpose_loaded_data[index, :]) # 각 행의 최대값을 찾음
            transpose_normalize_data_list.append((transpose_loaded_data[index, :] - min_value)  / (max_value - min_value))
                                                     
            
        transpose_normalize_data = np.array(transpose_normalize_data_list)

        print(transpose_normalize_data.shape)

        normalize_data = transpose_normalize_data.T

        print(normalize_data.shape)

        data_save_path = './Normalize_' + self.name + '_data.csv'

        np.savetxt(data_save_path, normalize_data, delimiter= ',')

        return normalize_data
        
    def generate(self):
        loaded_data = np.loadtxt(self.file_path, delimiter=',', dtype=np.float32)
        print("loaded_data.shape = ", loaded_data.shape)

        if(self.is_normalized == True):
            loaded_data = self.data_normalize_using_min_max(loaded_data)

        training_data_list = []
        test_data_list = []

        total_data_num = len(loaded_data)
        test_data_num = int(len(loaded_data) * self.seperation_rate)

        total_data_index_list = [index for index in range(total_data_num)]

        random.shuffle(total_data_index_list)

        test_data_index_list = total_data_index_list[ 0:test_data_num ]

        print("length of test_data_index_list = ", len(test_data_index_list))

        # training data 를 위한 인덱스는 total_data_index_list 에서 test data 인덱스를 제외한 나머지 부분
        training_data_index_list = total_data_index_list[ test_data_num: ]

        print("length of training_data_index_list = ", len(training_data_index_list))

        # training data 구성
        for training_data_index in training_data_index_list:

            training_data_list.append(loaded_data[training_data_index])

        # test data 구성
        for test_data_index in test_data_index_list:

            test_data_list.append(loaded_data[test_data_index])

        # generate training data from training_data_list using np.arrya(...)
        training_data = np.array(training_data_list)

        # generate test data from test_data_list using np.arrya(...)
        test_data = np.array(test_data_list)

        # verification shape
        print("training_data.shape = ", training_data.shape)
        print("test_data.shape = ", test_data.shape)

        # save training & test data (.csv)
        training_data_save_path = './random_' + self.name + '_training_data.csv'
        test_data_save_path = './random_' + self.name + '_test_data.csv'

        np.savetxt(training_data_save_path, training_data, delimiter=',')
        np.savetxt(test_data_save_path, test_data, delimiter=',')

        return training_data, test_data


In [24]:
seperation_rate = 0.3
data_obj = DataGeneration('ThoracicSurgery', './ThoracicSurgery.csv', seperation_rate,True)

(training_data, test_data) = data_obj.generate()

print("training_data.shape = ", training_data.shape)
print("test_data.shape = ", test_data.shape)


DataGeneration object is created !!
loaded_data.shape =  (470, 18)
transpose_loaded_data.shape = , (18, 470)
(18, 470)
(470, 18)
length of test_data_index_list =  141
length of training_data_index_list =  329
training_data.shape =  (329, 18)
test_data.shape =  (141, 18)
training_data.shape =  (329, 18)
test_data.shape =  (141, 18)


In [25]:
training_x_data = training_data[ :, 0:-1]
training_t_data = training_data[ :, [-1]]

print("training_x_data.shape = ", training_x_data.shape)
print("training_t_data.shape = ", training_t_data.shape)

test_x_data = test_data[ :, 0:-1]
test_t_data = test_data[ :, [-1]]

print("test_x_data.shape = ", test_x_data.shape)
print("test_x_data.shape = ", test_x_data.shape)

training_x_data.shape =  (329, 17)
training_t_data.shape =  (329, 1)
test_x_data.shape =  (141, 17)
test_x_data.shape =  (141, 17)


In [26]:
learning_rate = 0.01
input_nodes = training_x_data.shape[1]
hidden_nodes = 10
output_nodes = 1
epochs = 1

In [27]:
X = tf.placeholder(tf.float32, [None, input_nodes])
T = tf.placeholder(tf.float32, [None, output_nodes])

W2 = tf.Variable(tf.random_normal([input_nodes, hidden_nodes]))
b2 = tf.Variable(tf.random_normal([hidden_nodes]))

W3 = tf.Variable(tf.random_normal([hidden_nodes, output_nodes]))
b3 = tf.Variable(tf.random_normal([output_nodes]))
    

In [28]:
Z2 = tf.matmul(X, W2) + b2
A2 = tf.sigmoid(Z2)

Z3 = tf.matmul(A2, W3) + b3
y = A3 = tf.sigmoid(Z3)

loss = -tf.reduce_mean(T * tf.log(y) + (1-T)*tf.log(1-y))

In [29]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train = optimizer.minimize(loss)

In [30]:
predicted = tf.cast(y > 0.5, dtype = tf.float32)

accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, T), dtype = tf.float32))

In [32]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    start_time = datetime.now()
    
    for i in range(epochs):
        for step in range(10001):
            loss_val, _ = sess.run([loss, train], feed_dict = {X : training_x_data, T : training_t_data})
            
            if step % 500 == 0:
                print("epochs : ", epochs, "step = ", step, "loss_val = ", loss_val)
    end_time = datetime.now()
    
    print("")
    print("Elapsed time : ", end_time - start_time)
    
    y_val , predicted_val, accuracy_val = sess.run([y, predicted, accuracy], feed_dict = {X:test_x_data, T: test_t_data})
    
    print("y_val.shape = ", y_val.shape, ", predicted_val = ", predicted_val.shape)
    print("accuracy_val = ", accuracy_val)

epochs :  1 step =  0 loss_val =  0.43519452
epochs :  1 step =  500 loss_val =  0.42628673
epochs :  1 step =  1000 loss_val =  0.4244888
epochs :  1 step =  1500 loss_val =  0.42278
epochs :  1 step =  2000 loss_val =  0.42113966
epochs :  1 step =  2500 loss_val =  0.419559
epochs :  1 step =  3000 loss_val =  0.41803128
epochs :  1 step =  3500 loss_val =  0.4165525
epochs :  1 step =  4000 loss_val =  0.41511962
epochs :  1 step =  4500 loss_val =  0.41373137
epochs :  1 step =  5000 loss_val =  0.41238722
epochs :  1 step =  5500 loss_val =  0.4110869
epochs :  1 step =  6000 loss_val =  0.40983054
epochs :  1 step =  6500 loss_val =  0.4086182
epochs :  1 step =  7000 loss_val =  0.40744963
epochs :  1 step =  7500 loss_val =  0.40632415
epochs :  1 step =  8000 loss_val =  0.405241
epochs :  1 step =  8500 loss_val =  0.4041985
epochs :  1 step =  9000 loss_val =  0.40319517
epochs :  1 step =  9500 loss_val =  0.40222904
epochs :  1 step =  10000 loss_val =  0.4012983

Elapsed