In [4]:
import tensorflow as tf
import numpy as np
import random
from datetime import datetime

In [6]:
class DataGeneration:
    """Load a numeric CSV file, optionally min-max normalize it, and split it
    randomly into training and test sets.

    The resulting arrays are returned to the caller and also written to CSV
    files in the current working directory.

    Attributes:
        name: Label embedded in the names of the CSV files that are written.
        file_path: Path of the comma-separated input file read by ``generate``.
        seperation_rate: Fraction of the rows assigned to the test set
            (spelling kept for backward compatibility). Expected to lie in
            [0, 1]; the value is not validated.
        is_normalized: When truthy, ``generate`` min-max normalizes every
            column before splitting.
    """

    def __init__(self, name, file_path, seperation_rate, is_normalized = False):
        """Store the configuration; no file is read until ``generate`` runs."""
        self.name = name
        self.file_path = file_path
        self.seperation_rate = seperation_rate
        self.is_normalized = is_normalized
        print("DataGeneration object is created !!")

    def data_normalized_using_min_max(self, loaded_data):
        """Scale every column of a 2-D array into [0, 1] with min-max scaling.

        Each column is transformed as ``(x - min) / (max - min)``. A constant
        column (``max == min``) is mapped to all zeros instead of dividing by
        zero and producing NaN.

        The normalized array is also saved to
        ``./Normalized_<name>_data.csv``.

        Args:
            loaded_data: 2-D array with samples in rows and features in columns.

        Returns:
            Array with the same shape as ``loaded_data`` holding the
            normalized values.
        """
        # Print the shape only; printing the array itself floods the output.
        print("transpose_loaded_data.shape = ", loaded_data.T.shape)

        column_min = np.min(loaded_data, axis=0)
        column_max = np.max(loaded_data, axis=0)
        column_range = column_max - column_min

        # Replace zero ranges by one so constant columns become 0 rather than NaN
        # (the numerator is already 0 for those columns).
        safe_range = np.where(column_range == 0, np.ones_like(column_range), column_range)

        normalized_data = (loaded_data - column_min) / safe_range

        print("transpose_normalized_data.shape = ", normalized_data.T.shape)
        print("normalized_data.shape = ", normalized_data.shape)

        data_save_path = './Normalized_' + self.name + '_data.csv'

        np.savetxt(data_save_path, normalized_data, delimiter= ',')

        return normalized_data

    def generate(self):
        """Read the CSV, optionally normalize it, and split it at random.

        Row indices are shuffled with ``random.shuffle``; the first
        ``int(row_count * seperation_rate)`` shuffled rows become the test set
        and the remaining rows become the training set. Both sets are saved to
        ``./random_<name>_training_data.csv`` and
        ``./random_<name>_test_data.csv``.

        Returns:
            Tuple ``(training_data, test_data)`` of NumPy arrays.
        """
        loaded_data = np.loadtxt(self.file_path, delimiter= ',', dtype = np.float32)
        print("loaded_data.shape = ", loaded_data.shape)

        if self.is_normalized:
            loaded_data = self.data_normalized_using_min_max(loaded_data)

        total_data_num = len(loaded_data)
        test_data_num = int(total_data_num * self.seperation_rate)

        total_data_index_list = list(range(total_data_num))

        random.shuffle(total_data_index_list)

        # ---- Split into test data and training data ----

        test_data_index_list = total_data_index_list[:test_data_num]

        print("length of test_data_index_list = ", len(test_data_index_list))

        # Training indices are whatever remains of the shuffled index list
        # after the test indices have been taken.
        training_data_index_list = total_data_index_list[test_data_num:]

        print("length of training_data_index_list = ", len(training_data_index_list))

        # Select rows with integer-array indexing. The explicit integer dtype
        # keeps an empty index list valid, so an empty split still has the
        # original number of columns.
        training_data = loaded_data[np.asarray(training_data_index_list, dtype=np.intp)]
        test_data = loaded_data[np.asarray(test_data_index_list, dtype=np.intp)]

        # Verify the shapes.
        print("training_data.shape = ", training_data.shape)
        print("test_data.shape = ", test_data.shape)

        # Save training & test data (.csv).
        training_data_save_path = './random_' + self.name + '_training_data.csv'
        test_data_save_path = './random_' + self.name + '_test_data.csv'

        np.savetxt(training_data_save_path, training_data, delimiter= ',')
        np.savetxt(test_data_save_path, test_data, delimiter=',')

        return training_data, test_data

In [None]:
# Fraction of the rows held out as test data.
seperation_rate = 0.3

# The final argument (True) turns on min-max normalization before the split.
data_obj = DataGeneration('ThoracicSurgery', './ThoracicSurgery.csv', seperation_rate, True)

(training_data, test_data) = data_obj.generate()

print("training_data.shape = ", training_data.shape)
# Pass the value as well as the label; the label alone printed nothing useful.
print("test_data.shape = ", test_data.shape)