# Automatically upgrade code to Tensorflow
# 텐서플로 2.0으로 코드 업그레이드
### 참고 : 텐서플로 공식 홈페이지
#### (한글) : https://www.tensorflow.org/guide/upgrade?hl=ko
#### (영문) : https://www.tensorflow.org/guide/upgrade?hl=en

In [2]:
import tensorflow as tf
import numpy as np
import random

### Tensorflow 버전 확인

In [3]:
print(tf.__version__)

2.0.0


### 1.x 일 경우 다음 명령어로 Tensorflow 2.0 버전으로 업그레이드 (설치 후 kernel 재시작 필요) 
#### 명령어 : !pip install --upgrade tensorflow
### <-> 2.0에서 1.15.0 버전으로 다운그레이드 할 경우 다음 명령어 사용.
#### 명령어 : !pip install --upgrade tensorflow==1.15

In [5]:
!pip install --upgrade tensorflow

Collecting tensorflow
[?25l  Downloading https://files.pythonhosted.org/packages/2c/72/6b3264aa2889b7dde7663464b99587d95cd6a5f3b9b30181f14d78a63e64/tensorflow-2.0.0-cp37-cp37m-macosx_10_11_x86_64.whl (102.7MB)
[K     |████████████████████████████████| 102.7MB 3.8MB/s eta 0:00:01    |█████▎                          | 16.8MB 3.3MB/s eta 0:00:27     |█████████████████████▌          | 69.0MB 14.9MB/s eta 0:00:03
Collecting tensorboard<2.1.0,>=2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/76/54/99b9d5d52d5cb732f099baaaf7740403e83fe6b0cedde940fabd2b13d75a/tensorboard-2.0.2-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 49.8MB/s eta 0:00:01
Collecting tensorflow-estimator<2.1.0,>=2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/fc/08/8b927337b7019c374719145d1dceba21a8bb909b93b1ad6f8fb7d22c1ca1/tensorflow_estimator-2.0.1-py2.py3-none-any.whl (449kB)
[K     |████████████████████████████████| 450kB 41.5MB/s eta 0:00:01
Collecting

### Tensorflow 버전 확인

In [4]:
print(tf.__version__)

2.0.0


In [5]:
class DataGeneration:

    # target_position = 0 (첫번째열이 정답데이터), target_position=-1 (마지막열이 정답데이터)
    def __init__(self, name, file_path, seperation_rate, target_position=-1):

        self.name = name

        self.file_path = file_path

        self.seperation_rate = seperation_rate

        if (target_position == -1  or  target_position == 0):
            self.target_position = target_position

        else:
            err_str = 'target_position must be -1 or 0'
            raise Exception(err_str)


    # print data target distribution
    # str_of_kind : 'original data' or  'training data'  or  'test data'
    def print_target_distribution(self, data, str_of_kind='original data'):

        print('=======================================================================================================')

        target_data = data[ :, self.target_position ]

        # numpy.unique() 사용하여 loaded data target 분포 확인
        unique, counts = np.unique(target_data, return_counts=True)

        print('[DataGeneration]  ', str_of_kind, ' target value = ', dict(zip(unique, counts)).items())

        num_zeros = dict(zip(unique, counts))[0.0]  # key 0.0 에 대한 value 값 count 리턴
        num_ones = dict(zip(unique, counts))[1.0]  # key 1.0 에 대한 value 값 count 리턴

        print('[DataGeneration]  ', str_of_kind, ' zeros numbers = ', num_zeros, ', ratio = ', 100 * num_zeros / (data.shape[0]), ' %')
        print('[DataGeneration]  ', str_of_kind, ' ones numbers = ', num_ones, ', ratio = ', 100 * num_ones / (data.shape[0]), '%')

        print('=======================================================================================================')


    # shuffle 기능을 이용하여 training_data / test_data 생성
    def generate(self):

        # 데이터 불러오기, 파일이 없는 경우 exception 발생

        try:
            loaded_data = np.loadtxt(self.file_path, delimiter=',', dtype=np.float32)

        except Exception as err:
            print('[DataGeneration::generate()]  ', str(err))
            raise Exception(str(err))

        print("[DataGeneration]  loaded_data.shape = ", loaded_data.shape)

        # print the target distribution of original data

        self.print_target_distribution(loaded_data, 'original data')


        # random.shuffle() 이용한 데이터 인덱스 분리 및 트레이닝/테스트 데이터 생성

        # 임시 저장 리스트
        training_data_list = []
        test_data_list = []

        # 분리비율에 맞게 테스트데이터로 분리
        total_data_num = len(loaded_data)
        test_data_num = int(len(loaded_data) * self.seperation_rate)

        #print("[DataGeneration]  total_data_num = ", total_data_num, ", test_data_num = ", test_data_num)

        # 전체 데이터 인덱스를 가지고 있는 리스트 생성
        total_data_index_list = [ index for index in range(total_data_num) ]

        # random.shuffle 을 이용하여 인덱스 리스트 생성
        random.shuffle(total_data_index_list)  # 전체 인덱스가 랜덤하게 섞여진 리스트로 변형된다

        # test data 를 위한 인덱스는 total_data_index_list 로뷰터 앞에서 분리비율(seperation_rate)의 데이터 인덱스
        test_data_index_list = total_data_index_list[ 0:test_data_num ]

        #print("[DataGeneration]  length of test_data_index_list = ", len(test_data_index_list))

        # training data 를 위한 인덱스는 total_data_index_list 에서 test data 인덱스를 제외한 나머지 부분
        training_data_index_list = total_data_index_list[ test_data_num: ]

        #print("[DataGeneration]  length of training_data_index_list = ", len(training_data_index_list))

        # training data 구성
        for training_data_index in training_data_index_list:

            training_data_list.append(loaded_data[training_data_index])

        # test data 구성
        for test_data_index in test_data_index_list:

            test_data_list.append(loaded_data[test_data_index])

        # generate training data from training_data_list using np.arrya(...)
        training_data = np.array(training_data_list)

        # generate test data from test_data_list using np.arrya(...)
        test_data = np.array(test_data_list)

        # verification shape
        #print("[DataGeneration]  training_data.shape = ", training_data.shape)
        #print("[DataGeneration]  test_data.shape = ", test_data.shape)

        # print target distribution of generated data

        self.print_target_distribution(training_data, 'training data')

        self.print_target_distribution(test_data, 'test data')


        # save training & test data (.csv)
        training_data_save_path = './' + self.name + '_training_data.csv'
        test_data_save_path = './' + self.name + '_test_data.csv'

        # 저장공간이 없거나 파일 write 실패시 exception 발생
        try:
            np.savetxt(training_data_save_path, training_data, delimiter=',')
            np.savetxt(test_data_save_path, test_data, delimiter=',')

        except Exception as err:
            print('[DataGeneration::generate()]  ', str(err))
            raise Exception(str(err))

        return training_data, test_data

In [6]:
# DataGeneration 객체 생성
seperation_rate = 0.4
data_obj = DataGeneration('Diabetes', './(191117)diabetes.csv', seperation_rate)

# training_data, test_data 생성
(training_data, test_data) = data_obj.generate()

print("training_data.shape = ", training_data.shape)
print("test_data.shape = ", test_data.shape)

[DataGeneration]  loaded_data.shape =  (759, 9)
[DataGeneration]   original data  target value =  dict_items([(0.0, 263), (1.0, 496)])
[DataGeneration]   original data  zeros numbers =  263 , ratio =  34.65085638998683  %
[DataGeneration]   original data  ones numbers =  496 , ratio =  65.34914361001317 %
[DataGeneration]   training data  target value =  dict_items([(0.0, 157), (1.0, 299)])
[DataGeneration]   training data  zeros numbers =  157 , ratio =  34.42982456140351  %
[DataGeneration]   training data  ones numbers =  299 , ratio =  65.5701754385965 %
[DataGeneration]   test data  target value =  dict_items([(0.0, 106), (1.0, 197)])
[DataGeneration]   test data  zeros numbers =  106 , ratio =  34.98349834983498  %
[DataGeneration]   test data  ones numbers =  197 , ratio =  65.01650165016501 %
training_data.shape =  (456, 9)
test_data.shape =  (303, 9)


In [7]:
training_x_data = training_data[ :, 0:-1]
training_t_data = training_data[ :, [-1]]

print("training_x_data.shape = ", training_x_data.shape)
print("training_t_data.shape = ", training_t_data.shape)

test_x_data = test_data[ :, 0:-1]
test_t_data = test_data[ :, [-1]]

print("test_x_data.shape = ", test_x_data.shape)
print("test_x_data.shape = ", test_x_data.shape)

training_x_data.shape =  (456, 8)
training_t_data.shape =  (456, 1)
test_x_data.shape =  (303, 8)
test_x_data.shape =  (303, 8)


### Tensorflow 2.0 에서는 즉시실행 모드 때문에 placeholder 사용시 런타임 에러 발생
### error message : RuntimeError: tf.placeholder() is not compatible with eager execution.

### 해결책 : 다음 문장을 placeholder 실행 전에 execute한다.
#### statement : tf.compat.v1.disable_eager_execution()

In [10]:
# RuntimeError : tf.placeholder() is not compatible with eager execution. 해결
tf.compat.v1.disable_eager_execution()

In [11]:
X = tf.compat.v1.placeholder(tf.float32, [None, 8])  # 8개 입력노드
T = tf.compat.v1.placeholder(tf.float32, [None, 1])  # 1개 정답노드

W = tf.Variable(tf.random.normal([8, 1]))  # 8X1 가중치노드
b = tf.Variable(tf.random.normal([1]))     # 1개 바이어스 노드

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [12]:
z = tf.matmul(X, W) + b  # 선형회귀 값 z

y = tf.sigmoid(z)    # 시그모이드로 계산 값

# 손실함수는 Cross-Entropy
loss = -tf.reduce_mean( input_tensor=T*tf.math.log(y) + (1-T)*tf.math.log(1-y) )

In [13]:
learning_rate = 0.01    # 학습율

optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)

train = optimizer.minimize(loss)

In [14]:
# 정확성 검사, True if y > 0.5 else False

predicted = tf.cast(y > 0.5, dtype=tf.float32)


# predicted 와 T 같으면 True 를 리턴하므로 cast 에 의해서 1로 강제 변환,
accuracy = tf.reduce_mean(input_tensor=tf.cast(tf.equal(predicted, T), dtype=tf.float32))

In [15]:
with  tf.compat.v1.Session()  as sess:

    sess.run(tf.compat.v1.global_variables_initializer())  # 변수 노드(tf.Variable) 초기화

    for step in range(20001):

        loss_val, _ = sess.run([loss, train], feed_dict={X: training_x_data, T: training_t_data})


        if step % 500 == 0:
            print("step = ", step, ", loss_val = ", loss_val)

    # Accuracy 확인
    y_val, predicted_val, accuracy_val = sess.run([y, predicted, accuracy], feed_dict={X: test_x_data, T: test_t_data})


    print("\ny_val.shape = ", y_val.shape, ", predicted_val = ", predicted_val.shape)
    print("\nAccuracy = ", accuracy_val)

step =  0 , loss_val =  0.9230915
step =  500 , loss_val =  0.7347302
step =  1000 , loss_val =  0.6825774
step =  1500 , loss_val =  0.6418764
step =  2000 , loss_val =  0.6093674
step =  2500 , loss_val =  0.58329374
step =  3000 , loss_val =  0.5622561
step =  3500 , loss_val =  0.54516196
step =  4000 , loss_val =  0.5311673
step =  4500 , loss_val =  0.5196229
step =  5000 , loss_val =  0.5100283
step =  5500 , loss_val =  0.50199604
step =  6000 , loss_val =  0.49522516
step =  6500 , loss_val =  0.48948014
step =  7000 , loss_val =  0.4845752
step =  7500 , loss_val =  0.480363
step =  8000 , loss_val =  0.47672573
step =  8500 , loss_val =  0.47356862
step =  9000 , loss_val =  0.4708151
step =  9500 , loss_val =  0.4684023
step =  10000 , loss_val =  0.466279
step =  10500 , loss_val =  0.4644027
step =  11000 , loss_val =  0.46273842
step =  11500 , loss_val =  0.46125653
step =  12000 , loss_val =  0.4599326
step =  12500 , loss_val =  0.45874575
step =  13000 , loss_val =  