<br>

## 1. Data loading & preprocessing

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, preprocessing

In [2]:
x_data = datasets.load_boston().data 
y_data = datasets.load_boston().target # 집의 가격 (in $1,000s)

sc = preprocessing.StandardScaler() # Apply standard scaling on x_data
x_data = sc.fit_transform(x_data)
y_data = np.reshape(y_data, (len(y_data), 1))

print(x_data.shape)
print(y_data.shape)

(506, 13)
(506, 1)


<br>

## 2. Devide dataframe into X & Y -> Train X / Test X / Train Y / Test Y

In [3]:
from sklearn import model_selection
train_data, test_data, train_label, test_label = model_selection.train_test_split(x_data, y_data,
                                                                                 test_size=0.3,
                                                                                 random_state=0)
print(train_data.shape)
print(test_data.shape)
print(train_label.shape)
print(test_label.shape)

(354, 13)
(152, 13)
(354, 1)
(152, 1)


<br>

## 3. Build & Train the model 

In [4]:
import tensorflow as tf
from tensorflow import layers

import os
tf.logging.set_verbosity(tf.logging.ERROR)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 

In [5]:
X = tf.placeholder(tf.float32, [None, 13]) # number of X_data's columns
Y = tf.placeholder(tf.float32, [None, 1]) # Regression's Y_data has just one column (real value)

dropout_sign = tf.placeholder(tf.bool) # layers.dropout() 은 True/False 로 Training/Testing 여부를 결정해 줄 수 있습니다.

In [6]:
L1 = layers.dense(X, 32, activation=tf.nn.elu, kernel_initializer=tf.keras.initializers.he_normal())
L1 = layers.dropout(L1, rate=0.2, training=dropout_sign) # layers.dropout()의 "rate"는 keeping rate가 아닌 dropping rate이며, default 값은 0.5입니다.

L2 = layers.dense(L1, 32, activation=tf.nn.elu, kernel_initializer=tf.keras.initializers.he_normal()) 
L2 = layers.dropout(L2, rate=0.2, training=dropout_sign) 

model = layers.dense(L2, 1, activation=None) # Regression's Y_data has just one column (real value)

In [7]:
cost = tf.losses.mean_squared_error(Y, model)
optimizer = tf.train.AdamOptimizer(1e-2).minimize(cost) # 1e-2 == 0.01

In [8]:
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [9]:
total_epoch = 1000

for epoch in range(total_epoch):
    
    # 매 Epoch마다 Total cost를 출력합니다.  
    _, cost_val = sess.run([optimizer, cost], feed_dict={X: train_data, Y: train_label, dropout_sign: False}) # , dropout_sign: True
    training_cost = cost_val 
    test_cost = sess.run(cost, feed_dict={X:test_data, Y: test_label, dropout_sign: False})
    
    if epoch % 200 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 
              '|| Avg. cost =', '{:.3f}'.format(training_cost), 
              '|| Test cost =', '{:.3f}'.format(test_cost))

print('Learning process is completed!')

Epoch: 0001 || Avg. cost = 586.163 || Test cost = 522.337
Epoch: 0201 || Avg. cost = 6.663 || Test cost = 16.112
Epoch: 0401 || Avg. cost = 3.395 || Test cost = 13.835
Epoch: 0601 || Avg. cost = 2.187 || Test cost = 14.997
Epoch: 0801 || Avg. cost = 1.548 || Test cost = 16.308
Learning process is completed!


In [10]:
# Test accuracy 를 출력합니다. dropout_sign을 False로 바꾸어 dropout을 걷어내줘야 합니다.
print('Test cost : {}'.format(sess.run(cost, 
                                           feed_dict={
                                               X: test_data, 
                                               Y: test_label, dropout_sign: False}))) # , dropout_sign: False

Test cost : 17.07024574279785


In [11]:
# 모델이 실제로 예측한 값을 출력합니다.
predicted_labels = sess.run(model, feed_dict={X: test_data, dropout_sign: False}) # test_label 불필요
print(predicted_labels[:10])

sess.close()

[[21.882969]
 [27.912704]
 [24.403309]
 [ 9.489595]
 [20.954891]
 [19.789492]
 [21.551228]
 [20.032963]
 [20.335062]
 [31.653019]]
