In [1]:
from sklearn import datasets, metrics, preprocessing
import tensorflow as tf
from tensorflow.contrib import learn

In [2]:
# Load the Boston House Prices dataset that ships with scikit-learn.
# NOTE(review): load_boston is deprecated/removed in newer scikit-learn
# releases — confirm the version this notebook is pinned to.
boston = datasets.load_boston()

Boston House Prices dataset
===========================

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM  :   per capita crime rate by town
        - ZN     :  proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS :   proportion of non-retail business acres per town
        - CHAS  :   Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX   :   nitric oxides concentration (parts per 10 million)
        - RM     :  average number of rooms per dwelling
        - AGE    :  proportion of owner-occupied units built prior to 1940
        - DIS     : weighted distances to five Boston employment centres
        - RAD    :  index of accessibility to radial highways
        - TAX    :  full-value property-tax rate per $10,000
        - PTRATIO : pupil-teacher ratio by town
        - B       : 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
        - LSTAT   : % lower status of the population
        - MEDV   :  Median value of owner-occupied homes in $1000's

    :Missing Attribute Values: None

    :Creator: Harrison, D. and Rubinfeld, D.L.

This is a copy of UCI ML housing dataset.
http://archive.ics.uci.edu/ml/datasets/Housing


This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.

The Boston house-price data of Harrison, D. and Rubinfeld, D.L. 'Hedonic
prices and the demand for clean air', J. Environ. Economics & Management,
vol.5, 81-102, 1978.   Used in Belsley, Kuh & Welsch, 'Regression diagnostics
...', Wiley, 1980.   N.B. Various transformations are used in the table on
pages 244-261 of the latter.

The Boston house-price data has been used in many machine learning papers that address regression
problems.   
     
**References**

   - Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.
   - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.
   - many more! (see http://archive.ics.uci.edu/ml/datasets/Housing)


# Data Exploration

In [3]:
# Quick overview of the loaded dataset: container type and keys, feature
# names, a sample of targets, a sample of rows, and the array shapes.
# Consecutive sections are separated by a 100-character rule of asterisks.
rule = '*' * 100
sections = [
    (type(boston), list(boston.keys())),
    (boston['feature_names'],),
    (boston['target'][:10],),   # first 10 target values only
    (boston['data'][:3],),      # first 3 data rows only
    (type(boston['data']), boston['data'].shape, boston['target'].shape),
]
for position, fields in enumerate(sections):
    if position:
        # Rule goes between sections, never before the first one.
        print(rule)
    print(*fields)


<class 'sklearn.utils.Bunch'> ['data', 'target', 'feature_names', 'DESCR']
****************************************************************************************************
['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']
****************************************************************************************************
[24.  21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9]
****************************************************************************************************
[[6.3200e-03 1.8000e+01 2.3100e+00 0.0000e+00 5.3800e-01 6.5750e+00
  6.5200e+01 4.0900e+00 1.0000e+00 2.9600e+02 1.5300e+01 3.9690e+02
  4.9800e+00]
 [2.7310e-02 0.0000e+00 7.0700e+00 0.0000e+00 4.6900e-01 6.4210e+00
  7.8900e+01 4.9671e+00 2.0000e+00 2.4200e+02 1.7800e+01 3.9690e+02
  9.1400e+00]
 [2.7290e-02 0.0000e+00 7.0700e+00 0.0000e+00 4.6900e-01 7.1850e+00
  6.1100e+01 4.9671e+00 2.0000e+00 2.4200e+02 1.7800e+01 3.9283e+02
  4.0300e+00]]
********************************

In [7]:
# Show the first 3 rows before and after standardisation, split by a rule.
print(boston['data'][:3], '*' * 100, sep='\n')   # raw rows, then the separator
standardizer = preprocessing.StandardScaler()
x_data = standardizer.fit_transform(boston.data)
print(x_data[:3])     # the same 3 rows after scaling



[[6.3200e-03 1.8000e+01 2.3100e+00 0.0000e+00 5.3800e-01 6.5750e+00
  6.5200e+01 4.0900e+00 1.0000e+00 2.9600e+02 1.5300e+01 3.9690e+02
  4.9800e+00]
 [2.7310e-02 0.0000e+00 7.0700e+00 0.0000e+00 4.6900e-01 6.4210e+00
  7.8900e+01 4.9671e+00 2.0000e+00 2.4200e+02 1.7800e+01 3.9690e+02
  9.1400e+00]
 [2.7290e-02 0.0000e+00 7.0700e+00 0.0000e+00 4.6900e-01 7.1850e+00
  6.1100e+01 4.9671e+00 2.0000e+00 2.4200e+02 1.7800e+01 3.9283e+02
  4.0300e+00]]
****************************************************************************************************
[[-0.41771335  0.28482986 -1.2879095  -0.27259857 -0.14421743  0.41367189
  -0.12001342  0.1402136  -0.98284286 -0.66660821 -1.45900038  0.44105193
  -1.0755623 ]
 [-0.41526932 -0.48772236 -0.59338101 -0.27259857 -0.74026221  0.19427445
   0.36716642  0.55715988 -0.8678825  -0.98732948 -0.30309415  0.44105193
  -0.49243937]
 [-0.41527165 -0.48772236 -0.59338101 -0.27259857 -0.74026221  1.28271368
  -0.26581176  0.55715988 -0.8678825  -0.9873294

In [7]:
# Model inputs: the feature matrix passed through StandardScaler.
feature_scaler = preprocessing.StandardScaler()
x_data = feature_scaler.fit_transform(boston.data)
# Regression targets, taken from the dataset unchanged.
y_data = boston.target

In [37]:
# Fit a linear regressor on the standardised features with plain gradient
# descent, then print the metrics evaluated on the same data it was fitted on
# (there is no held-out split in this cell).
NUM_STEPS = 200        # number of training steps passed to fit()
MINIBATCHSIZE = 506    # equals the dataset's 506 instances, so each step sees every row
feature_columns = learn.infer_real_valued_columns_from_input(x_data)
reg = learn.LinearRegressor(feature_columns=feature_columns,
                            optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1))
# Train and evaluate against the prepared y_data (the same array as
# boston.target) so the x_data / y_data pair is used consistently.
reg.fit(x_data, y_data, steps=NUM_STEPS, batch_size=MINIBATCHSIZE)
# NOTE(review): evaluate() appears to return a collection of metrics rather
# than a single scalar — confirm; the MSE name is kept for any later cells.
MSE = reg.evaluate(x_data, y_data, steps=1)
print(MSE)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f0dacc13908>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_train_distribute': None, '_device_fn': None, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': '/tmp/tmpbrmyfdct'}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpbrmyfdct/model.ckpt.
INFO:tensorflow:loss = 592.1469, st