In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals
# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd

  from ._conv import register_converters as _register_converters


In [3]:
from sklearn.datasets import fetch_california_housing
reset_graph()
housing = fetch_california_housing()
m, n = housing.data.shape


from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m,1)), scaled_housing_data]

In [4]:
housing_cal = pd.read_csv('./california-housing-prices/housing_numric.csv')
housing_cal=housing_cal.dropna(axis=0)
housing_cal.isnull().sum(axis=0)
housing_cal.median_house_value=housing_cal.median_house_value/100000
housing_cal.isnull().sum(axis=0)
housing_cal=housing_cal.astype('float64')

from sklearn.model_selection import train_test_split
train, test = train_test_split(housing_cal, test_size=0.2)

x_train, y_train = train, train.pop("median_house_value")
x_test, y_test = test, test.pop("median_house_value")

feature_cols = [tf.feature_column.numeric_column("X", shape=[9])]

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.fit_transform(x_test)
m, n =x_train.shape
m1, n1 = x_test.shape
x_train_scaled_plus_bias = np.c_[np.ones((m, 1)), x_train_scaled]
x_test_scaled_plus_bias = np.c_[np.ones((m1, 1)), x_test_scaled]
y_train, y_test= y_train.to_numpy().reshape(-1,1), y_test.to_numpy().reshape(-1,1)

In [5]:
n_inputs = 8
n_hidden1 = 1024
n_hidden2 = 512
n_hidden3 = 256
n_outputs = 1

In [6]:
X = tf.placeholder(tf.float64, shape=(None, n_inputs+1), name="X")
y = tf.placeholder(tf.float64, shape=(None,1), name="y")

In [7]:
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1",
                              activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2",
                              activation=tf.nn.relu)
    hidden3 = tf.layers.dense(hidden2, n_hidden3, name="hidden3",
                              activation=tf.nn.relu)
    output = tf.layers.dense(hidden3, n_outputs, name="output")

In [8]:
with tf.name_scope("loss"):
    mse = tf.losses.mean_squared_error(labels=y, predictions=output)   # compute cost

In [9]:
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(mse)

In [10]:
init = tf.global_variables_initializer()

In [11]:
n_epochs = 30
batch_size = 50

In [12]:
#from book: hands on machine learning and tensorflow
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [13]:
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs+1):
        for X_batch, y_batch in shuffle_batch(x_train_scaled_plus_bias, y_train, batch_size):
            train, mse_, pred = sess.run([training_op, mse, output], feed_dict={X: X_batch, y: y_batch})
        if epoch % 10 == 0:
            print("Epoch", epoch, "MSE =", mse.eval(feed_dict={X: X_batch, y: y_batch}))
    print('mse_test :',sess.run(mse, feed_dict={X:x_test_scaled_plus_bias, y:y_test}))
    
    pred_test =sess.run(output, feed_dict={X:x_test_scaled_plus_bias})

Epoch 0 MSE = 0.26852173
Epoch 10 MSE = 0.18047287
Epoch 20 MSE = 0.16375984
Epoch 30 MSE = 0.17355014
mse_test : 0.31315097


In [16]:
pred_test[500:515]

array([[2.26694183],
       [1.00785001],
       [3.28916842],
       [1.84469159],
       [1.83710414],
       [3.04596103],
       [0.62423059],
       [4.75254152],
       [1.10005477],
       [2.20350022],
       [1.83657066],
       [1.27862244],
       [0.60975983],
       [1.00363277],
       [1.29436435]])

In [17]:
y_test[500:515]

array([[1.919  ],
       [0.993  ],
       [3.545  ],
       [1.808  ],
       [1.727  ],
       [2.824  ],
       [0.47   ],
       [5.00001],
       [1.08   ],
       [2.825  ],
       [2.012  ],
       [1.653  ],
       [0.661  ],
       [0.725  ],
       [1.139  ]])