In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import random
import math
from sklearn.datasets import fetch_california_housing
from IPython.display import clear_output, Image, display, HTML

###### Do not modify here ###### 
def strip_consts(graph_def, max_const_size=32):
    """Copy graph_def, replacing large constant payloads with a short placeholder.

    Args:
        graph_def: object exposing a `.node` sequence of graph nodes; every
            node is merged into a freshly created `tf.GraphDef`.
        max_const_size: size threshold in bytes; a 'Const' node whose
            `tensor_content` is longer than this gets its payload replaced.

    Returns:
        A new `tf.GraphDef` holding the (possibly stripped) copies of the nodes.
    """
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add()
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                # tensor_content is a bytes field: assigning a text string
                # raises TypeError on Python 3, so encode the placeholder.
                tensor.tensor_content = ("<stripped %d bytes>" % size).encode("utf-8")
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Render a TensorFlow graph inline in the notebook output.

    Args:
        graph_def: either an object exposing `as_graph_def()` (it is converted
            first) or a graph definition whose `str()` form is embedded in
            the page.
        max_const_size: currently unused, because the `strip_consts` call
            below is commented out.

    The graph text is injected into a `<tf-graph-basic>` element whose
    component is loaded from tensorboard.appspot.com, and the whole snippet is
    wrapped in an iframe via `srcdoc` and shown with `display(HTML(...))`.
    """
    # Accept a graph-like object as well as an already-built definition.
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    # Constant stripping is disabled: the definition is embedded unmodified.
    strip_def = graph_def
    #strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    # Doubled braces are literal braces for str.format; {id} and {data} are filled in below.
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    # Double quotes are escaped so the snippet can sit inside the srcdoc="..." attribute.
    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))
###### Do not modify  here ######

###### Implement Data Preprocess here ######
# Fetch the California housing dataset object used by every later cell.
housing = fetch_california_housing()

# Report the sizes of the feature matrix and the target vector.
print("Shape of dataset:", np.shape(housing.data))
print("Shape of label:", np.shape(housing.target))

# Show the bundled description followed by the feature (column) names.
print(housing.DESCR)
print("Features:", housing.feature_names)


###### Implement Data Preprocess here ######

Shape of dataset: (20640, 8)
Shape of label: (20640,)
California housing dataset.

The original database is available from StatLib

    http://lib.stat.cmu.edu/

The data contains 20,640 observations on 9 variables.

This dataset contains the average house value as target variable
and the following input variables (features): average income,
housing average age, average rooms, average bedrooms, population,
average occupation, latitude, and longitude in that order.

References
----------

Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions,
Statistics and Probability Letters, 33 (1997) 291-297.


Features: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']


In [2]:
# Prepare train and test data: the first 90% of the rows (in stored order)
# are used for training and the remaining 10% for testing.
split_point = housing.data.shape[0]*9//10
# Reshape the targets into an (N, 1) column for the matrix maths below, but
# keep the result in a separate name so `housing.target` itself is never
# overwritten and earlier cells that printed its shape stay accurate.
targets = housing.target.reshape(-1, 1)
x_train, x_test = housing.data[:split_point], housing.data[split_point:]
y_train, y_test = targets[:split_point], targets[split_point:]

In [3]:
# Sanity check: print the shape of each split, one per line.
print(*(part.shape for part in (x_train, y_train, x_test, y_test)), sep="\n")

(18576, 8)
(18576, 1)
(2064, 8)
(2064, 1)


In [6]:
# Append a column of ones as the last feature of each split, so the last row
# of the weight matrix multiplies a constant 1.
x_train_modified = np.hstack((x_train, np.ones((x_train.shape[0], 1))))
x_test_modified = np.hstack((x_test, np.ones((x_test.shape[0], 1))))

In [7]:
# Confirm the extra ones column was added to the training matrix.
print(x_train_modified.shape)

(18576, 9)


In [8]:
# Clear graph so the ops below are created in a fresh default graph.
tf.reset_default_graph()

# Inputs of the TF model (all float64):
#   x: feature matrix, any number of rows, one column per column of x_train_modified
#   y: target column vector
#   W: weight column vector, fed in at run time (a placeholder, not a trained variable)
x = tf.placeholder(tf.float64, shape = [None, x_train_modified.shape[1]], name = "x")
y = tf.placeholder(tf.float64, shape = [None, 1], name = "y")
W = tf.placeholder(tf.float64, shape = [x_train_modified.shape[1], 1], name = "weight")

# Closed-form least-squares weights via the normal equation:
#   W_true = (x^T x)^-1 x^T y
# Transpose the input matrix x
x_trans = tf.transpose(x, name = "x_transpose")
# M = (x^T x)^-1 x^T, computed with an explicit matrix inverse.
M = tf.matmul(tf.matrix_inverse(tf.matmul(x_trans, x, name="XT_by_x"), name = "inverse_XT_by_x"), x_trans)
W_true = tf.matmul(M, y, name = "weight_answers")

# Construct the model: linear prediction x @ W using the fed-in weights.
y_ = tf.matmul(x, W, name = "linear_regression")

# Evaluation metric (not a squared error): mean of |(prediction - y) / y|,
# i.e. the mean absolute relative error. Nothing in this cell minimizes it;
# the weights come from the closed-form solution above.
# NOTE(review): this divides by y, so it assumes no target is zero -- confirm.
loss = tf.reduce_mean(tf.abs((y_ - y) / y), name = "mean_error_rate")

In [9]:
# Solve for the closed-form weights on the training split, then report the
# mean relative error on both splits using those weights.
###### Start TF session ######
with tf.Session() as sess:
    # W_true is built only from x and y, so the W placeholder does not need
    # to be fed (no dummy initial weights required) when fetching it.
    res = sess.run(W_true, {x: x_train_modified, y: y_train})
    print(res)
    # The loss reads W, so the solved weights are fed back in here.
    print("Training Error Rate: ", sess.run(loss, {x: x_train_modified, y: y_train, W: res}))
    print("Testing Error Rate: ", sess.run(loss, {x: x_test_modified, y: y_test, W: res}))
###### Start TF session ######

[[  4.42440871e-01]
 [  9.84845976e-03]
 [ -1.13854700e-01]
 [  6.81118426e-01]
 [ -1.69407374e-06]
 [ -5.61851833e-03]
 [ -4.31983935e-01]
 [ -4.46590962e-01]
 [ -3.80329231e+01]]
Training Error Rate:  0.316859910108
Testing Error Rate:  0.344205405992


In [10]:
# Visualize the current default graph. Only the graph definition is needed,
# so no tf.Session is opened here.
show_graph(tf.get_default_graph().as_graph_def())

In [11]:
# Wrap the training features in a labelled DataFrame and show summary
# statistics (count, mean, std, min, quartiles, max) per feature.
x_train_pd = pd.DataFrame(data=x_train, columns=housing.feature_names)
x_train_pd.describe()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
count,18576.0,18576.0,18576.0,18576.0,18576.0,18576.0,18576.0,18576.0
mean,3.917221,29.118594,5.403187,1.095654,1433.185831,3.020782,35.430371,-119.409006
std,1.932617,12.65881,2.54363,0.485395,1142.294876,6.078652,2.061388,1.993468
min,0.4999,1.0,0.846154,0.375,3.0,0.692308,32.54,-124.35
25%,2.585725,19.0,4.39144,1.005917,793.0,2.421582,33.9,-121.54
50%,3.5719,30.0,5.188982,1.048474,1170.0,2.819232,34.16,-118.37
75%,4.8036,38.0,6.031433,1.098765,1727.0,3.296951,37.58,-117.95
max,15.0001,52.0,141.909091,34.066667,35682.0,599.714286,41.95,-114.31


In [12]:
# Build a second DataFrame from the raw values of x_train_pd, with the same
# feature names as column labels.
x_train_data = pd.DataFrame(
    data=x_train_pd.values,
    columns=housing.feature_names,
)

In [13]:
# Preview only the first rows instead of rendering the whole training frame.
x_train_data.head(10)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25
5,4.0368,52.0,4.761658,1.103627,413.0,2.139896,37.85,-122.25
6,3.6591,52.0,4.931907,0.951362,1094.0,2.128405,37.84,-122.25
7,3.1200,52.0,4.797527,1.061824,1157.0,1.788253,37.84,-122.25
8,2.0804,42.0,4.294118,1.117647,1206.0,2.026891,37.84,-122.26
9,3.6912,52.0,4.970588,0.990196,1551.0,2.172269,37.84,-122.25


In [17]:
from sklearn import preprocessing
from sklearn.preprocessing import PolynomialFeatures

In [18]:
# Two-stage feature pipeline, fitted on the training split only and then
# applied unchanged to the test split:
#   1. quantile transform with 2 quantiles
#   2. polynomial feature expansion of degree 2
quantileTransformer = preprocessing.QuantileTransformer(n_quantiles=2)
poly = PolynomialFeatures(degree=2)

x_train__ = poly.fit_transform(quantileTransformer.fit_transform(x_train))
x_test__ = poly.transform(quantileTransformer.transform(x_test))

In [19]:
# Clear graph so the ops below are created in a fresh default graph.
tf.reset_default_graph()

# Define placeholders for the input features x, the labels y and the weights W.
# The column count follows the transformed training matrix x_train__.
# NOTE(review): these are float32 while the earlier model used float64;
# the explicit matrix inverse below may lose precision -- confirm this is intended.
x = tf.placeholder(tf.float32, shape = [None, x_train__.shape[1]], name="Input")
y = tf.placeholder(tf.float32, shape = [None, 1], name="Y")
W = tf.placeholder(tf.float32, shape = [x_train__.shape[1], 1], name="W")

# Closed-form least-squares weights via the normal equation:
#   W_true = (x^T x)^-1 x^T y
# Transpose the input matrix x
x_trans = tf.transpose(x, name = "x_transpose")
# M = (x^T x)^-1 x^T, computed with an explicit matrix inverse.
M = tf.matmul(tf.matrix_inverse(tf.matmul(x_trans, x, name="XT_by_x"), name = "inverse_XT_by_x"), x_trans)
W_true = tf.matmul(M, y, name = "weight_answers")

# Construct the model: linear prediction x @ W using the fed-in weights.
y_ = tf.matmul(x, W, name = "linear_regression")

# Evaluation metric (not a squared error): mean of |(prediction - y) / y|,
# i.e. the mean absolute relative error. Nothing in this cell minimizes it;
# the weights come from the closed-form solution above.
# NOTE(review): this divides by y, so it assumes no target is zero -- confirm.
loss = tf.reduce_mean(tf.abs((y_ - y) / y), name = "mean_error_rate")

In [21]:
# Solve for the closed-form weights on the transformed training split, then
# report the mean relative error on both splits using those weights.
###### Start TF session ######
with tf.Session() as sess:
    # W_true is built only from x and y, so the W placeholder does not need
    # to be fed (no dummy initial weights required) when fetching it.
    res = sess.run(W_true, {x: x_train__, y: y_train})
    # The loss reads W, so the solved weights are fed back in here.
    print("Training Error Rate: ", sess.run(loss, {x: x_train__, y: y_train, W: res}))
    print("Testing Error Rate: ", sess.run(loss, {x: x_test__, y: y_test, W: res}))
###### Start TF session ######

Training Error Rate:  0.264081
Testing Error Rate:  0.292116
