# Linear Regression: Boston Housing Dataset

The second regression example uses a real dataset with 13 features.
- The output **label** is the house price

In [4]:
import tensorflow as tf
import numpy as np

from sklearn.datasets import load_boston
from sklearn.preprocessing import scale

###### Initialize constants

In [5]:
# Hyper-parameters, kept in one cell so they are easy to tune.
# Learning rate (step size).
lr = 0.01
# Number of training epochs.
n_epochs = 1000

###### Load and pre-process the dataset

In [33]:
# Fractions of the rows used for training and validation; the rest is the test set.
train_ratio = .6
valid_ratio = .15

# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell requires an older scikit-learn release.
boston = load_boston()
x = boston.data
y = boston.target

data_size = x.shape[0]
n_train = int(train_ratio * data_size)            # row index where the training split ends
n_valid = int(valid_ratio * data_size) + n_train  # row index where the validation split ends

# Standardise every split with the mean / standard deviation of the TRAINING
# rows only. Scaling each split with its own statistics would put the three
# sets on different scales, so weights learned on the training set would not
# transfer, and it would leak validation/test information into pre-processing.
train_mean = x[:n_train].mean(axis=0)
train_std = x[:n_train].std(axis=0)
# Constant columns have zero spread; divide by 1 instead of 0 for those.
train_std[train_std == 0] = 1.0

x_train = (x[:n_train] - train_mean) / train_std
y_train = y[:n_train]

x_valid = (x[n_train:n_valid] - train_mean) / train_std
y_valid = y[n_train:n_valid]

x_test = (x[n_valid:] - train_mean) / train_std
y_test = y[n_valid:]

Scaling the data standardizes each feature to roughly zero mean and unit variance.
This helps gradient descent converge.

In [34]:
def loss(x,  y, weights, bias):
    """
        Root-mean-square error of the linear model `x @ weights + bias`
        measured against the targets `y`.

        Args:
            x: feature matrix with one row per sample; cast to the dtype of
               `weights` before use.
            y: target values, one per prediction; cast to the dtype of
               `weights` and reshaped to the shape of the predictions.
            weights: weight matrix, multiplied on the right of `x`.
            bias: offset added to every prediction.

        Returns:
            Scalar tensor holding sqrt(mean((y - prediction) ** 2)).
    """
    # NumPy inputs are usually float64 while the variables are float32;
    # tf.matmul rejects mixed dtypes, so align everything with `weights`.
    x = tf.cast(x, weights.dtype)
    y = tf.cast(y, weights.dtype)

    pred = tf.matmul(x, weights) + bias
    # A flat (n,) target against (n, 1) predictions would silently broadcast
    # to an (n, n) matrix, so give the targets the prediction shape first.
    err = tf.reshape(y, tf.shape(pred)) - pred
    sq_err = tf.square(err)
    return tf.sqrt(tf.reduce_mean(input_tensor=sq_err))

In [35]:
def grad(x, y, weights, bias):
    """
        Differentiates `loss(x, y, weights, bias)` with respect to the
        trainable parameters.

        Returns the gradients as a list in the order [weights, bias].
    """
    params = [weights, bias]
    # Record the forward pass so the tape can back-propagate through it.
    with tf.GradientTape() as recorder:
        current_loss = loss(x, y, weights, bias)
    return recorder.gradient(current_loss, params)

#### Initialize training parameters

In [37]:
# Weights: a (13, 1) matrix drawn from a standard normal distribution.
W = tf.Variable(tf.random.normal(shape=[13, 1]))
# Bias: a single value of shape (1,), starting at zero.
b = tf.Variable(tf.zeros(shape=[1]))

<tf.Variable 'Variable:0' shape=(13, 1) dtype=float32, numpy=
array([[ 1.6218798 ],
       [ 0.8597148 ],
       [-0.8638655 ],
       [ 0.7555596 ],
       [-3.451998  ],
       [ 0.02551743],
       [ 0.43267754],
       [ 1.6368196 ],
       [ 1.1372801 ],
       [-1.8005888 ],
       [ 1.1697638 ],
       [ 1.2778884 ],
       [ 0.40321383]], dtype=float32)>