# Linear Regression: Boston Housing Dataset

#### Issue

The second regression example uses a real dataset with 13 features.
- The output **label** is the house price

In [None]:
import tensorflow as tf
import numpy as np

from sklearn.datasets import load_boston
from sklearn.preprocessing import scale

###### Initialize constants

In [18]:
# Gradient-descent step size.
lr = 0.01
# Number of passes the training loop makes over the training set.
n_epochs = 2_000

###### load and pre-process the dataset

In [19]:
# Fractions of the dataset used for training and validation; the remainder
# is the test split.
train_ratio = .6
valid_ratio = .15

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release to run.
boston = load_boston()
x = boston.data
y = boston.target

data_size = x.shape[0]
n_features = x.shape[1]
n_train = int(train_ratio * data_size)
# Despite its name, n_valid is the END index of the validation slice
# (n_train + number of validation rows), not the validation row count.
n_valid = int(valid_ratio * data_size) + n_train

# Standardize every split with statistics computed on the TRAINING rows only.
# Scaling each split with its own mean/std would give the model differently
# transformed features at validation/test time than it was trained on.
feat_mean = x[:n_train].mean(axis=0)
feat_std = x[:n_train].std(axis=0)
# Guard against constant columns so we never divide by zero.
feat_std = np.where(feat_std > 0, feat_std, 1.0)

x_train = (x[:n_train] - feat_mean) / feat_std
y_train = y[:n_train]

x_valid = (x[n_train:n_valid] - feat_mean) / feat_std
y_valid = y[n_train:n_valid]

x_test = (x[n_valid:] - feat_mean) / feat_std
y_test = y[n_valid:]

Scaling standardizes each feature to roughly zero mean and unit variance.
This helps gradient descent converge.

In [None]:
def loss(x,  y, weights, bias):
    """
        Root-mean-square error of the linear model `x @ weights + bias`.

        x       : features, shape (n_samples, n_features)
        y       : targets, shape (n_samples,) or (n_samples, 1)
        weights : model weights, shape (n_features, 1)
        bias    : model bias, broadcastable to (n_samples, 1)

        Returns a scalar tensor: sqrt(mean((y_pred - y) ** 2)).
    """
    # The notebook passes NumPy arrays (typically float64) while the
    # variables are float32; tf.matmul requires matching dtypes.
    x = tf.cast(x, weights.dtype)
    # Force targets into a column so the subtraction below is row-wise;
    # an (n, 1) prediction minus an (n,) target would broadcast to (n, n).
    y = tf.reshape(tf.cast(y, weights.dtype), [-1, 1])

    # Matrix product, not element-wise `*`: (n, f) @ (f, 1) -> (n, 1).
    y_pred = tf.matmul(x, weights) + bias
    sq_err = tf.square(y_pred - y)

    return tf.sqrt(tf.reduce_mean(input_tensor=sq_err))

In [None]:
def grad(x, y, weights, bias):
    """
        Differentiates the loss with respect to the model parameters.

        Returns a two-element list: [d(loss)/d(weights), d(loss)/d(bias)].
    """
    params = [weights, bias]
    # Record the forward pass so the tape can back-propagate through it.
    with tf.GradientTape() as recorder:
        current_loss = loss(x, y, weights, bias)
    return recorder.gradient(current_loss, params)

#### Initialize training parameters

In [116]:
# Seed TensorFlow's global generator so the random initial weights (and
# therefore the whole training run) are reproducible when this cell is re-run.
tf.random.set_seed(42)

W = tf.Variable(tf.random.normal([n_features, 1])) # (13, 1)
B = tf.Variable(tf.zeros(1)) # (1,)

In [117]:
# Sanity-check the parameter shapes: weights first, then bias.
for param in (W, B):
    print(param.numpy().shape)

(13, 1)
(1,)


##### Train

In [None]:
# Report the training loss once every `display_epoch` epochs.
display_epoch = 20
for epoch in range(n_epochs):
    dw, db = grad(x_train, y_train, W, B)

    # Plain gradient-descent step: param <- param - lr * gradient.
    W.assign_sub(lr * dw)
    B.assign_sub(lr * db)

    if epoch % display_epoch == 0:
        # Evaluate the loss only when it is printed; previously `loss_` was
        # never assigned, so the print raised NameError on a fresh kernel.
        loss_ = loss(x_train, y_train, W, B).numpy()
        print(f'epoch: {epoch}, loss: {loss_}')

#### Predict on test data

In [None]:
# Predict on the held-out, scaled test features (not the raw full dataset).
# Cast to the weights' dtype and use a matrix product:
# (n_test, n_features) @ (n_features, 1) -> (n_test, 1).
y_pred = tf.matmul(tf.cast(x_test, W.dtype), W) + B