In [1]:
import numpy as np

## Boston Housing Dataset
  * See the [dataset description](https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html) for the meaning of each column.

In [2]:
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2. Use it
# when it is still available; otherwise rebuild the same fields from the
# original StatLib file so the rest of the notebook keeps working.
try:
    from sklearn.datasets import load_boston

    boston = load_boston()
except ImportError:
    import pandas as pd

    # After a 22-line header, each record is split over two physical lines:
    # 11 values on the first line and 3 on the second (2 features + target).
    raw_df = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    # Only the keys used by the following cells are provided.
    boston = {
        "data": np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        "target": raw_df.values[1::2, 2],
        "feature_names": np.array([
            "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
            "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT",
        ]),
    }

In [3]:
# Feature matrix: one row per sample, one column per feature.
X = boston["data"]
print(X.shape)
# Show the first sample as a sanity check.
X[0, :]

(506, 13)


array([  6.32000000e-03,   1.80000000e+01,   2.31000000e+00,
         0.00000000e+00,   5.38000000e-01,   6.57500000e+00,
         6.52000000e+01,   4.09000000e+00,   1.00000000e+00,
         2.96000000e+02,   1.53000000e+01,   3.96900000e+02,
         4.98000000e+00])

In [4]:
# Regression target, one value per sample.
y = boston["target"]
print(y.shape)
# Peek at the first ten targets.
y[:10]

(506,)


array([ 24. ,  21.6,  34.7,  33.4,  36.2,  28.7,  22.9,  27.1,  16.5,  18.9])

In [5]:
import pandas as pd

# Combine the features and the target into one labelled table; the target is
# appended as the last column under the name "PRICE".
data = pd.DataFrame(X, columns=boston["feature_names"]).assign(PRICE=y)

print(data.shape)
data.head()

(506, 14)


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


### Gradient Descent

In [6]:
# Pull every feature column out of the table as a plain NumPy array so that
# the arithmetic in the gradient-descent cells stays in NumPy. All thirteen
# must use `.values`; mixing in a pandas Series would turn the prediction
# into a Series as well.
x1 = data["CRIM"].values
x2 = data["ZN"].values
x3 = data["INDUS"].values
x4 = data["CHAS"].values
x5 = data["NOX"].values
x6 = data["RM"].values
x7 = data["AGE"].values
x8 = data["DIS"].values
x9 = data["RAD"].values
x10 = data["TAX"].values
x11 = data["PTRATIO"].values
x12 = data["B"].values
x13 = data["LSTAT"].values

In [7]:
# Hyper-parameters: upper bound on the number of passes and the step size.
num_epoch = 10000
learning_rate = 0.000005

# One starting weight per feature, drawn in order, followed by the bias.
w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13 = (
    np.random.uniform(low=0.0, high=1.0) for _ in range(13)
)

b = np.random.uniform(low=0.0, high=1.0)

# Shared formatter for the progress lines and the final summary line.
report = "{0:5} error = {1:.5f}".format

for epoch in range(num_epoch):
    # Linear model: weighted sum of the thirteen features plus the bias.
    y_predict = (
        x1 * w1
        + x2 * w2
        + x3 * w3
        + x4 * w4
        + x5 * w5
        + x6 * w6
        + x7 * w7
        + x8 * w8
        + x9 * w9
        + x10 * w10
        + x11 * w11
        + x12 * w12
        + x13 * w13
        + b
    )

    # Every update below needs the same residual, so compute it once.
    residual = y_predict - y

    # Stop as soon as the mean absolute error falls below 5.
    error = np.abs(residual).mean()
    if error < 5:
        break

    # Step each weight against the mean of residual * feature.
    w1 -= learning_rate * (residual * x1).mean()
    w2 -= learning_rate * (residual * x2).mean()
    w3 -= learning_rate * (residual * x3).mean()
    w4 -= learning_rate * (residual * x4).mean()
    w5 -= learning_rate * (residual * x5).mean()
    w6 -= learning_rate * (residual * x6).mean()
    w7 -= learning_rate * (residual * x7).mean()
    w8 -= learning_rate * (residual * x8).mean()
    w9 -= learning_rate * (residual * x9).mean()
    w10 -= learning_rate * (residual * x10).mean()
    w11 -= learning_rate * (residual * x11).mean()
    w12 -= learning_rate * (residual * x12).mean()
    w13 -= learning_rate * (residual * x13).mean()

    b -= learning_rate * residual.mean()

    # Progress line every 1000 epochs; the error shown was measured before
    # this epoch's update.
    if epoch % 1000 == 0:
        print(report(epoch, error))

print("----" * 10)
print(report(epoch, error))

    0 error = 516.04072
 1000 error = 8.16409
 2000 error = 6.86033
 3000 error = 6.23430
 4000 error = 5.79922
 5000 error = 5.49615
 6000 error = 5.28611
 7000 error = 5.15346
 8000 error = 5.06198
----------------------------------------
 8938 error = 4.99998


In [8]:
# Pair every feature array with its learned weight, in column order.
weighted_terms = [
    feature * weight
    for feature, weight in zip(
        (x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13),
        (w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13),
    )
]

# Add the terms left to right starting from the first one, then the bias.
y_predict = sum(weighted_terms[1:], weighted_terms[0]) + b

# Work on a copy so the original table is left untouched.
result = data.copy()
result["PRICE(predict)"] = y_predict
result.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE,PRICE(predict)
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0,25.194561
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6,25.795048
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7,27.360527
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4,28.193665
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2,27.839913


### Gradient Descent (using dot product)
  * See [this introduction to matrix multiplication](https://www.mathsisfun.com/algebra/matrix-multiplying.html) for how the dot product works.

In [13]:
# Vectorised version of the per-feature gradient descent: the thirteen scalar
# weights become one vector and the weighted sum becomes a matrix-vector
# product.
num_epoch = 100000
# Same step size as the scalar version; this is valid because the weight
# gradient below is averaged over the samples rather than summed.
learning_rate = 0.000005

# One starting weight per feature column, plus the bias.
w = np.random.uniform(low=0.0, high=1.0, size=X.shape[1])
b = np.random.uniform(low=0.0, high=1.0)

num_samples = len(y)

for epoch in range(num_epoch):
    y_predict = X.dot(w) + b
    residual = y_predict - y

    # Stop as soon as the mean absolute error falls below 5.
    error = np.abs(residual).mean()
    if error < 5:
        break

    # X.T.dot(residual) sums residual * feature over all samples; dividing by
    # the sample count makes it a mean, so the weights and the bias are
    # stepped on the same scale.
    w = w - learning_rate * X.T.dot(residual) / num_samples
    b = b - learning_rate * residual.mean()

    # Progress line every 10000 epochs; the error shown was measured before
    # this epoch's update.
    if epoch % 10000 == 0:
        print("{0:4} error = {1:.5f}".format(epoch, error))
    
print("----" * 10)
print("{0:4} error = {1:.5f}".format(epoch, error))

   0 error = 136.47206
10000 error = 5.30551
20000 error = 5.08598
----------------------------------------
29064 error = 4.99999


In [14]:
# Predict every sample at once with the learned weight vector and bias.
y_predict = np.dot(X, w) + b

# Show the predictions next to the true prices on a copy of the table.
result = data.copy()
result["PRICE(predict)"] = y_predict
result.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE,PRICE(predict)
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0,27.601609
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6,26.802447
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7,29.395525
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4,29.287225
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2,28.427056
