## Boston Housing Price Prediction 

#### via Single Layer Perceptron

### Import Packages

In [1]:
from sklearn.datasets import load_boston
import numpy as np
import pandas as pd

### Loading Datasets

In [2]:
boston = load_boston()


In [3]:
boston.keys()


dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])

 :Attribute Information (in order):
- ```CRIM```:     per capita crime rate by town
- ```ZN```:       proportion of residential land zoned for lots over 25,000 sq.ft.
- ```INDUS```:    proportion of non-retail business acres per town
- ```CHAS```:     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
- ```NOX```:      nitric oxides concentration (parts per 10 million)
- ```RM```:       average number of rooms per dwelling
- ```AGE```:      proportion of owner-occupied units built prior to 1940
- ```DIS```:      weighted distances to five Boston employment centres
- ```RAD```:      index of accessibility to radial highways
- ```TAX```:      full-value property-tax rate per $10,000$
- ```PTRATIO```:  pupil-teacher ratio by town
- ```B```:        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
- ```LSTAT```:    % lower status of the population
- ```MEDV```:     Median value of owner-occupied homes in $1000's

In [4]:
for key in boston.keys():
    print(key)
    print(boston[key])
    print('\n')
    

data
[[6.3200e-03 1.8000e+01 2.3100e+00 ... 1.5300e+01 3.9690e+02 4.9800e+00]
 [2.7310e-02 0.0000e+00 7.0700e+00 ... 1.7800e+01 3.9690e+02 9.1400e+00]
 [2.7290e-02 0.0000e+00 7.0700e+00 ... 1.7800e+01 3.9283e+02 4.0300e+00]
 ...
 [6.0760e-02 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9690e+02 5.6400e+00]
 [1.0959e-01 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9345e+02 6.4800e+00]
 [4.7410e-02 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9690e+02 7.8800e+00]]


target
[24.  21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 15.  18.9 21.7 20.4
 18.2 19.9 23.1 17.5 20.2 18.2 13.6 19.6 15.2 14.5 15.6 13.9 16.6 14.8
 18.4 21.  12.7 14.5 13.2 13.1 13.5 18.9 20.  21.  24.7 30.8 34.9 26.6
 25.3 24.7 21.2 19.3 20.  16.6 14.4 19.4 19.7 20.5 25.  23.4 18.9 35.4
 24.7 31.6 23.3 19.6 18.7 16.  22.2 25.  33.  23.5 19.4 22.  17.4 20.9
 24.2 21.7 22.8 23.4 24.1 21.4 20.  20.8 21.2 20.3 28.  23.9 24.8 22.9
 23.9 26.6 22.5 22.2 23.6 28.7 22.6 22.  22.9 25.  20.6 28.4 21.4 38.7
 43.8 33.2 27.5 26.5 18.6 19.3 20.1 19.5 19.5

In [5]:
X = boston['data']
print(X.shape)
X[:0]


(506, 13)


array([], shape=(0, 13), dtype=float64)

In [6]:
y = boston['target']
print(y.shape)
y[:0]


(506,)


array([], dtype=float64)

In [7]:
data = pd.DataFrame(X, columns = boston['feature_names'])

data['MEDV'] = y
print(data.shape)
data



(506, 14)


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0


## Gradient Descent

In [8]:

x1 = X[:, 0] # CRIM
x2 = X[:, 1] # ZN
x3 = X[:, 2] # INDUS
x4 = X[:, 3] # CHAS
x5 = X[:, 4] # NOX
x6 = X[:, 5] # RM
x7 = X[:, 6] # AGE
x8 = X[:, 7] # DIS
x9 = X[:, 8] # RAD
x10 = X[:, 9] # TAX
x11 = X[:, 10] # PTRATIO
x12 = X[:, 11] # B
x13 = X[:, 12] # LSTAT


In [9]:
num_epoch = 30000
learning_rate = 0.000006

w1 = np.random.uniform(low = -1.0, high = +1.0)
w2 = np.random.uniform(low = -1.0, high = +1.0)
w3 = np.random.uniform(low = -1.0, high = +1.0)
w4 = np.random.uniform(low = -1.0, high = +1.0)
w5 = np.random.uniform(low = -1.0, high = +1.0)
w6 = np.random.uniform(low = -1.0, high = +1.0)
w7 = np.random.uniform(low = -1.0, high = +1.0)
w8 = np.random.uniform(low = -1.0, high = +1.0)
w9 = np.random.uniform(low = -1.0, high = +1.0)
w10 = np.random.uniform(low = -1.0, high = +1.0)
w11 = np.random.uniform(low = -1.0, high = +1.0)
w12 = np.random.uniform(low = -1.0, high = +1.0)
w13 = np.random.uniform(low = -1.0, high = +1.0)

b = np.random.uniform(low = -1.0, high = +1.0)


for epoch in range(num_epoch):

    y_predict = (w1 * x1 + w2 * x2 + w3 * x3 + w4 * x4 + w5 * x5 
                 + w6 * x6 + w7 * x7 + w8 * x8 + w9 * x9 + w10 * x10 
                 + w11 * x11 + w12 * x12 + w13 * x13 + b)
                 
    error = np.abs(y_predict - y).mean()
    
    if error < 5 : break
    
    if epoch % 1000 == 0: 
        print(f"{epoch}, error = {error:.6f}")
    
    w1 = w1 - learning_rate * ((y_predict - y) * x1).mean()
    w2 = w2 - learning_rate * ((y_predict - y) * x2).mean()
    w3 = w3 - learning_rate * ((y_predict - y) * x3).mean()
    w4 = w4 - learning_rate * ((y_predict - y) * x4).mean()
    w5 = w5 - learning_rate * ((y_predict - y) * x5).mean()
    w6 = w6 - learning_rate * ((y_predict - y) * x6).mean()
    w7 = w7 - learning_rate * ((y_predict - y) * x7).mean()
    w8 = w8 - learning_rate * ((y_predict - y) * x8).mean()
    w9 = w9 - learning_rate * ((y_predict - y) * x9).mean()
    w10 = w10 - learning_rate * ((y_predict - y) * x10).mean()
    w11 = w11 - learning_rate * ((y_predict - y) * x11).mean()
    w12 = w12 - learning_rate * ((y_predict - y) * x12).mean()
    w13 = w13 - learning_rate * ((y_predict - y) * x13).mean()

    b = b - learning_rate * (y_predict - y).mean()

print("----" * 20)
print(f"{epoch}, error = {error:.6f}")


0, error = 311.438776
1000, error = 6.907881
2000, error = 6.575133
3000, error = 6.394085
4000, error = 6.244195
5000, error = 6.114397
6000, error = 6.002845
7000, error = 5.902888
8000, error = 5.815486
9000, error = 5.738618
10000, error = 5.671275
11000, error = 5.611451
12000, error = 5.555681
13000, error = 5.504998
14000, error = 5.459794
15000, error = 5.418834
16000, error = 5.382402
17000, error = 5.350223
18000, error = 5.320722
19000, error = 5.292859
20000, error = 5.266337
21000, error = 5.241092
22000, error = 5.217453
23000, error = 5.195458
24000, error = 5.174803
25000, error = 5.155340
26000, error = 5.137194
27000, error = 5.119929
28000, error = 5.103339
29000, error = 5.087392
--------------------------------------------------------------------------------
29999, error = 5.071926


In [10]:
print(f"w1 = {w1:.6f}, w2 = {w2:.6f}, w3 = {w3:.6f}, \n"
      f"w4 = {w4:.6f}, w5 = {w5:.6f}, w6 = {w6:.6f}, \n"
      f"w7 = {w7:.6f}, w8 = {w8:.6f}, w9 = {w9:.6f}, \n"
      f"w10 = {w10:.6f}, w11 = {w11:.6f}, w12 = {w12:.6f}, \n"
      f"w13 = {w13:.6f}, , b = {b:.6f}, error = {error:.6f}")

w1 = -0.090959, w2 = 0.115478, w3 = 0.062498, 
w4 = -0.596278, w5 = -0.964605, w6 = 0.570575, 
w7 = 0.116662, w8 = 0.074754, w9 = 0.170511, 
w10 = -0.007118, w11 = 0.510526, w12 = 0.030062, 
w13 = -0.844324, , b = 1.015451, error = 5.071926


## Gradient Descent (using dot product)

In [40]:
X = np.array([x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13])
print(X.T.shape)
print(X[1].shape)


(506, 13)
(506,)


In [37]:
w = np.array(np.random.uniform(low = -0.1, high = +0.1, size = (13,1)))
print(w.shape, y.shape)
w

(13, 1) (506,)


array([[-0.01434678],
       [ 0.0610277 ],
       [ 0.00956727],
       [-0.08912895],
       [-0.05725328],
       [-0.06408393],
       [ 0.09039182],
       [-0.04735311],
       [-0.01888419],
       [-0.05864137],
       [-0.08230922],
       [ 0.06060631],
       [-0.01754794]])

In [24]:
b = np.random.uniform(low = -0.1, high = 0.1)
b


-0.05812751323903625

In [43]:
num_epoch = 100000
learning_rate = 0.000002
num_data = X[1].shape

w = np.array(np.random.uniform(low = -0.1, high = +0.1, size = 13))
b = np.random.uniform(low = -0.1, high = 0.1)


for epoch in range(num_epoch):
   
    y_predict = X.T.dot(w)  + b
    error = np.abs(y_predict - y).mean()
    if error < 5: break
    if epoch % 10000 == 0:
        print(f"epoch = {epoch}, error = {error:.6f}")

    w = w - learning_rate * (np.dot(X, (y_predict - y))) / num_data
    b = b - learning_rate * (y_predict - y).mean()

    
print("----" * 10)
print(f"epoch = {epoch}, w = {w}, b = {b:.6f}, error = {error:.6f}")



epoch = 0, error = 25.574837
epoch = 10000, error = 5.679418
epoch = 20000, error = 5.412690
epoch = 30000, error = 5.297905
epoch = 40000, error = 5.215988
epoch = 50000, error = 5.147892
epoch = 60000, error = 5.086544
epoch = 70000, error = 5.031622
----------------------------------------
epoch = 76439, w = [-0.08729043  0.10416889 -0.03930278  0.12058797  0.08658318  0.73283842
  0.11458623  0.10990745  0.08598251 -0.00107371  0.41730775  0.03028461
 -0.79545974], b = -0.019629, error = 5.000000


In [None]:
# From Gradient Descent
# w1 = -0.042623, w2 = 0.161520, w3 = 0.241758, 
# w4 = 0.128965, w5 = -0.387602, w6 = 0.995259, 
# w7 = 0.069357, w8 = -0.921506, w9 = 0.203637, 
# w10 = -0.019277, w11 = 0.955265, w12 = 0.026578, 
# w13 = -0.881175, , b = 0.519338, error = 4.999995

## Applying to existing data

In [49]:
data['MEDV_prediction'] = X.T.dot(w) + b
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV,MEDV_prediction
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0,28.761153
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6,26.122719
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7,28.584515
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4,28.410057
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2,27.64617
