In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [13]:
from sklearn.datasets import load_boston

boston = load_boston()

data = pd.DataFrame(boston.data, columns = boston.feature_names)
data['PRICE'] = boston.target

In [14]:
correlations = data.corr()['PRICE']

features = correlations[abs(correlations) > 0.35]

featureNames = [name for name, cor in features.iteritems()]
featureNames.pop()

print(f'Features to use : {featureNames}')

Features to use : ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'RAD', 'TAX', 'PTRATIO', 'LSTAT']


In [15]:
from sklearn.model_selection import train_test_split

X = data[featureNames]
y = data['PRICE']

featuresMean = X.mean()
featuresSigma = X.std()

X = (X - featuresMean) / featuresSigma

X_train = X.iloc[:456]
X_test = X.iloc[456:]
y_train = y.iloc[:456]
y_test = y.iloc[456:]

(m, n) = X_train.shape

lam = 10000

In [16]:
def hypothesis(theta, x) : 
    xP = np.ones(11)
    xP[1:] = x
    return np.dot(theta, xP)

In [17]:
def costFunction(theta) : 
    J, m = 0, len(X_train)
    for i in range(m) : 
        J += (hypothesis(theta, X_train.iloc[i]) - y_train[i]) ** 2
    reg = 0
    for j in range(1, n) : 
        reg += theta[j] ** 2
    J += (reg * lam)
    J = J / (2 * m)
    return J

In [18]:
def differentialCostFunction(theta, alpha, j) : 
    d = 0
    for i in range(m) : 
        if j == 0 : 
            d += (hypothesis(theta, X_train.iloc[i]) - y_train.iloc[i])
        else : 
            d += (hypothesis(theta, X_train.iloc[i]) - y_train.iloc[i]) * X_train.iloc[i][j]
    if j != 0 : 
        d += (lam * theta[j])
    return d

In [19]:
def gradientDescent() : 
    theta, alpha = np.zeros(n + 1), 0.01
    J = costFunction(theta)
    while True : 
        print(J)
        newTheta = np.zeros(n + 1)
        for j in range(n) : 
            newTheta[j] = theta[j] - (alpha / m) * differentialCostFunction(theta, alpha, j)
        newJ = costFunction(newTheta)
        if newJ >= J or abs(newJ - J) < 0.01 : 
            break
        else : 
            theta = newTheta
            J = newJ
    print(J)
    return theta

In [None]:
theta = gradientDescent()

308.1056907894736
302.5160261391971
297.6793188471298
293.21754670639405
288.93072991886197
284.71886105719744
280.53689443979647
276.36903492927456
272.2142444715617
268.07822928081794
263.9691389706344
259.8953659326794
255.86451207207747
251.8829872031419
247.9559349492108
244.08731625478723
240.28005790142035
236.5362174405616
232.8571406578313
229.24360123833773
225.69591948979414
222.21406057901316
218.79771435501576
215.44635938921903
212.15931388084059
208.93577584292393
205.77485465740497
202.6755957469264
199.63699979506725
196.65803767078506
193.73766198056842
190.87481598077554
188.06844042800523
185.3174788215475
182.62088139366793
179.97760812585358
177.38663100812224
174.84693571066447
172.3575227996825
169.91740860008352
167.52562578492277
165.18122375374304
162.88326884815015
160.63084444221485
158.42305093692312
156.2590056813946
154.13784283852215
152.05871320875062
150.02078402264905
148.0232387105465
146.06527665564283
144.14611293557186
142.2649780562609
140.42111

48.966046319439194
48.96520424255786
48.96437670428759
48.96356343759699
48.962764180543495
48.96197867617443
48.961206672429846
48.96044792204772
48.95970218247035
48.958969215753314
48.95824878847534
48.957540671651024
48.95684464064428
48.95616047508408
48.95548795878169
48.95482687964936
48.9541770296208
48.953538204573384
48.952910204251495
48.952292832191404
48.95168589564823
48.95108920552341
48.950502576294305
48.94992582594499
48.94935877589814
48.94880125094848
48.94825307919794
48.94771409199111
48.947184123852836
48.946663012426775
48.946150598414654
48.945646725517925
48.945151240379026
48.94466399252501
48.94418483431171
48.94371362086943
48.94325021004873
48.942794462368624
48.94234624096467
48.94190541153867
48.941471842309326
48.941045403963415
48.94062596960855
48.940213414726365
48.93980761712693
48.93940845690375
48.93901581639006
48.93862958011551
48.93824963476403
48.93787586913256
48.93750817409012
48.93714644253829
48.936790569372
48.93644045144141
48.9360959875

In [None]:
theta

In [None]:
v = 102

print(hypothesis(theta, X_train.iloc[v]), y_train.iloc[v])