# Boston House Price prediction implemented using a self-written gradient descent algorithm

In [511]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Fetching the dataset

In [512]:
from sklearn.datasets import load_boston

boston = load_boston()

data = pd.DataFrame(boston.data, columns = boston.feature_names)
data['PRICE'] = boston.target

### Eliminating features with low correlation value with the target

In [513]:
correlations = data.corr()['PRICE']

features = correlations[abs(correlations) > 0.2]

featureNames = [name for name, cor in features.iteritems()]
featureNames.pop()

print(f'Features to use : {featureNames}')

Features to use : ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']


### Modifying the dataset for gradient descent

In [514]:
from sklearn.model_selection import train_test_split

X = data[featureNames]
y = data['PRICE']

m = X.shape[0]

# scaling the features

featuresMean = X.mean()
featuresSigma = X.std()
X = (X - featuresMean) / featuresSigma

# inserting the bias column with ones

X.insert(0, 'BIAS', np.ones(m))

n = X.shape[1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 3)

m_train = X_train.shape[0]
m_test = X_test.shape[0]

# regularization term (lambda)
lam = 100

### Hypothesis function for prediction

In [515]:
def hypothesis(theta, x) : 
    h = np.dot(x, theta)
    return h

### Cost function which calculates mean squares residual

In [516]:
def costFunction(theta) : 
    J = (1 / m_train) * (sum((hypothesis(theta, X_train) - y_train) ** 2) + (lam * sum(theta[1:] ** 2)))
    return J

### Differential of cost function to tweak the value of theta

In [517]:
def differentialCostFunction(theta) : 
    d = np.sum(((hypothesis(theta, X_train) - y_train).T * X_train.T), axis = 1) + (lam * np.append(np.ones(1), theta[1:]))
    return d

### Gradient descent which minimizes the cost of the model

In [518]:
def gradientDescent() : 
    theta, alpha = np.zeros(n), 0.01
    J = costFunction(theta)
    while True : 
        newTheta = theta - ((alpha / m_train) * differentialCostFunction(theta))
        newJ = costFunction(newTheta)
        if newJ >= J or abs(newJ - J) < 0.01 : 
            break
        else : 
            theta = newTheta
            J = newJ
    return theta

In [519]:
theta = gradientDescent()

J = costFunction(theta)

print(f'Cost of the model : {J}')

Cost of the model : 33.09182091529209


In [520]:
predictions_train = hypothesis(theta, X_train)
predictions_test = hypothesis(theta, X_test)

correct_train = len(predictions_train[abs(predictions_train - y_train) <= 5])
correct_test = len(predictions_test[abs(predictions_test - y_test) <= 5])

print(f'''
Model efficiency for the training set : {correct_train * 100 / m_train}%
Model efficiency for the testing set : {correct_test * 100 / m_test}%
''')


Model efficiency for the training set : 81.2664907651715%
Model efficiency for the testing set : 84.25196850393701%

