In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
import matplotlib as plt

In [4]:
from sklearn.linear_model import LinearRegression

In [5]:
def price_prediction(coefficients,params): ## A coefficients calculated based on the dataset
    return np.dot(coefficients,params)     ## parameters = [area,bedrooms,bathrooms]
                                           ## return a house price for the predefined parameters based on the coefficients

In [6]:
housing = pd.read_csv("Housing.csv")

In [7]:
housing.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [8]:
y_values = housing["price"].values ## dependendent variable

In [9]:
A = housing[["area","bedrooms","bathrooms"]].values ## design matrix

In [10]:
## assume linear dependence y_values = A.coefficients

In [11]:
## calculate coefficients manually using pseudoinverse matrix
coefficients = np.matmul(np.linalg.pinv(A),y_values)

In [12]:
coefficients ## calculated parameters for the dataset predicting house price

array([  3.72448352e+02,   3.68974672e+05,   1.37031315e+06])

In [13]:
def mean_squared_error(actual_y,assumed_y):
    return np.sum((actual_y-assumed_y)**2)/len(actual_y)

In [14]:
#### make a one step of the gradient descent

In [15]:
coefficients_assumption = np.array([1792.5,0,0]) 

In [16]:
y_predicted = np.matmul(A,coefficients_assumption)

In [17]:
cost = mean_squared_error(y_values,y_predicted)

In [18]:
## define a gradient descent rate
descent_rate = 0.001

In [19]:
coefficients_first_step = coefficients - descent_rate * 2 * np.matmul(A.T,y_values-y_predicted)

In [20]:
coefficients_first_step

array([  3.18879135e+10,   1.47257369e+07,   7.51407006e+06])

In [21]:
regression = LinearRegression()

In [22]:
regression.fit(A,y_values)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [23]:
pandas_coefficient = regression.coef_

In [24]:
pandas_coefficient

array([  3.78762754e+02,   4.06820034e+05,   1.38604950e+06])

In [25]:
#### conclusion

In [26]:
print(f"manually calculated coefficient {coefficients}")

manually calculated coefficient [  3.72448352e+02   3.68974672e+05   1.37031315e+06]


In [27]:
print(f"one step gradient descent {coefficients_first_step}")

one step gradient descent [  3.18879135e+10   1.47257369e+07   7.51407006e+06]


In [28]:
print(f"pandas coefficient {pandas_coefficient}")

pandas coefficient [  3.78762754e+02   4.06820034e+05   1.38604950e+06]


In [29]:
pd.DataFrame({"manual":coefficients,"pandas":pandas_coefficient,"gradient descent(first step)":coefficients_first_step})

Unnamed: 0,gradient descent(first step),manual,pandas
0,31887910000.0,372.4484,378.7628
1,14725740.0,368974.7,406820.0
2,7514070.0,1370313.0,1386049.0
