In [19]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

def _encode_ordinal(column, mapping):
    """Replace the labels in ``column`` with their integer codes from ``mapping``.

    Parameters
    ----------
    column : pandas.Series
        Column holding the labels to encode.
    mapping : dict
        Label -> integer code.

    Returns
    -------
    pandas.Series
        Integer-coded column with the same index as ``column``.

    Raises
    ------
    KeyError
        If any value (including a missing one) has no entry in ``mapping``.
        ``Series.map`` alone would silently turn such values into NaN.
    """
    encoded = column.map(mapping)
    unmapped = encoded.isna()
    if unmapped.any():
        unknown = sorted(column[unmapped].astype(str).unique())
        raise KeyError(f"Unmapped values in column {column.name!r}: {unknown}")
    return encoded.astype('int64')


df = pd.read_csv('dataset.csv')

# Drop the 'Id' column.
df = df.drop(columns='Id')

# Location codes, ordered according to the locations' average prices.
locations = {
    'Rural': 0,
    'Suburban': 1,
    'Urban': 2,
    'Downtown': 3,
}
df['Location'] = _encode_ordinal(df['Location'], locations)

# Condition codes, from 'Poor' (0) up to 'Excellent' (3).
conditions = {
    'Poor': 0,
    'Fair': 1,
    'Good': 2,
    'Excellent': 3,
}
df['Condition'] = _encode_ordinal(df['Condition'], conditions)

# Binary flag: 1 where Garage is exactly 'Yes', 0 for anything else.
df['Garage'] = (df['Garage'] == 'Yes').astype('int64')

In [31]:
# Feature matrix (every column except 'Price') and target vector.
X = df.drop(columns='Price').to_numpy()
y = df['Price'].to_numpy()

# m = number of rows (samples), n = number of columns (features).
m, n = X.shape

# Standardise every feature column (StandardScaler: zero mean, unit variance).
scaler = StandardScaler()
X = scaler.fit_transform(X)

# StandardScaler works on 2-D input, so the target is scaled as a single
# column and flattened back to 1-D afterwards. y_scaler is kept so that
# predictions can be mapped back with y_scaler.inverse_transform.
print(y.shape)
y_scaler = StandardScaler()
y = y_scaler.fit_transform(y.reshape(-1, 1)).reshape(-1)
print(y.shape)

(2000,)
(2000,)


In [32]:
def f(w, b, x):
    """Evaluate the linear model: dot product of ``w`` and ``x`` plus bias ``b``."""
    weighted_sum = np.dot(w, x)
    return weighted_sum + b

def cost(w, b, lambda_=0.0):
    """Return the halved mean squared error of the linear model on (X, y).

    Uses the module-level ``X``, ``y`` and ``m``:

        J(w, b) = (sum((X.w + b - y)^2) + lambda_ * sum(w^2)) / (2 * m)

    Parameters
    ----------
    w : array-like
        Weight vector, one entry per column of ``X``.
    b : float
        Bias term.
    lambda_ : float, optional
        L2 penalty strength. The default of 0 gives the plain, unregularised
        cost. A non-zero value adds the penalty whose derivative is the
        ``w * lambda_ / m`` term used in ``gradient``.

    Returns
    -------
    float
        The cost value.
    """
    residual = X.dot(w) + b - y
    penalty = lambda_ * np.sum(np.square(w))
    return ((residual ** 2).sum() + penalty) / (2 * m)

def gradient(w, b, lambda_):
    """Return the gradient of the L2-regularised squared-error cost.

    Uses the module-level ``X``, ``y`` and ``m``.

    Parameters
    ----------
    w : numpy.ndarray
        Current weight vector.
    b : float
        Current bias.
    lambda_ : float
        L2 penalty strength; applied to the weights only, not to the bias.

    Returns
    -------
    (dw, db)
        ``dw`` is the derivative with respect to each weight and ``db`` the
        derivative with respect to the bias.
    """
    residual = X.dot(w) + b - y
    dw = (X.T.dot(residual) + lambda_ * w) / m
    db = residual.sum() / m
    return dw, db

In [35]:
def gradient_descent(iter, alpha, lambda_, log_every=100):
    """Fit the weights and bias with batch gradient descent.

    Starts from all-zero weights (length ``n``, module-level) and a zero bias,
    then repeatedly steps against the gradient returned by ``gradient``.

    Parameters
    ----------
    iter : int
        Number of update steps to perform.
    alpha : float
        Learning rate (step size multiplying the gradient).
    lambda_ : float
        L2 penalty strength forwarded to ``gradient``.
    log_every : int, optional
        Print the current cost every ``log_every`` steps (default 100).
        Pass 0 or None to disable logging.

    Returns
    -------
    (w, b)
        Final weight vector and bias (always a float, even when ``iter`` is 0).
    """
    w = np.zeros(n)
    b = 0.0
    for step in range(iter):
        dw, db = gradient(w, b, lambda_)
        # The cost is reported for the parameters *before* this step's update.
        if log_every and step % log_every == 0:
            print(f"Iteration {step}: Current cost: {cost(w, b)}")
        w -= alpha * dw
        b -= alpha * db
    return w, b

In [39]:
# Hyper-parameters for the training run.
# Named n_iterations rather than `iter` so the builtin iter() is not shadowed
# for the rest of the kernel session.
n_iterations = 100001  # range(100001) ends at step 100000, which is a multiple of 100 and so gets logged
alpha = 0.001          # learning rate
lambda_ = 0            # L2 penalty strength; 0 turns regularisation off
w, b = gradient_descent(n_iterations, alpha, lambda_)

print(f"My algos final cost: {cost(w, b)}")

Iteration 0: Current cost: 0.5
Iteration 100: Current cost: 0.4996499505327446
Iteration 200: Current cost: 0.4993629452492883
Iteration 300: Current cost: 0.4991276140847756
Iteration 400: Current cost: 0.4989346405048991
Iteration 500: Current cost: 0.4987763900758719
Iteration 600: Current cost: 0.49864660631764396
Iteration 700: Current cost: 0.4985401616334365
Iteration 800: Current cost: 0.4984528533268094
Iteration 900: Current cost: 0.4983812365319105
Iteration 1000: Current cost: 0.49832248736687673
Iteration 1100: Current cost: 0.4982742908347181
Iteration 1200: Current cost: 0.49823474898960396
Iteration 1300: Current cost: 0.4982023056994791
Iteration 1400: Current cost: 0.4981756850012416
Iteration 1500: Current cost: 0.49815384058919693
Iteration 1600: Current cost: 0.4981359144231261
Iteration 1700: Current cost: 0.49812120280706845
Iteration 1800: Current cost: 0.49810912858849504
Iteration 1900: Current cost: 0.4980992183719837
Iteration 2000: Current cost: 0.498091083

In [40]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Reference solution: ordinary least squares fitted by scikit-learn on the
# same scaled X and y.
model = LinearRegression().fit(X, y)
y_pred = model.predict(X)
mse = mean_squared_error(y_true=y, y_pred=y_pred)

# Halve the MSE so the number is comparable with cost(), which divides by 2*m.
print(f"Cost of scikit learn's is {mse / 2}")

Cost of scikit learn's is 0.49805375465893165
