In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv('dataset.csv')

# 'Id' is removed so that it is not used as a model feature.
df = df.drop(columns='Id')

# Ordinal encodings for the categorical columns.
# Locations are ordered by their average price (lowest first).
locations = {
    'Rural': 0,
    'Suburban': 1,
    'Urban': 2,
    'Downtown': 3,
}
conditions = {
    'Poor': 0,
    'Fair': 1,
    'Good': 2,
    'Excellent': 3,
}

for column, codes in (('Location', locations), ('Condition', conditions)):
    encoded = df[column].map(codes)
    # Series.map yields NaN for any value missing from the dict; fail loudly
    # (naming the offending values) instead of letting NaN leak into training.
    unmapped = df.loc[encoded.isna(), column].unique()
    if len(unmapped):
        raise ValueError(f"Unexpected {column} values: {list(unmapped)}")
    df[column] = encoded.astype('int64')

# Binary flag: 1 when the value is exactly 'Yes', 0 for anything else.
df['Garage'] = (df['Garage'] == 'Yes').astype('int64')

In [2]:
# Force a float feature matrix. Without an explicit dtype the array inherits
# the DataFrame's dtype; if every feature column is integer-typed, X would be
# an integer array and writing normalised (fractional) values back into its
# columns would silently truncate them to whole numbers.
# np.array also makes an independent copy, so X can be modified in place.
X = np.array(df.drop('Price', axis=1), dtype=float)
y = np.array(df['Price'])

# Normalization of values
def normalise(x):
    """Standardise `x` to zero mean and unit standard deviation (z-score).

    Parameters
    ----------
    x : array-like exposing `.mean()` and `.std(ddof=...)`
        The values to standardise, e.g. one column of a NumPy array.

    Returns
    -------
    The result of `(x - mean) / std`, where `std` is the population standard
    deviation (`ddof=0`). When `std` is exactly 0 (all values identical) the
    centred values, which are all zero, are returned instead so the result
    never contains NaN or inf from a division by zero.
    """
    mu = x.mean()
    std = x.std(ddof=0)
    centred = x - mu
    if std == 0:
        # Constant input carries no spread to scale by.
        return centred
    return centred / std

# Dataset dimensions: m rows (examples) and n columns (features).
m, n = X.shape[:2]

# Standardise columns 0 and 3 in place; every other column keeps its
# original scale.
for col in (0, 3):
    X[:, col] = normalise(X[:, col])

In [3]:
def f(w, b, x):
    """Evaluate the linear model: the dot product of `w` and `x`, plus `b`."""
    weighted_sum = np.dot(w, x)
    return weighted_sum + b

def cost(w, b):
    """Return half the mean squared error of the linear model on (X, y).

    Reads the module-level `X`, `y` and `m`: the squared residuals of
    `X.dot(w) + b` against `y` are summed and divided by `2 * m`.
    """
    residuals = X.dot(w) + b - y
    squared = residuals ** 2
    return squared.sum() / (2 * m)

def gradient(w, b, lambda_):
    """Return the gradient `(dw, db)` of the cost at `(w, b)`.

    Reads the module-level `X`, `y` and `m`.

    Parameters
    ----------
    w : weight vector.
    b : bias term.
    lambda_ : regularisation strength; contributes `lambda_ * w / m` to `dw`
        and nothing to `db` (the bias is not penalised).

    Returns
    -------
    dw : `(X.T.dot(dif) + lambda_ * w) / m`, where `dif` is the residual
        vector `X.dot(w) + b - y`.
    db : the mean residual, `dif.sum() / m`.
    """
    dif = X.dot(w) + b - y
    dw = (X.T.dot(dif) + w * lambda_) / m
    db = dif.sum() / m
    return dw, db

In [4]:
def gradient_descent(iter, alpha, lambda_, log_every=None):
    """Fit the weights and bias with batch gradient descent.

    Starts from all-zero weights (length taken from the module-level `n`) and
    a zero bias, then applies `iter` updates using `gradient(w, b, lambda_)`.

    Parameters
    ----------
    iter : int
        Number of update steps. The name shadows the built-in `iter` inside
        this function; it is kept so existing calls keep working.
    alpha : float
        Learning rate (step size multiplying the gradient).
    lambda_ : float
        Regularisation strength forwarded to `gradient`.
    log_every : int or None, optional
        When a positive integer, print the current cost every `log_every`
        iterations, evaluated before that iteration's update. The default
        (`None`) prints nothing.

    Returns
    -------
    (w, b) : the weight vector and bias after the final update.
    """
    w = np.zeros(n)
    b = 0.0
    for step in range(iter):
        dw, db = gradient(w, b, lambda_)
        if log_every and step % log_every == 0:
            print(f"Iteration {step}: Current cost: {cost(w, b)}")
        w -= alpha * dw
        b -= alpha * db
    return w, b

In [16]:
# Hyper-parameters for the hand-written gradient descent run.
# The iteration count is deliberately NOT named `iter`: a module-level `iter`
# would shadow the built-in iter() for every cell executed afterwards.
num_iters = 100001
alpha = 0.0000001  # learning rate
lambda_ = 0        # regularisation strength; 0 removes the penalty from the gradient
w, b = gradient_descent(num_iters, alpha, lambda_)

print(f"My algos final cost: {cost(w, b)}")

My algos final cost: 38220428901.81065


In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Reference fit: ordinary least squares from scikit-learn on the same
# training data, scored on that same data.
model = LinearRegression().fit(X, y)
y_pred = model.predict(X)

# mean_squared_error has no 1/2 factor, so it is halved on printing to be
# comparable with the hand-written cost, which divides by 2 * m.
mse = mean_squared_error(y, y_pred)

print(f"Cost of scikit learn's is {mse / 2}")

Cost of scikit learn's is 38034985140.22446
