# House Price Prediction using Linear Regression


In [23]:
import pandas as pd

from src.linear_regression import GDLinearRegression, LinearRegression
from sklearn.linear_model import LinearRegression as SklearnLinearRegression
from src.tester import base_test

## Load the data

In [24]:
df = pd.read_csv("data/house-price-data.csv")
X, y = df.loc[:, df.columns != "price"], df["price"]


## Preprocess the data

In [25]:
X = X.drop(["date", "street", "statezip", "country"], axis=1, inplace=False)
X = (X - X.mean()) / X.std()
X = pd.get_dummies(X, columns=["city"])
y = (y - y.mean()) / y.std()
X, y

  X = (X - X.mean()) / X.std()
  X = (X - X.mean()) / X.std()


Unnamed: 0,bathrooms,bedrooms,condition,floors,sqft_above,sqft_basement,sqft_living,sqft_lot,view,waterfront,yr_built,yr_renovated
0,-0.843112,-0.441074,-0.667040,-0.022414,-0.565162,-0.672391,-0.829881,-0.193413,-0.309161,-0.084995,-0.530956,1.221538
1,0.432754,1.759513,2.286168,0.906456,1.789365,-0.069121,1.568358,-0.161700,4.829554,-0.084995,-1.674511,-0.825604
2,-0.205179,-0.441074,0.809564,-0.951284,0.119158,-0.672391,-0.217344,-0.080969,-0.309161,-0.084995,-0.160982,-0.825604
3,0.113788,-0.441074,0.809564,-0.951284,-0.959517,1.482145,-0.144670,-0.190125,-0.309161,-0.084995,-0.261884,-0.825604
4,0.432754,0.659220,0.809564,-0.951284,-0.797135,1.051238,-0.206962,-0.121293,-0.309161,-0.084995,0.175357,1.208264
...,...,...,...,...,...,...,...,...,...,...,...,...
4595,-0.524145,-0.441074,0.809564,-0.951284,-0.367985,-0.672391,-0.653387,-0.236663,-0.309161,-0.084995,-0.564590,1.194991
4596,0.432754,-0.441074,-0.667040,0.906456,-0.425978,-0.672391,-0.705297,-0.202860,-0.309161,-0.084995,0.410795,1.225622
4597,0.432754,-0.441074,-0.667040,0.906456,1.371813,-0.672391,0.903911,-0.218438,-0.309161,-0.084995,1.285278,-0.825604
4598,-0.205179,0.659220,-0.667040,-0.951284,-0.878326,1.525235,-0.051232,-0.229139,-0.309161,-0.084995,0.108089,-0.825604


## Initialize models

In [26]:
models = {
    "GDLinearRegression": GDLinearRegression(learning_rate=0.05, threshold=1e-9),
    "LinearRegression": LinearRegression(),
    "SklearnLinearRegression": SklearnLinearRegression(),
}

## Test each model

In [27]:
for name, model in models.items():
    print(f"Testing {name}")
    base_test(model)

Testing GDLinearRegression
mse: 0.792 +/- 0.937
Testing LinearRegression
mse: 0.982 +/- 0.880
Testing SklearnLinearRegression
mse: 0.793 +/- 0.938
