# House Price Prediction using Linear Regression


In [22]:
import pandas as pd

from src.linear_regression import GDLinearRegression, LinearRegression
from sklearn.linear_model import LinearRegression as SklearnLinearRegression
from src.tester import base_test

## Load the data

In [23]:
# Read the raw dataset from disk (path is relative to the notebook's working directory).
df = pd.read_csv("data/house-price-data.csv")

# Separate the regression target from the features:
#   y -> the "price" column
#   X -> every other column of the frame
y = df["price"]
X = df.drop(columns="price")


## Preprocess the data

In [24]:
# Remove the columns that are not used as model features.
# NOTE: this cell rebinds X and y, so it is meant to run exactly once after the
# load cell; re-running it on its own raises KeyError because the columns
# listed here have already been dropped from X.
X = X.drop(columns=["date", "street", "city", "statezip", "country"])

# Standardize every feature to zero mean and unit (sample) standard deviation.
# A constant column has a standard deviation of 0, which would turn the whole
# column into NaN (0 / 0); dividing by 1 instead leaves such a column at 0.
feature_std = X.std().replace(0, 1)
X = (X - X.mean()) / feature_std

# Standardize the target the same way.
y = (y - y.mean()) / y.std()

## Initialize models

In [25]:
# Hyperparameters handed to the gradient-descent model.
# NOTE(review): `threshold` is presumably the stopping tolerance — confirm in
# src.linear_regression.
GD_LEARNING_RATE = 0.05
GD_THRESHOLD = 1e-9

# Estimators under comparison, keyed by the label printed when each is tested:
# the two project implementations and scikit-learn's LinearRegression.
models = {
    "GDLinearRegression": GDLinearRegression(
        learning_rate=GD_LEARNING_RATE,
        threshold=GD_THRESHOLD,
    ),
    "LinearRegression": LinearRegression(),
    "SklearnLinearRegression": SklearnLinearRegression(),
}

## Test each model

In [26]:
# Run the shared test helper on every registered estimator, announcing each
# one by its label first.
# NOTE(review): the X / y prepared earlier in the notebook are not passed to
# base_test here — confirm that base_test obtains its own data as intended.
for name in models:
    model = models[name]
    print(f"Testing {name}")
    base_test(model)

Testing GDLinearRegression
mse: 0.789 +/- 1.448
Testing LinearRegression
mse: 1.039 +/- 1.406
Testing SklearnLinearRegression
mse: 0.789 +/- 1.448
