In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler


# Compare ordinary least squares (LinearRegression) with stochastic gradient
# descent (SGDRegressor) for predicting `price` from `size` and `bedroom`.


def _regression_metrics(y_true, y_pred):
    """Return (MAE, MSE, RMSE, MAPE in percent) for a set of predictions."""
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    # MAPE divides by the true values, so a zero in y_true yields inf/NaN.
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return mae, mse, rmse, mape


def _print_report(coef_label, model, metrics):
    """Print a fitted model's parameters followed by its four error metrics."""
    mae, mse, rmse, mape = metrics
    print(coef_label, model.coef_)
    print("Intercept:", model.intercept_)
    print("MAE:", mae)
    print("MSE:", mse)
    print("RMSE:", rmse)
    print("MAPE:", mape)


df = pd.read_csv("house_price.csv")
# NOTE(review): the result is discarded unless this is the last expression of
# a notebook cell; kept as a preview hook.
df.head()

X = df[['size', 'bedroom']]
y = df['price']

# Fixed random_state keeps the 80/20 split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Model 1: closed-form linear regression on the raw features ---
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)

mae_lr, mse_lr, rmse_lr, mape_lr = _regression_metrics(y_test, y_pred_lr)
_print_report("Linear Regression Coefficients:", lr_model, (mae_lr, mse_lr, rmse_lr, mape_lr))

# --- Model 2: SGD regression on standardized features ---
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

sgd_model = SGDRegressor(max_iter=1000, tol=1e-3, random_state=42)
sgd_model.fit(X_train_scaled, y_train)
y_pred_sgd = sgd_model.predict(X_test_scaled)

mae_sgd, mse_sgd, rmse_sgd, mape_sgd = _regression_metrics(y_test, y_pred_sgd)
# These coefficients are in standardized-feature units, so their magnitudes
# are not directly comparable to the LinearRegression coefficients above.
_print_report("SGD Coefficients:", sgd_model, (mae_sgd, mse_sgd, rmse_sgd, mape_sgd))



Linear Regression Coefficients: [   143.21853204 -13512.56442597]
Intercept: 84763.62252219403
MAE: 72334.75360356671
MSE: 8610424544.777674
RMSE: 92792.37331148327
MAPE: 17.46051927831933
SGD Coefficients: [106535.91023723 -10274.95128909]
Intercept: [323155.83200156]
MAE: 72124.60655610403
MSE: 8595003325.390148
RMSE: 92709.24077668929
MAPE: 17.400482776429413


To understand how well the models performed, we looked at a few key evaluation metrics:

1. MAE (Mean Absolute Error) shows the average difference between predicted and actual prices. It’s easy to understand because it’s expressed in dollars, and it’s not too sensitive to large errors.
2. MSE (Mean Squared Error) squares each error and then averages them, which means big mistakes have a bigger impact on the final score. Because of the squaring, it is expressed in squared dollars rather than dollars.
3. RMSE (Root Mean Squared Error) is just the square root of MSE. Like MAE, it’s measured in dollars, but it still emphasizes larger errors more.
4. MAPE (Mean Absolute Percentage Error) shows how far off the predictions were on average, as a percentage of the actual price. It’s great for comparing performance across different models or datasets, but it can be unreliable when the actual values are close to zero, because each error is divided by the actual value.

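In the end, both models gave very similar results (MAE of about 72,335 vs. 72,125, and MAPE of about 17.5% vs. 17.4%). LinearRegression was simple to apply, while SGDRegressor required a bit more setup with feature scaling. However, once that was done, it performed just as well. Note that the SGD coefficients were learned on standardized features, so their magnitudes are not directly comparable to the LinearRegression coefficients.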