In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [4]:
train_df = pd.read_csv('../data/processed/feature_engineered_train.csv')
eval_df = pd.read_csv('../data/processed/feature_engineered_eval.csv')

In [7]:
target = 'price'
X_train = train_df.drop(columns=[target])
y_train = train_df[target]

X_eval = eval_df.drop(columns=[target])
y_eval = eval_df[target]

In [8]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_eval_scaled = scaler.transform(X_eval)

In [9]:
lr = LinearRegression()
lr.fit(X_train_scaled, y_train)
y_pred = lr.predict(X_eval_scaled)

print("Linear Regression Performance on Eval Set:")
print(f"RMSE: {np.sqrt(mean_squared_error(y_eval, y_pred))}")
print(f"MAE: {mean_absolute_error(y_eval, y_pred)}")
print(f"R^2: {r2_score(y_eval, y_pred)}")

Linear Regression Performance on Eval Set:
RMSE: 121370.7897023768
MAE: 54057.850991804415
R^2: 0.886161703938911


In [10]:
ridge = Ridge(alpha=1.0)
ridge.fit(X_train_scaled, y_train)
y_pred_ridge = ridge.predict(X_eval_scaled)
print("\nRidge Regression Performance on Eval Set:")
print(f"RMSE: {np.sqrt(mean_squared_error(y_eval, y_pred_ridge))}")
print(f"MAE: {mean_absolute_error(y_eval, y_pred_ridge)}")
print(f"R^2: {r2_score(y_eval, y_pred_ridge)}")


Ridge Regression Performance on Eval Set:
RMSE: 121373.00945970295
MAE: 54057.962933716255
R^2: 0.8861575399105965


In [11]:
lasso = Lasso(alpha=0.1)
lasso.fit(X_train_scaled, y_train)
y_pred_lasso = lasso.predict(X_eval_scaled)
print("\nLasso Regression Performance on Eval Set:")
print(f"RMSE: {np.sqrt(mean_squared_error(y_eval, y_pred_lasso))}")
print(f"MAE: {mean_absolute_error(y_eval, y_pred_lasso)}")
print(f"R^2: {r2_score(y_eval, y_pred_lasso)}")


Lasso Regression Performance on Eval Set:
RMSE: 121676.85999421311
MAE: 54442.88652670436
R^2: 0.8855868299820233


  model = cd_fast.enet_coordinate_descent(


In [12]:
elastic = ElasticNet(alpha=0.1, l1_ratio=0.5)
elastic.fit(X_train_scaled, y_train)
y_pred_elastic = elastic.predict(X_eval_scaled)
print("\nElasticNet Regression Performance on Eval Set:")
print(f"RMSE: {np.sqrt(mean_squared_error(y_eval, y_pred_elastic))}")
print(f"MAE: {mean_absolute_error(y_eval, y_pred_elastic)}")
print(f"R^2: {r2_score(y_eval, y_pred_elastic)}")


ElasticNet Regression Performance on Eval Set:
RMSE: 122236.80913599543
MAE: 54198.644603174405
R^2: 0.8845313627524546
