In [1]:
# Question 3: Advanced Model Evaluation with Feature Selection for House Prices

# Step 1: Load a house prices dataset from CSV (assume you have a house_prices.csv).
# Step 2: Apply feature selection and create a train-test split.
# Step 3: Train a Lasso Regression model.
# Step 4: Perform model evaluation and hyperparameter tuning using GridSearchCV.

In [4]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Create synthetic house prices dataset
# make_regression returns a feature matrix and a target vector; the fixed
# random_state makes the generated data identical on every run.
features, target = make_regression(
    n_samples=500,
    n_features=15,
    n_informative=10,
    noise=25,
    random_state=1,
)
# One column per generated feature, plus the target stored as "Price".
column_names = [f"Feature_{i}" for i in range(features.shape[1])]
df = pd.DataFrame(features, columns=column_names).assign(Price=target)

# Step 2: Split data into train and test sets
# Pull the target column out first; everything else is a predictor.
y = df["Price"]
X = df.drop(columns=["Price"])
# Hold out 20% of the rows for final evaluation (seeded for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 3: Build pipeline with feature selection and Lasso regression
# Stages are applied in order: standardise the inputs, keep only the features
# selected by an auxiliary Lasso(alpha=0.1), then fit the final Lasso regressor.
pipeline_steps = [
    ("scaler", StandardScaler()),
    ("feature_selector", SelectFromModel(estimator=Lasso(alpha=0.1))),
    ("regressor", Lasso()),
]
pipeline = Pipeline(steps=pipeline_steps)

# Step 4: Hyperparameter tuning with GridSearchCV
# Only the final regressor's alpha is searched, addressed through the
# "<step name>__<parameter>" convention used by pipelines.
param_grid = {"regressor__alpha": [0.01, 0.1, 1, 10, 100]}

# 5-fold cross-validation on the training split, ranked by negated MSE
# (scikit-learn maximises scores, hence the "neg_" prefix).
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
)
grid_search.fit(X_train, y_train)

# Evaluate best model
# Take the best pipeline found by the search and predict on the held-out rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Gather the reported values first so the print section stays a plain summary.
best_alpha = grid_search.best_params_['regressor__alpha']
test_mse = mean_squared_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)

print("Best Alpha:", best_alpha)
print("Test MSE:", test_mse)
print("Test R² Score:", test_r2)


Best Alpha: 0.1
Test MSE: 610.4549093291427
Test R² Score: 0.9769422309236729
