In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score

# Read the housing data and discard incomplete rows in a single expression.
# NOTE(review): dropna() with no `subset` removes a row when ANY column is
# missing, not just the two columns used below -- confirm that is intended.
df = pd.read_csv("../housing.csv").dropna()

# Single-predictor setup. The double brackets keep X as a one-column
# DataFrame (2-D) rather than a Series; y is the target column as a Series.
X = df[["median_income"]]
y = df["median_house_value"]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Degree-2 polynomial feature expansion feeding a linear regression.
# Pipeline.fit returns the fitted pipeline itself, so `model` is the
# trained estimator.
model = make_pipeline(
    PolynomialFeatures(degree=2),
    LinearRegression(),
).fit(X_train, y_train)

# Score the held-out rows: mean squared error and R^2.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Emit the three-line report with one call; sep="\n" places each item on
# its own line.
print(
    "Polynomial Regression (median_income only):",
    f"  MSE: {mse:.2f}",
    f"  R² Score: {r2:.4f}",
    sep="\n",
)


Polynomial Regression (median_income only):
  MSE: 7166094383.14
  R² Score: 0.4760
