In [None]:
# import necessary libraries for data handling and modeling
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

# Load the housing dataset straight from the public GitHub CSV and discard the
# 'ID' column, which is not used as a feature or as the target below.
df = pd.read_csv("https://raw.githubusercontent.com/IBM/ml-learning-path-assets/master/data/predict_home_value.csv")
df = df.drop(columns=['ID'])

# Model the natural log of the sale price rather than the raw price.
# NOTE(review): np.log yields -inf/NaN for zero or negative prices; this
# assumes every SALEPRICE is strictly positive -- confirm against the data.
df['LOG_SALEPRICE'] = np.log(df['SALEPRICE'])

# Feature matrix X: a single-column DataFrame (double brackets keep it 2-D,
# which the scikit-learn estimators expect). Target vector y: the
# log-transformed sale price.
X = df[['YEARBUILT']]
y = df['LOG_SALEPRICE']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Polynomial regression expressed as a three-stage pipeline:
#   1. expand the input into degree-2 polynomial terms,
#   2. standardize the expanded columns,
#   3. fit an L2-regularized (Ridge) linear model on the result.
pipeline_steps = [
    ('poly', PolynomialFeatures(degree=2)),
    ('scaler', StandardScaler()),
    ('regressor', Ridge(alpha=1.0)),
]
model = Pipeline(steps=pipeline_steps)

# Fit on the training portion, then predict the held-out test portion.
# fit() returns the fitted pipeline itself, so the two calls can be chained.
y_pred = model.fit(X_train, y_train).predict(X_test)

# Print the evaluation metrics for the held-out test set.
# Both y_test and y_pred are log-transformed sale prices, so the mean squared
# error and R² below are measured on the log scale, not in currency units.
print("polynomial regression")
print(f"mean squared error: {mean_squared_error(y_test, y_pred):.2f}")
print(f"R² Score: {r2_score(y_test, y_pred):.2f}")
