In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
# Load the California housing data as a (features, target) pair of pandas objects.
X, y = fetch_california_housing(return_X_y=True, as_frame=True)

In [3]:
# Hold out a test set using train_test_split's default split size.
# The fixed random_state keeps the split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [4]:
# Baseline model: standardize every feature, then fit ordinary least squares.
lrp = Pipeline(
    steps=[
        ("feature_scaling", StandardScaler()),
        ("lin_reg", LinearRegression()),
    ]
)

In [5]:
# Fit the scaler and the linear model on the training split only.
lrp.fit(X=X_train, y=y_train)

In [6]:
# Coefficient of determination (R^2) of the baseline on the training split.
lrp.score(X=X_train, y=y_train)

0.609873031052925

In [7]:
# R^2 of the baseline on the held-out test split.
lrp.score(X=X_test, y=y_test)

0.591050979549135

### Polynomial Regression

In [8]:
from sklearn.preprocessing import PolynomialFeatures

In [9]:
# Standalone degree-2 expansion used to inspect the generated features.
# include_bias=True is the default, spelled out here because it is what
# produces the constant '1' column in the expanded feature set.
pf = PolynomialFeatures(degree=2, include_bias=True)

In [10]:
# Learn the feature combinations from the training frame, then expand it.
X_train_new = pf.fit(X_train).transform(X_train)

In [11]:
# (rows, columns) of the original training features, before polynomial expansion.
X_train.shape

(15480, 8)

In [12]:
# (rows, columns) after the degree-2 expansion; compare with X_train.shape
# to see how many features the expansion produced.
X_train_new.shape

(15480, 45)

In [13]:
# List the name of every generated column: the constant term, the original
# features, their squares, and their pairwise products.
pf.get_feature_names_out()

array(['1', 'MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population',
       'AveOccup', 'Latitude', 'Longitude', 'MedInc^2', 'MedInc HouseAge',
       'MedInc AveRooms', 'MedInc AveBedrms', 'MedInc Population',
       'MedInc AveOccup', 'MedInc Latitude', 'MedInc Longitude',
       'HouseAge^2', 'HouseAge AveRooms', 'HouseAge AveBedrms',
       'HouseAge Population', 'HouseAge AveOccup', 'HouseAge Latitude',
       'HouseAge Longitude', 'AveRooms^2', 'AveRooms AveBedrms',
       'AveRooms Population', 'AveRooms AveOccup', 'AveRooms Latitude',
       'AveRooms Longitude', 'AveBedrms^2', 'AveBedrms Population',
       'AveBedrms AveOccup', 'AveBedrms Latitude', 'AveBedrms Longitude',
       'Population^2', 'Population AveOccup', 'Population Latitude',
       'Population Longitude', 'AveOccup^2', 'AveOccup Latitude',
       'AveOccup Longitude', 'Latitude^2', 'Latitude Longitude',
       'Longitude^2'], dtype=object)

In [15]:
# Degree-2 polynomial regression: standardize the raw features, expand them
# into squares and pairwise products, then fit ordinary least squares.
prp = Pipeline([
    ("feature_scaling", StandardScaler()),
    # include_bias=False: LinearRegression already fits its own intercept, so the
    # all-ones column PolynomialFeatures adds by default would be a redundant,
    # perfectly collinear feature.
    ("pf", PolynomialFeatures(degree=2, include_bias=False)),
    ("lin_reg", LinearRegression()),
])

In [16]:
# Fit the scaler, the polynomial expansion and the linear model on the training split.
prp.fit(X=X_train, y=y_train)

In [17]:
# R^2 of the polynomial model on the training split.
prp.score(X=X_train, y=y_train)

0.6829089327528395

In [18]:
# R^2 of the polynomial model on the held-out test split.
prp.score(X=X_test, y=y_test)

0.65630058807575