# In [2]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import numpy as np

# --- Synthetic demo data (swap in your real dataset here) -------------------
# Ground truth is the quadratic y = 2x^2 + 3x + 5, with x drawn uniformly from
# [0, 10) and Gaussian noise (standard deviation 10) added on top.
# The global seed is fixed so the draws below are reproducible; the order of
# the two random calls (rand, then randn) matters for that.
np.random.seed(42)
n_samples = 100
X = 10 * np.random.rand(n_samples, 1)
x_flat = X[:, 0]  # 1-D view of the single feature column
noise = 10 * np.random.randn(n_samples)
y = 2 * x_flat ** 2 + 3 * x_flat + 5 + noise

# --- Hold out 20% of the samples for evaluation ------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Model: standardise -> expand to degree-2 polynomial terms -> OLS fit ----
steps = [
    ('scaler', StandardScaler()),
    ('poly', PolynomialFeatures(degree=2)),
    ('linreg', LinearRegression()),
]
pipeline = Pipeline(steps)

# Train on the training split only, then predict the held-out inputs.
# fit() returns the pipeline itself, so the two calls can be chained.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# --- Report goodness of fit (R²) on the held-out split -----------------------
r2 = r2_score(y_test, y_pred)
print(f"R² score on test data: {r2:.4f}")


# Output:
# R² score on test data: 0.9869
