In [1]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
import numpy as np
import plotly.graph_objs as go

In [2]:
def true_fun(X):
    """Noise-free target function: cos(1.5 * pi * X), evaluated element-wise.

    Parameters
    ----------
    X : scalar or array-like
        Input value(s). Plain Python sequences are accepted and converted
        to a NumPy array first.

    Returns
    -------
    numpy scalar or numpy.ndarray
        cos(1.5 * pi * X), with the same shape as the input.
    """
    # np.pi is a plain float, so `1.5 * np.pi * X` raises TypeError for a
    # Python list; coercing up front makes lists/tuples work as well.
    X = np.asarray(X)
    return np.cos(1.5 * np.pi * X)

In [3]:
# Generate a small noisy sample of the target function.
# The legacy global seed is kept so the sampled values stay reproducible and
# match the outputs already recorded further down.
np.random.seed(0)
n_samples = 30
X = np.sort(np.random.rand(n_samples))
y = true_fun(X) + np.random.randn(n_samples) * 0.1  # Gaussian noise, sigma = 0.1

# Plot the samples against the noise-free curve.
# The curve is evaluated on a dense, evenly spaced grid: drawing it only at
# the 30 irregularly spaced sample points gives a visibly jagged polyline.
X_dense = np.linspace(0, 1, 200)
fig = go.Figure()
fig.add_trace(go.Scatter(x=X, y=y, mode='markers',
              name='Samples', marker=dict(size=8, color='blue')))
fig.add_trace(go.Scatter(x=X_dense, y=true_fun(X_dense),
                mode='lines', name='True function', line=dict(dash='dash')))
# A figure should stand on its own when the notebook is skimmed.
fig.update_layout(title='Noisy samples and true function',
                  xaxis_title='x', yaxis_title='y')
fig.show()

# Polynomial degrees to compare, ordered from lowest to highest model complexity.
degrees = [1, 3, 4, 6, 10, 15]
# Start a fresh, empty figure; the cells below add one trace per fitted model.
fig = go.Figure()

In [5]:
# Fit and cross-validate one polynomial regression per entry in `degrees`,
# printing the per-fold MSE and adding each fitted curve to `fig`.

# Loop-invariant inputs, built once instead of on every iteration.
# scikit-learn estimators expect 2-D input, hence the added column axis.
X_col = X[:, np.newaxis]
X_test = np.linspace(0, 1, 100)  # evaluation grid; also read by the final plotting cell
X_test_col = X_test[:, np.newaxis]

for degree in degrees:
    # Define pipeline: polynomial feature expansion followed by least squares.
    polynomial_features = PolynomialFeatures(degree=degree, include_bias=False)
    linear_regression = LinearRegression()
    pipeline = Pipeline([("polynomial_features", polynomial_features),
                         ("linear_regression", linear_regression)])

    # Fit on the full sample; this fitted model is the one drawn below.
    pipeline.fit(X_col, y)

    # Evaluate using cross-validation. The scorer returns *negative* MSE
    # (higher is better), so the sign is flipped for reporting.
    # NOTE(review): an integer `cv` yields unshuffled K-fold splits for a
    # regressor, and X is sorted, so every fold is a contiguous slice of the
    # x-range (the end folds measure extrapolation). Consider an explicitly
    # shuffled KFold if interpolation error is what should be measured.
    scores = cross_val_score(
        pipeline, X_col, y, scoring="neg_mean_squared_error", cv=10)
    mean_mse = -scores.mean()

    print(f"Cross-validation scores (MSE) for degree {degree}: {-scores}")
    print(f"Mean MSE for degree {degree}: {mean_mse:.4f}")
    # The standard deviation is unaffected by the sign flip.
    print(f"Standard deviation of MSE for degree {degree}: {scores.std():.4f}")

    # Predict on the dense grid for plotting.
    y_pred = pipeline.predict(X_test_col)

    # Add model prediction to plot, with the mean CV error in the legend.
    fig.add_trace(go.Scatter(x=X_test, y=y_pred, mode='lines',
                  name=f'Degree {degree} (MSE={mean_mse:.2e})'))

Cross-validation scores (MSE) for degree 1: [1.15358906 0.19957154 0.0442151  0.36298787 0.27884536 0.29893644
 0.17598593 0.00857039 0.24728522 1.3073027 ]
Mean MSE for degree 1: 0.4077
Standard deviation of MSE for degree 1: 0.4255
Cross-validation scores (MSE) for degree 3: [0.02152603 0.00694444 0.02748351 0.03476942 0.00459724 0.00417486
 0.01048676 0.04528292 0.01378305 0.01843119]
Mean MSE for degree 3: 0.0187
Standard deviation of MSE for degree 3: 0.0130
Cross-validation scores (MSE) for degree 4: [0.25174939 0.04193604 0.02725282 0.02934695 0.00485939 0.00487645
 0.01858739 0.03843547 0.01218562 0.00285797]
Mean MSE for degree 4: 0.0432
Standard deviation of MSE for degree 4: 0.0708
Cross-validation scores (MSE) for degree 6: [0.86696496 0.04987463 0.02220756 0.02951756 0.00896087 0.00950919
 0.03294576 0.03444008 0.01234849 0.07667135]
Mean MSE for degree 6: 0.1143
Standard deviation of MSE for degree 6: 0.2516
Cross-validation scores (MSE) for degree 10: [1.51621141e+04 1.7

In [6]:
# Overlay the noise-free curve and the raw samples on the model-comparison
# figure, then render it.
# The grid is built here (same values as the prediction grid used for the
# fitted curves) so this cell does not rely on a variable leaked out of the
# fitting loop in an earlier cell.
X_grid = np.linspace(0, 1, 100)
fig.add_trace(go.Scatter(x=X_grid, y=true_fun(X_grid),
              mode='lines', name='True function', line=dict(dash='dash')))
fig.add_trace(go.Scatter(x=X, y=y, mode='markers',
              name='Samples', marker=dict(size=8, color='blue')))

# Title and axis labels so the figure stands on its own. The y-axis is clamped
# because the highest-degree fits can swing far outside the data range and
# would otherwise flatten every other curve; the plot stays zoomable.
fig.update_layout(title='Polynomial regression fits by degree',
                  xaxis_title='x', yaxis_title='y',
                  yaxis_range=[-2, 2])

# NOTE: re-running this cell without re-creating `fig` appends duplicate traces.
fig.show()