In [None]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.kernel_ridge import KernelRidge


num_samples = 1000
num_features = 100

# Seed the random generator so that the synthetic data (and everything derived
# from it) is identical on every Restart & Run All.
SEED = 0
rng = np.random.default_rng(SEED)

# Create a sample dataset with a leading one for the bias term:
# standard-normal entries, then column 0 is overwritten with the constant 1.
X = rng.standard_normal((num_samples, num_features + 1))
X[:, 0] = 1.0

# Create the labels (standard-normal noise, one column)
y = rng.standard_normal((num_samples, 1))

# Create a feature transform that transforms the input into a vector that contains all the monomials of degree <= 3.
# include_bias=False: no extra constant column is added by the transform; X already
# carries a constant column of ones in position 0.
feature_transform = PolynomialFeatures(degree=3, include_bias=False)
X_transformed = feature_transform.fit_transform(X)

# Show how large the explicit feature representation is
print("X_transformed.shape = {}".format(X_transformed.shape))


In [None]:
# Benchmark the explicit-feature approach in two steps:
# (1) expanding X into polynomial features, and
# (2) fitting a ridge regression model on the already-expanded features.
%timeit feature_transform.fit_transform(X)
%timeit Ridge().fit(X_transformed, y)

In [None]:
# Each entry of the kernel matrix is the inner product of two transformed feature vectors.
# We can compute it directly from the original data, without building the transformed features.
def kernel(x, y):
    """
    Evaluates the degree-3 polynomial kernel 1 + k + k^2 + k^3 with k = x @ y.

    The powers are taken element-wise on the product x @ y.

    :param x: The original data matrix (n_samples, n_features)
    :param y: The input sample (n_features, 1)
    :return: The kernel values, with the same shape as x @ y
    """
    inner = x @ y

    # Start from the constant and linear terms, then add the higher powers in
    # increasing order.
    result = 1 + inner
    for exponent in (2, 3):
        result = result + inner**exponent
    return result

def precompute_kernel(X):
    """
    Builds the polynomial kernel matrix of degree 3 for every pair of rows of X.

    :param X: The data matrix, one sample per row (n_samples, n_features)
    :return: 1 + G + G**2 + G**3 with G = X @ X.T, powers taken element-wise
    """
    gram = X @ X.T
    quadratic = gram**2
    cubic = gram**3
    return 1 + gram + quadratic + cubic

# Precompute the kernel matrix
alpha = 1.0
K = precompute_kernel(X)
# Solve the regularized linear system (K + alpha * I) a = y for the coefficient vector a
a = np.linalg.solve(K + alpha * np.eye(K.shape[0]), y)

# Benchmark fitting a kernel ridge regression model:
# building the kernel matrix and solving the linear system are timed separately
%timeit precompute_kernel(X)
%timeit np.linalg.solve(K + alpha * np.eye(K.shape[0]), y)

# Comparing Ridge Regression vs. Kernel Ridge Regression

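We saw above that the kernel trick lets us work in the polynomial feature space without ever constructing the transformed features explicitly, which makes training much faster. A richer feature space can also lead to better generalization. Let's compare ridge regression and kernel ridge regression on the digits dataset.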

In [None]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the digits dataset
digits = load_digits()
housing = digits  # original (misleading) name, kept as an alias in case other cells still use it
# NOTE: X and y are rebound here; from this point on they no longer refer to the
# synthetic data created at the top of the notebook.
X = digits['data']
y = digits['target']

# Split the dataset into a training and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Both models share the same regularization strength alpha
alpha = 1.0
gamma = 1.0
krr = KernelRidge(alpha=alpha, kernel='poly', gamma=gamma)
ridge = Ridge(alpha=alpha)

# Fit and score both models with exactly the same procedure. Both are regressors,
# so their continuous predictions are rounded to the nearest integer before being
# compared with the class labels.
for report, model in (
    ("Kernel Ridge Accuracy = {:.2f}%", krr),
    ("Ridge Regression Accuracy = {:.2f}%", ridge),
):
    model.fit(X_train, y_train)

    # Predict the labels
    y_pred = model.predict(X_test)

    # Compute the accuracy
    accuracy = accuracy_score(y_test, y_pred.round())

    print(report.format(accuracy * 100))