In [36]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.kernel_ridge import KernelRidge


num_samples = 1000
num_features = 100

# Seed a dedicated generator so the synthetic data (and everything derived
# from it) is identical on every Restart & Run All.
SEED = 0
rng = np.random.default_rng(SEED)

# Create a sample dataset of standard-normal features; column 0 is then
# overwritten with ones so it acts as the bias term.
X = rng.standard_normal((num_samples, num_features + 1))
X[:, 0] = 1.0

# Create the labels: one standard-normal target per sample, as a column vector
y = rng.standard_normal((num_samples, 1))

# Create a feature transform that expands the input into all monomials of
# degree 1..3 of the input columns. include_bias=False because X already
# carries its own column of ones.
feature_transform = PolynomialFeatures(degree=3, include_bias=False)
X_transformed = feature_transform.fit_transform(X)

print("X_transformed.shape = {}".format(X_transformed.shape))


X_transformed.shape = (1000, 182103)


In [37]:
# Benchmark the explicit-feature approach:
#   1. expanding X into the degree-3 polynomial features, and
#   2. fitting a ridge regression (Ridge with its default settings) on the
#      already-expanded matrix X_transformed.
%timeit feature_transform.fit_transform(X)
%timeit Ridge().fit(X_transformed, y)

346 ms ± 2.84 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.15 s ± 1.18 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [38]:
# The kernel matrix is the inner product of the transformed features
# We can create this using the original data
def kernel(X, y):
    """
    Evaluates the degree-3 polynomial kernel between the data and one sample

    Computes k = X @ y and returns 1 + k + k**2 + k**3, applied elementwise.

    :param X: The original data matrix (n_samples, n_features)
    :param y: The input sample (n_features, 1)
    :return: The kernel values, with the same shape as X @ y
    """
    # Use the parameter X: the previous body referenced an undefined
    # lowercase `x`, which raised NameError (or silently used a stray global).
    k = X @ y
    return 1 + k + k**2 + k**3

def precompute_kernel(X):
    """
    Builds the full degree-3 polynomial kernel matrix for a data matrix

    Forms the Gram matrix G = X @ X.T and returns 1 + G + G**2 + G**3,
    where the powers are taken elementwise.

    :param X: The data matrix; X @ X.T must be defined
    :return: The kernel matrix, with the same shape as X @ X.T
    """
    gram = X @ X.T
    squared = gram ** 2
    cubed = gram ** 3
    return 1 + gram + squared + cubed

# Precompute the kernel matrix, then solve the regularized linear system
# (K + alpha * I) a = y for the coefficient vector a.
alpha = 1.0
K = precompute_kernel(X)
a = np.linalg.solve(K + alpha * np.identity(len(K)), y)

# Benchmark fitting a kernel ridge regression model in two steps:
#   1. building the kernel matrix from the original (untransformed) data, and
#   2. solving the regularized system (K + alpha * I) a = y, using the K and
#      alpha already computed above.
%timeit precompute_kernel(X)
%timeit np.linalg.solve(K + alpha * np.eye(K.shape[0]), y)

50 ms ± 8.71 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
The slowest run took 62.70 times longer than the fastest. This could mean that an intermediate result is being cached.
148 ms ± 246 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
