In [115]:
import numpy as np
from sklearn.linear_model import Lasso, Ridge
class RBFLassoRegression:

    def __init__(self, rbf_dim=10, alpha=1.0, sigma=1.0, times = 1):
        """Kernel lasso regression using random Binning features.

        rbf_dim : Dimension of random feature.
        alpha :   Regularization strength. Should be a positive float.
        hash_ :   Hash function to use. Should be a function that takes a
                    vector of length D and returns a value in [0, rbf_dim).
        d_ :      p elements, each row of d_'s element represents a random vector of length D, generated from a gamma distribution
        U_ :      p elements, each row of U_'s element represents a random vector of length D, generated from a uniform distribution in [0, d] where d is the corresponding element of d_
        p :       number of times to repeat the random binning
        """
        self.fitted  = False
        self.rbf_dim = rbf_dim
        self.sigma   = sigma
        self.lm      = Ridge(alpha=alpha)
        self.d_      = None
        self.U_      = None
        self.p       = times

    def fit(self, X, y):
        """Fit model with training data X and target y.
        """
        Z, U, d = self._get_rbfs(X, return_vars=True)
        self.lm.fit(Z.T, y)
        self.U_ = U
        self.d_ = d
        self.fitted = True
        return self

    def predict(self, X):
        """Predict using fitted model and testing data X.
        """
        if not self.fitted:
            msg = "Call 'fit' with appropriate arguments first."
            raise NotFittedError(msg)
        Z = self._get_rbfs(X, return_vars=False)
        return self.lm.predict(Z.T)

    def _get_rbfs(self, X, return_vars):
        """Return random Fourier features based on data X, as well as random
        variables W and b.
        """

        N, D = X.shape
        if self.U_ is not None:
            U, d = self.U_, self.d_
        else:
            U = []
            d = []
            for i in range(self.p):
                dd = np.random.gamma(2, self.sigma, size=D)
                UU = np.random.uniform(0, dd)
                U.append(UU)
                d.append(dd)

        for i in range(self.p):
            #generate the random binning features
            m  = np.round((X - U[i])/d[i])
            #hash the random binning features from N * D to N * rbf_dim
            h = np.mod(np.sum(m, axis = 1), self.rbf_dim).astype(int)
            #change h to one-hot encoding
            Z = np.zeros((N, self.rbf_dim))
            Z[list(np.arange(N)), h] = 1
            #generate the random binning features
            if i == 0:
                Z_ = Z
            else:
                Z_ = np.hstack((Z_, Z))
        Z_ = Z_ / np.sqrt(self.p)
        Z_ = Z_.T
        if return_vars:
            return Z_, U, d
        return Z_

In [130]:
#generate the random binning features
x = np.random.normal(0, 1, size = (100, 10))
y = np.random.normal(0, 1, size = (100, 1))
r = RBFLassoRegression(rbf_dim=10, times=10)
r.fit(x, y)
print(r.predict(x))


[[-4.39640114e-01]
 [-6.66289312e-02]
 [-1.23295207e-01]
 [-1.64994041e-01]
 [ 3.28163013e-01]
 [-3.04161019e-01]
 [ 7.34659084e-01]
 [-7.85638696e-01]
 [ 2.04327480e-01]
 [ 4.73705100e-01]
 [-4.18230569e-01]
 [ 1.29214040e-01]
 [ 5.96068230e-01]
 [ 2.81453807e-01]
 [ 7.25584384e-01]
 [ 4.68768500e-01]
 [ 4.88585756e-01]
 [ 8.00207840e-02]
 [ 5.98213176e-02]
 [-6.55148236e-01]
 [ 5.90566133e-01]
 [-4.13387204e-01]
 [-3.23167112e-01]
 [ 3.98342308e-01]
 [ 6.45831750e-01]
 [-5.74826235e-01]
 [-8.87739940e-03]
 [ 2.24790004e-01]
 [-2.33588641e-01]
 [-1.15032267e+00]
 [ 4.11006720e-01]
 [ 1.25700609e-03]
 [-4.00376682e-02]
 [ 3.51984657e-01]
 [-2.54345453e-01]
 [-4.89275288e-01]
 [-2.43664517e-01]
 [ 6.33241967e-01]
 [-8.25833095e-01]
 [-3.81005770e-01]
 [ 5.41129618e-01]
 [-1.16440410e-01]
 [ 9.56682727e-01]
 [ 2.63733215e-01]
 [-5.93756038e-01]
 [-5.79382714e-02]
 [-2.60995840e-01]
 [ 4.06229307e-01]
 [ 1.98531494e-01]
 [-2.04501303e-01]
 [-6.95548426e-01]
 [ 4.86502145e-02]
 [-4.3963572

In [131]:
#check if the random binning features are correct by comparing with the result of Laplacian kernel
from sklearn.metrics.pairwise import laplacian_kernel

z = r._get_rbfs(x, False)
print(z.T@z - laplacian_kernel(x, x))

[[-1.11022302e-16 -4.11235812e-01 -4.43754489e-01 ... -1.90128658e-01
  -3.03949066e-01 -1.94107462e-01]
 [-4.11235812e-01 -1.11022302e-16 -3.51310289e-01 ... -2.24093141e-01
  -1.79317490e-01 -3.88714716e-01]
 [-4.43754489e-01 -3.51310289e-01 -1.11022302e-16 ... -8.51405604e-02
  -3.57344925e-01 -2.71796496e-01]
 ...
 [-1.90128658e-01 -2.24093141e-01 -8.51405604e-02 ... -1.11022302e-16
  -3.50560166e-01 -2.82443703e-01]
 [-3.03949066e-01 -1.79317490e-01 -3.57344925e-01 ... -3.50560166e-01
  -1.11022302e-16 -4.22153705e-01]
 [-1.94107462e-01 -3.88714716e-01 -2.71796496e-01 ... -2.82443703e-01
  -4.22153705e-01 -1.11022302e-16]]


In [132]:
laplacian_kernel(x, x)

array([[1.        , 0.61123581, 0.44375449, ..., 0.39012866, 0.30394907,
        0.29410746],
       [0.61123581, 1.        , 0.45131029, ..., 0.42409314, 0.37931749,
        0.38871472],
       [0.44375449, 0.45131029, 1.        , ..., 0.28514056, 0.35734493,
        0.2717965 ],
       ...,
       [0.39012866, 0.42409314, 0.28514056, ..., 1.        , 0.35056017,
        0.3824437 ],
       [0.30394907, 0.37931749, 0.35734493, ..., 0.35056017, 1.        ,
        0.42215371],
       [0.29410746, 0.38871472, 0.2717965 , ..., 0.3824437 , 0.42215371,
        1.        ]])

In [68]:
Z = np.zeros((100, 10))[:,:]
Z[(0,1)] = 1

Z

array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 

In [74]:
list(zip(np.arange(10), np.arange(10)))

[(0, 0),
 (1, 1),
 (2, 2),
 (3, 3),
 (4, 4),
 (5, 5),
 (6, 6),
 (7, 7),
 (8, 8),
 (9, 9)]