In [4]:
import numpy as np

    
class RBFEncoder:
    """Gaussian radial basis function (RBF) encoder.

    Maps every input sample to a vector of Gaussian similarities, one entry
    per center::

        encoding[i, j] = exp(-||X[i] - centers[j]||**2 / (2 * sigma**2))

    Each output column is a single Gaussian evaluated around one center; no
    weighting or summation across centers is performed.

    Parameters
    ----------
    centers : array-like, shape (n_centers, n_features) or (n_centers,)
        The centers of the basis functions. A 1-D sequence is treated as
        ``n_centers`` scalar centers, each broadcast across every feature
        of the input.

    sigma : float, optional (default=1.0)
        The width of the Gaussian functions. Must be non-zero.

    Attributes
    ----------
    centers : array-like
        The centers, stored exactly as passed to the constructor.

    sigma : float
        The width of the Gaussian functions.

    """

    def __init__(self, centers, sigma=1.0):
        self.centers = centers
        self.sigma = sigma

    def transform(self, X):
        """Apply the radial basis function encoding to a data array

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input data array to be transformed.

        Returns
        -------
        encoding : array, shape (n_samples, n_centers)
            The radial basis function encoding of the input data.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, if ``centers`` is neither 1-D nor 2-D, if
            the feature counts of ``X`` and ``centers`` cannot be broadcast
            together, or if ``sigma`` is zero.
        """
        # Accept anything array-like (lists, tuples, ndarrays), as documented.
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-D with shape (n_samples, n_features); "
                f"got an array with {X.ndim} dimension(s)"
            )

        centers = np.asarray(self.centers, dtype=float)
        if centers.ndim == 1:
            # Scalar centers: make them a single-feature column so they
            # broadcast across all features of X.
            centers = centers[:, np.newaxis]
        elif centers.ndim != 2:
            raise ValueError(
                "centers must be 1-D or 2-D; "
                f"got an array with {centers.ndim} dimension(s)"
            )

        n_features = X.shape[1]
        n_center_features = centers.shape[1]
        if n_features != n_center_features and 1 not in (n_features, n_center_features):
            raise ValueError(
                f"X has {n_features} feature(s) but centers have "
                f"{n_center_features}; the two cannot be broadcast together"
            )

        if self.sigma == 0:
            raise ValueError("sigma must be non-zero")

        # Pairwise differences via broadcasting: (n_samples, n_centers, n_features).
        diff = X[:, np.newaxis, :] - centers[np.newaxis, :, :]
        squared_distance = (diff ** 2).sum(axis=-1)

        return np.exp(-squared_distance / (2 * self.sigma ** 2))


In [5]:
import numpy as np

# Three basis-function centers placed along the diagonal: (0,0), (1,1), (2,2)
centers = np.array([[k, k] for k in range(3)])

# Every Gaussian shares the same width
sigma = 1.0

# Four query points on the same diagonal, starting one step before the first center
X = np.array([[k, k] for k in range(-1, 3)])

# Build the encoder and map each query point to its per-center activations
rbf_encoder = RBFEncoder(centers, sigma)
encoded_X = rbf_encoder.transform(X)

print("Input Data: \n", X)
print("Encoded Data: \n", encoded_X)


Input Data: 
 [[-1 -1]
 [ 0  0]
 [ 1  1]
 [ 2  2]]
Encoded Data: 
 [[3.67879441e-01 1.83156389e-02 1.23409804e-04]
 [1.00000000e+00 3.67879441e-01 1.83156389e-02]
 [3.67879441e-01 1.00000000e+00 3.67879441e-01]
 [1.83156389e-02 3.67879441e-01 1.00000000e+00]]


In [6]:
# Shape check: transform returns one row per input sample and one column per center.
encoded_X.shape

(4, 3)

In [9]:
import numpy as np
from sklearn.datasets import load_iris

# load the iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# The class labels are what gets encoded here. Both the samples and the
# centers are shaped as single-feature columns so they follow the
# (n_samples, n_features) / (n_centers, n_features) layout that RBFEncoder
# documents, instead of relying on a 1-D array of centers broadcasting.
label_centers = np.unique(y).reshape(-1, 1)

# define the Gaussian RBF encoder with one center per distinct class label
encoder = RBFEncoder(label_centers, sigma=1.0)

# apply the encoding to the labels
encoded_data = encoder.transform(y.reshape(-1, 1))

# print the encoded data
print(encoded_data)


[[1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13

In [10]:
class GaussianRBFEncoder:

    """Radial Basis Function (RBF) Encoder for Categorical Data

    Encodes each sample as a vector of Gaussian similarities to a set of
    category values, one output column per category::

        encoding[i, j] = exp(-||X[i] - categories[j]||**2 / (2 * sigma**2))

    Each column is a single Gaussian centered on one category; no weighting
    or summation across categories is performed.

    Parameters
    ----------
    categories : array-like, shape (n_categories, n_features) or (n_categories,)
        The means of the basis functions, representing the categories.
        A 1-D sequence is treated as ``n_categories`` scalar values, each
        broadcast across every feature of the input. Although the argument
        defaults to ``None``, a value must be supplied before ``transform``
        is called.

    sigma : float, optional (default=1.0)
        The width of the Gaussian functions. Must be non-zero.

    Attributes
    ----------
    categories : array-like or None
        The category values, stored exactly as passed to the constructor.

    sigma : float
        The width of the Gaussian functions.

    """
    def __init__(self, categories = None, sigma=1.0):
        self.categories = categories
        self.sigma = sigma

    def transform(self, X):
        """Apply the radial basis function encoding to categorical data

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input categorical data array to be transformed.

        Returns
        -------
        encoding : array, shape (n_samples, n_categories)
            The radial basis function encoding of the input categorical data.

        Raises
        ------
        ValueError
            If ``categories`` was never provided, if ``X`` is not 2-D, if
            ``categories`` is neither 1-D nor 2-D, if the feature counts of
            ``X`` and ``categories`` cannot be broadcast together, or if
            ``sigma`` is zero.
        """
        if self.categories is None:
            # Fail with an actionable message instead of an AttributeError
            # on ``None.shape`` further down.
            raise ValueError(
                "categories must be provided to GaussianRBFEncoder before "
                "calling transform; got None"
            )

        # Accept anything array-like (lists, tuples, ndarrays), as documented.
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-D with shape (n_samples, n_features); "
                f"got an array with {X.ndim} dimension(s)"
            )

        categories = np.asarray(self.categories, dtype=float)
        if categories.ndim == 1:
            # Scalar category values: make them a single-feature column so
            # they broadcast across all features of X.
            categories = categories[:, np.newaxis]
        elif categories.ndim != 2:
            raise ValueError(
                "categories must be 1-D or 2-D; "
                f"got an array with {categories.ndim} dimension(s)"
            )

        n_features = X.shape[1]
        n_category_features = categories.shape[1]
        if n_features != n_category_features and 1 not in (n_features, n_category_features):
            raise ValueError(
                f"X has {n_features} feature(s) but categories have "
                f"{n_category_features}; the two cannot be broadcast together"
            )

        if self.sigma == 0:
            raise ValueError("sigma must be non-zero")

        # Pairwise differences via broadcasting: (n_samples, n_categories, n_features).
        diff = X[:, np.newaxis, :] - categories[np.newaxis, :, :]
        squared_distance = (diff ** 2).sum(axis=-1)

        return np.exp(-squared_distance / (2 * self.sigma ** 2))


In [12]:
import numpy as np

class CategoricalGaussianRBFEncoder:
    """Encode samples as Gaussian RBF activations around a set of category values.

    Entry ``[i, j]`` of the encoding is ``exp(-dot(d, d) / (2 * sigma**2))``
    where ``d = X[i] - categories[j]``.
    """

    def __init__(self, categories, sigma=1.0):
        self.sigma = sigma
        self.categories = categories

    def transform(self, X):
        """Return the activation matrix for ``X``.

        ``X`` and ``categories`` are both indexed along their first axis and
        must expose ``.shape``; the result has one row per entry of ``X`` and
        one column per entry of ``categories``.
        """
        grid_shape = (X.shape[0], self.categories.shape[0])
        activations = np.zeros(grid_shape)

        # Visit every (sample, category) pair in row-major order.
        for row, col in np.ndindex(*grid_shape):
            delta = X[row] - self.categories[col]
            squared_norm = np.dot(delta, delta)
            activations[row, col] = np.exp(-squared_norm / (2 * self.sigma**2))

        return activations

In [13]:
import numpy as np
from sklearn.datasets import load_iris

# load the iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# The class labels are what gets encoded here. The distinct labels are
# reshaped into a single-feature column so the categories follow the
# (n_categories, n_features) layout that GaussianRBFEncoder documents,
# matching the (n_samples, 1) column of labels passed to transform.
label_categories = np.unique(y).reshape(-1, 1)

# define the Gaussian RBF encoder with one basis function per class label
encoder = GaussianRBFEncoder(categories=label_categories, sigma=1.0)

# apply the encoding to the labels
encoded_data = encoder.transform(y.reshape(-1, 1))

# print the encoded data
print(encoded_data)


[[1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13533528]
 [1.         0.60653066 0.13

In [14]:
import numpy as np

def rbf_encoding(X, categories, sigma=1):
    """
    Encode every sample by its Gaussian similarity to each category mean.

    For each category the mean of its member rows is computed, and entry
    ``[j, i]`` of the result is
    ``exp(-||X[j] - mean_i||**2 / (2 * sigma**2))``.

    X: array-like, shape (n_samples, n_features)
        The input data, where n_samples is the number of samples
        and n_features is the number of features. Values are converted
        to float.

    categories: list of lists
        A list of lists, where each inner list represents a unique category
        and contains the indices of samples that belong to that category.

    sigma: float, optional (default=1)
        The standard deviation for the Gaussian radial basis function.
        Must be non-zero.

    Returns
    -------
    X_rbf: array-like, shape (n_samples, n_categories)
        The Gaussian radial basis function encoding of the input data.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, if any category is empty (its mean would be
        undefined), or if ``sigma`` is zero.
    """
    # Accept lists and object-dtype arrays (e.g. mixed bool/int columns
    # coming out of pandas) by normalising to a float ndarray up front.
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError(
            "X must be 2-D with shape (n_samples, n_features); "
            f"got an array with {X.ndim} dimension(s)"
        )
    if sigma == 0:
        raise ValueError("sigma must be non-zero")

    n_samples = X.shape[0]
    n_categories = len(categories)
    two_sigma_sq = 2 * sigma ** 2

    X_rbf = np.zeros((n_samples, n_categories))

    for i, category in enumerate(categories):
        if len(category) == 0:
            # The mean of zero rows is NaN and would silently poison the
            # whole output column.
            raise ValueError(f"category {i} contains no sample indices")

        category_mean = np.mean(X[category], axis=0)

        # Squared Euclidean distance of every sample to this category mean,
        # computed for all rows at once.
        squared_distance = np.sum((X - category_mean) ** 2, axis=1)
        X_rbf[:, i] = np.exp(-squared_distance / two_sigma_sq)

    return X_rbf


In [15]:
import pandas as pd
import numpy as np

# Build a small sample frame: one categorical column and one numeric column
df_raw = pd.DataFrame({
    "feature_1": ["A", "A", "B", "B", "C", "C"],
    "feature_2": [1, 2, 3, 4, 5, 6]
})

# One-hot encode the categorical column into a new frame, leaving df_raw intact.
# dtype=int pins the indicator columns to 0/1 integers; pandas 2.x defaults to
# bool, which would turn the mixed-column frame into an object array below.
df = pd.get_dummies(df_raw, columns=["feature_1"], dtype=int)

# Extract the input data as a float numpy array
X = df.values.astype(float)

# Define a list of categories, where each inner list contains the row positions
# (not index labels) of the samples that belong to that category
categories = [
    [position for position, flag in enumerate(df[column]) if flag == 1]
    for column in ("feature_1_A", "feature_1_B", "feature_1_C")
]

# Compute the Gaussian radial basis function encoding
X_rbf = rbf_encoding(X, categories, sigma=1)

# Print the result
print("Original data:")
print(df)
print("RBF encoding:")
print(X_rbf)



Original data:
   feature_2  feature_1_A  feature_1_B  feature_1_C
0          1            1            0            0
1          2            1            0            0
2          3            0            1            0
3          4            0            1            0
4          5            0            0            1
5          6            0            0            1
RBF encoding:
[[8.82496903e-01 1.61634946e-02 1.47391992e-05]
 [8.82496903e-01 1.19432968e-01 8.04733010e-04]
 [1.19432968e-01 8.82496903e-01 1.61634946e-02]
 [1.61634946e-02 8.82496903e-01 1.19432968e-01]
 [8.04733010e-04 1.19432968e-01 8.82496903e-01]
 [1.47391992e-05 1.61634946e-02 8.82496903e-01]]
