In [2]:
import numpy as np
import pandas as pd

## Kernels

$$k(\cdot|x_1) = \sigma(y_{i}) + 0.1 \cdot x_1$$

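$y_{i} \sim \mathcal{N}(\mu = 2x_1,\ \sigma = |3x_1|)$, where the second argument is the standard deviation; the absolute value keeps it non-negative when $x_1 < 0$.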

$$k(\cdot|x_2) = \frac{1}{\sqrt{y_{i}}} \cdot \text{abs}(x_2)$$

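$y_{i} \sim \mathcal{N}(\mu = x_2^2,\ \sigma = |3x_2|)$, where the second argument is the standard deviation. Non-positive draws of $y_{i}$ are replaced by $10^{-6}$ before the square root is taken.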

In [18]:
def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    NumPy scalar or numpy.ndarray
        Element-wise sigmoid of ``z``; a NumPy scalar for scalar input,
        otherwise an array with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it can never overflow, unlike
    # exp(-z), which blows up to inf for large negative z.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- the same
    # function, each written so that only the non-overflowing exponential is used.
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # np.where yields a 0-d array for scalar input; [()] unwraps that to a
    # NumPy scalar and leaves n-d arrays as they are.
    return out[()]

In [None]:
def generate_kernel_data(alpha1, beta1, alpha2, beta2, num_samples=100, samples_per_kernel=4,
                         rng=None, y_floor=1e-6):
    """Draw seeds ``x1``/``x2`` and evaluate both kernels on samples around them.

    For every row:

    * ``x1 ~ N(alpha1, beta1)`` and ``x2 ~ N(alpha2, beta2)`` (``beta*`` are
      standard deviations).
    * Kernel 1: ``y ~ N(2*x1, |3*x1|)``, output ``sigmoid(y) + 0.1 * x1``.
    * Kernel 2: ``y ~ N(x2**2, |3*x2|)``, non-positive ``y`` replaced by
      ``y_floor``, output ``(1 / sqrt(y)) * |x2|``.

    Parameters
    ----------
    alpha1, beta1 : float
        Mean and standard deviation of the distribution ``x1`` is drawn from.
    alpha2, beta2 : float
        Mean and standard deviation of the distribution ``x2`` is drawn from.
    num_samples : int, default 100
        Number of rows (seed pairs) to generate.
    samples_per_kernel : int, default 4
        Number of ``y`` draws, and therefore output columns, per kernel per row.
    rng : object with a ``normal(loc, scale, size)`` method, optional
        Random source, e.g. ``np.random.default_rng(seed)`` or a
        ``np.random.RandomState``. When ``None`` the global ``np.random``
        state is used, so ``np.random.seed`` keeps working as before.
    y_floor : float, default 1e-6
        Strictly positive value substituted for non-positive kernel-2 ``y``
        draws so that ``1 / sqrt(y)`` stays finite.

    Returns
    -------
    pandas.DataFrame
        One row per seed pair with columns ``x1_seed``, ``x2_seed`` followed by
        ``k1_output_j``, ``k2_output_j`` for ``j = 1 .. samples_per_kernel``.
        The columns are present even when ``num_samples`` is 0.

    Raises
    ------
    ValueError
        If ``y_floor`` is not strictly positive.
    """
    if y_floor <= 0:
        raise ValueError("y_floor must be strictly positive")

    # The np.random module itself exposes normal(), so it can stand in for a
    # generator object and keeps the historical global-state behaviour.
    sampler = np.random if rng is None else rng

    # Fix the schema up front so an empty result still carries its columns.
    columns = ['x1_seed', 'x2_seed']
    for j in range(1, samples_per_kernel + 1):
        columns += [f'k1_output_{j}', f'k2_output_{j}']

    rows = []
    # NOTE: the draw order (x1, x2, y for kernel 1, y for kernel 2) is kept
    # per row so that a given seed reproduces the same numbers as before.
    for _ in range(num_samples):
        x1 = sampler.normal(loc=alpha1, scale=beta1)
        x2 = sampler.normal(loc=alpha2, scale=beta2)

        # Kernel 1: the scale must be non-negative, hence abs() for negative seeds.
        y1 = sampler.normal(loc=2 * x1, scale=abs(3 * x1), size=samples_per_kernel)
        k1 = sigmoid(y1) + 0.1 * x1

        # Kernel 2: clamp non-positive draws before taking the square root.
        y2 = sampler.normal(loc=x2 ** 2, scale=abs(3 * x2), size=samples_per_kernel)
        y2 = np.where(y2 <= 0, y_floor, y2)
        k2 = (1 / np.sqrt(y2)) * abs(x2)

        row = {'x1_seed': x1, 'x2_seed': x2}
        for j in range(samples_per_kernel):
            row[f'k1_output_{j+1}'] = k1[j]
            row[f'k2_output_{j+1}'] = k2[j]
        rows.append(row)

    return pd.DataFrame(rows, columns=columns)

In [15]:
# Seed NumPy's global random state so this cell yields the same draws on
# every Restart & Run All (generate_kernel_data samples via np.random by default).
np.random.seed(42)
final_data = generate_kernel_data(alpha1=5, beta1=10, alpha2=15, beta2=20, num_samples=100, samples_per_kernel=4)

In [19]:
# Preview the first five generated rows (seeds followed by the kernel outputs).
final_data.head(n=5)

Unnamed: 0,x1_seed,x2_seed,k1_output_1,k2_output_1,k1_output_2,k2_output_2,k1_output_3,k2_output_3,k1_output_4,k2_output_4
0,16.974569,-8.726524,2.697454,1.042234,2.697457,0.861392,1.697462,1.066583,2.697457,0.81843
1,-8.55055,15.657805,0.140813,0.856635,-0.855055,0.959655,-0.855055,0.906347,-0.855055,0.887822
2,7.493067,15.674365,0.76073,0.982754,1.749302,1.141796,0.74992,0.887147,1.749302,0.964876
3,1.156695,17.911992,1.102111,0.847279,1.112339,0.993034,1.091787,0.855107,0.751819,1.01522
4,-9.550308,-5.750766,-0.955024,1.023174,0.044969,0.951915,-0.949947,0.882685,-0.948986,0.688759


In [20]:
# use different distribution