In [1]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

In [18]:
import numpy as np 
X, y = make_classification(n_samples=1000, n_features=4, n_informative=4, n_redundant=0, random_state=42)
 
# Train a Logistic Regression model (or any other classifier)
model = LogisticRegression()
model.fit(X, y)

In [None]:
def compute_decision_boundary_points_cpu(model, X, resolution=100, epsilon=0.01):
    """
    Compute decision boundary points in the high-dimensional feature space.
    Args:
        model: Trained classifier.
        X: Input data (n_samples, n_features).
        resolution: Number of points to sample along each feature axis.
        epsilon: Small step size to detect class changes.
    Returns:
        boundary_points: Array of points near the decision boundary.
    """
    n_features = X.shape[1]
    boundary_points = []
 
    # Create a grid for all features
    grid = np.zeros((resolution ** n_features, n_features))
    for i, feature in enumerate(range(n_features)):
        np_array = np.linspace(X[:, feature].min() - 1, X[:, feature].max() + 1, resolution ** (n_features)).reshape(-1)
        grid[:, i] = np_array 

    # Predict the class for each point in the grid
    Z = model.predict(grid)
 
    # Find points near the decision boundary
    for i in range(len(grid) - 1):
        for j in range(i + 1, len(grid)):
            if np.linalg.norm(grid[i] - grid[j]) < epsilon and Z[i] != Z[j]:
                boundary_points.append((grid[i] + grid[j]) / 2)  # Midpoint between two classes
 
    return np.array(boundary_points)

In [3]:
# Compute decision boundary points considering all features
boundary_points = compute_decision_boundary_points_cpu(model, X, resolution=12, epsilon=0.1)
 
# Print the decision boundary points
print("Decision Boundary Points (All Features):")
print(boundary_points)
print(boundary_points.shape)

Decision Boundary Points (All Features):
[[-0.59537255 -0.34083565 -0.32703044  0.25246708]
 [-0.59512088 -0.34058681 -0.3267609   0.25275028]
 [-0.59486922 -0.34033797 -0.32649137  0.25303347]
 ...
 [-0.54906595 -0.29504871 -0.27743642  0.30457496]
 [-0.54881428 -0.29479987 -0.27716689  0.30485815]
 [-0.54856262 -0.29455102 -0.27689736  0.30514134]]
(4465, 4)


In [4]:
print("Decision Boundary Points (Probability Across Both classes):")
model.predict_proba(boundary_points) 

Decision Boundary Points (Probability Across Both classes):


array([[0.50653839, 0.49346161],
       [0.50646862, 0.49353138],
       [0.50639885, 0.49360115],
       ...,
       [0.49369896, 0.50630104],
       [0.49362919, 0.50637081],
       [0.49355942, 0.50644058]])

In [29]:
from scipy.interpolate import griddata

n_features = X.shape[1]
print(n_features)
ranges = [np.linspace(X[:, j].min() - 1, X[:, j].max() + 1, 100) for j in range(n_features)]
grids = np.meshgrid(*ranges)
new_points = np.random.rand(1000, n_features-1)
X_vals, y_vals = boundary_points[:,0:n_features-1], boundary_points[:,-1] 

interpolated_values = griddata(X_vals, y_vals, new_points, method='cubic')

4


ValueError: Unknown interpolation method 'cubic' for 3 dimensional data

In [2]:
from scipy.interpolate import RBFInterpolator 
print(boundary_points.shape)
n_features = X.shape[1]
X_vals, y_vals = boundary_points[:,0:n_features-1], np.reshape(boundary_points[:,-1], (-1,1)) 
print(y_vals.shape)
interpolator = RBFInterpolator(X_vals, y_vals, kernel='cubic', smoothing=1e-12)

NameError: name 'boundary_points' is not defined

In [31]:
n_features = X.shape[1]
ranges = [np.linspace(X[:, j].min() - 1, X[:, j].max() + 1, 15) for j in range(n_features)]
grids = np.meshgrid(*ranges) 

grid_points = np.vstack([g.ravel() for g in grids]).T

In [32]:
print(grid_points.shape)

(50625, 4)


In [33]:
eval_coords = grid_points[:,0:n_features-1]
print(eval_coords.shape)
eval_values = interpolator(eval_coords)
add_boundary_pts = np.hstack((eval_coords, eval_values))
print(eval_coords.shape)
print(eval_values.shape)
print(add_boundary_pts.shape)

(50625, 3)
(50625, 3)
(50625, 1)
(50625, 4)


In [34]:
model.predict_proba(add_boundary_pts)

array([[0.99712035, 0.00287965],
       [0.99712035, 0.00287965],
       [0.99712035, 0.00287965],
       ...,
       [0.00324154, 0.99675846],
       [0.00324154, 0.99675846],
       [0.00324154, 0.99675846]])