In [1]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=1000, n_features=4, n_informative=4, n_redundant=0, random_state=42)

In [9]:
import cupy as cp 

class LogisticRegressionGPU:
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None
        
    def sigmoid(self, z):
        return 1 / (1 + cp.exp(-z))
    
    def fit(self, X, y):
        n_samples, n_features = X.shape
        
        # Initialize parameters
        self.weights = cp.zeros(n_features)
        self.bias = cp.float64(0)
        
        # Gradient descent
        for _ in range(self.n_iterations):
            # Forward pass
            linear_model = cp.dot(X, self.weights) + self.bias
            y_predicted = self.sigmoid(linear_model)
            
            # Compute gradients
            dw = (1 / n_samples) * cp.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * cp.sum(y_predicted - y)
            
            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db
    
    def predict(self, X):
        linear_model = cp.dot(X, self.weights) + self.bias
        y_predicted = self.sigmoid(linear_model)
        return cp.where(y_predicted >= 0.5, 1, 0)
    
    def predict_proba(self, X):
        linear_model = cp.dot(X, self.weights) + self.bias
        y_predicted = self.sigmoid(linear_model)
        y_predicted = cp.column_stack((y_predicted, 1-y_predicted))
        return y_predicted

In [16]:
model2 = LogisticRegressionGPU(learning_rate=0.1, n_iterations=1000)
model2.fit(cp.array(X), cp.array(y))


def compute_decision_boundary_points_gpu(model, X, resolution=100, epsilon=0.01):
    n_features = X.shape[1]
    boundary_points = cp.zeros((1,n_features))
    
    grid = cp.zeros((resolution ** n_features, n_features))
    for i, feature in enumerate(range(n_features)):
        cp_array = cp.linspace(X[:, feature].min() - 1, X[:, feature].max() + 1, resolution ** (n_features)).reshape(-1)
        grid[:, i] = cp_array

    print(grid.shape)
    Z = cp.asarray(model.predict(grid))

    for i in range(len(grid) - 1):
        # Compute broadcasted points (difference between grid[i,:] and all other points)
        broadcasted_pts = grid[i, :] - grid  # This creates a matrix of differences

        # Compute the row-wise norm of broadcasted points
        norm_broadcasted_pts = cp.linalg.norm(broadcasted_pts, axis=1)

        # Compute the difference in predictions
        Z_vec = Z[i] - Z

        # Create a mask where the norm is less than epsilon and Z_vec equals -1
        mask = (norm_broadcasted_pts < epsilon) & Z_vec != 0

        # Find the indices where the mask is true
        masked_indices = cp.where(mask)[0]  # Use [0] to extract the array of indices from the tuple

        # Compute new points as the midpoint between grid[i,:] and the masked points
        new_pts = (grid[i, :] + grid[masked_indices]) / 2

        # Append new points to the boundary_points array
        boundary_points = cp.append(boundary_points, new_pts, axis=0)
    
    boundary_points = boundary_points[1:,:]
    return boundary_points

n_features = X.shape[1]
total_boundary_pts = cp.zeros((1,n_features))

# Compute decision boundary points considering all features
boundary_points = compute_decision_boundary_points_gpu(model2, X, resolution=15, epsilon=0.1)

# Print the decision boundary points
print("Decision Boundary Points (All Features):")
print(boundary_points)
print(boundary_points.shape)

(50625, 4)
Decision Boundary Points (All Features):
[[-0.59532468 -0.34078832 -0.32697917  0.25252095]
 [-0.5952216  -0.3406864  -0.32686877  0.25263694]
 [-0.59511852 -0.34058447 -0.32675838  0.25275294]
 ...
 [-0.54811423 -0.29410767 -0.27641714  0.3056459 ]
 [-0.54801115 -0.29400575 -0.27630674  0.3057619 ]
 [-0.54790807 -0.29390382 -0.27619635  0.30587789]]
(53592, 4)


In [17]:
model2.predict_proba(boundary_points)

array([[0.49341902, 0.50658098],
       [0.49344773, 0.50655227],
       [0.49347644, 0.50652356],
       ...,
       [0.50656906, 0.49343094],
       [0.50659777, 0.49340223],
       [0.50662648, 0.49337352]])

In [20]:
from scipy.interpolate import RBFInterpolator 
print(boundary_points.shape)
n_features = X.shape[1]
X_vals, y_vals = boundary_points[:,0:n_features-1], cp.reshape(boundary_points[:,-1], (-1,1)) 
print(y_vals.shape)
interpolator = RBFInterpolator(X_vals.get(), y_vals.get(), kernel='cubic', smoothing=1e-12)

(53592, 4)
(53592, 1)


MemoryError: (unable to allocate 18446744072287115536 bytes)