In [1]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=1000, n_features=4, n_informative=4, n_redundant=0, random_state=42)

In [50]:
import cupy as cp 

class LogisticRegressionGPU:
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None
        
    def sigmoid(self, z):
        return 1 / (1 + cp.exp(-z))
    
    def fit(self, X, y):
        n_samples, n_features = X.shape
        
        # Initialize parameters
        self.weights = cp.zeros(n_features)
        self.bias = cp.float64(0)
        
        # Gradient descent
        for _ in range(self.n_iterations):
            # Forward pass
            linear_model = cp.dot(X, self.weights) + self.bias
            y_predicted = self.sigmoid(linear_model)
            
            # Compute gradients
            dw = (1 / n_samples) * cp.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * cp.sum(y_predicted - y)
            
            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db
    
    def predict(self, X):
        linear_model = cp.dot(X, self.weights) + self.bias
        y_predicted = self.sigmoid(linear_model)
        return cp.where(y_predicted >= 0.5, 1, 0)
    
    def predict_proba(self, X):
        linear_model = cp.dot(X, self.weights) + self.bias
        y_predicted = self.sigmoid(linear_model)
        y_predicted = cp.column_stack((y_predicted, 1-y_predicted))
        return y_predicted

# Computational Costs with GPU 

GPU: $\textbf{Nvidia Geforce RTX 4070}$

- 25 Resolution (R = 25) 
-  13+ minute runtime

- 20 Resolution (R = 20) 
- 3 minutes 5 seconds runtime

- 18 Resolution (R = 18) 
- 54 second runtime

- 15 Resolution (R = 15) 
- 18.4 second runtime 

- 10 Resolution (R = 10) 
- 1 second runtime 

Runtime: O($R^f$) in the worst case. Broadcasting will be $O(1)$ operation due to parallelization.

In [63]:
model2 = LogisticRegressionGPU(learning_rate=0.01, n_iterations=3000)
model2.fit(cp.array(X), cp.array(y))


def compute_decision_boundary_points_gpu(model, X, resolution=100, epsilon=0.01):
    n_features = X.shape[1]
    boundary_points = cp.zeros((1,n_features))
    
    grid = cp.zeros((resolution ** n_features, n_features))
    for i, feature in enumerate(range(n_features)):
        cp_array = cp.linspace(X[:, feature].min() - 1, X[:, feature].max() + 1, resolution ** (n_features)).reshape(-1)
        grid[:, i] = cp_array

    print(grid.shape)
    Z = cp.asarray(model.predict(grid))

    for i in range(len(grid) - 1):
        # Compute broadcasted points (difference between grid[i,:] and all other points)
        broadcasted_pts = grid[i, :] - grid  # This creates a matrix of differences

        # Compute the row-wise norm of broadcasted points
        norm_broadcasted_pts = cp.linalg.norm(broadcasted_pts, axis=1)

        # Compute the difference in predictions
        Z_vec = Z[i] - Z

        # Create a mask where the norm is less than epsilon and Z_vec equals -1
        mask = (norm_broadcasted_pts < epsilon) & Z_vec != 0

        # Find the indices where the mask is true
        masked_indices = cp.where(mask)[0]  # Use [0] to extract the array of indices from the tuple

        # Compute new points as the midpoint between grid[i,:] and the masked points
        new_pts = (grid[i, :] + grid[masked_indices]) / 2

        # Append new points to the boundary_points array
        boundary_points = cp.append(boundary_points, new_pts, axis=0)
    
    boundary_points = boundary_points[1:,:]
    return boundary_points

n_features = X.shape[1]
total_boundary_pts = cp.zeros((1,n_features))

# Compute decision boundary points considering all features
boundary_points = compute_decision_boundary_points_gpu(model2, X, resolution=15, epsilon=0.1)

# Print the decision boundary points
print("Decision Boundary Points (All Features):")
print(boundary_points)
print(boundary_points.shape)

(50625, 4)
Decision Boundary Points (All Features):
[[-0.58048122 -0.32611143 -0.31108194  0.26922399]
 [-0.58037814 -0.32600951 -0.31097154  0.26933998]
 [-0.58027506 -0.32590759 -0.31086114  0.26945598]
 ...
 [-0.53327077 -0.27943078 -0.26051991  0.32234895]
 [-0.53316769 -0.27932886 -0.26040951  0.32246494]
 [-0.53306461 -0.27922694 -0.26029911  0.32258093]]
(53592, 4)


In [64]:
model2.predict_proba(boundary_points)

array([[0.49357089, 0.50642911],
       [0.49359889, 0.50640111],
       [0.49362689, 0.50637311],
       ...,
       [0.50639736, 0.49360264],
       [0.50642536, 0.49357464],
       [0.50645337, 0.49354663]])

In [65]:
from scipy.interpolate import RBFInterpolator 
print(boundary_points.shape)
n_features = X.shape[1]
X_vals, y_vals = boundary_points[:,0:n_features-1], cp.reshape(boundary_points[:,-1], (-1,1)) 
print(y_vals.shape)
interpolator = RBFInterpolator(X_vals.get(), y_vals.get(), kernel='cubic', smoothing=1e-12)

(53592, 4)
(53592, 1)


MemoryError: (unable to allocate 18446744072287115536 bytes)

In [54]:
resolution = 20
grid_points = cp.zeros((resolution ** n_features, n_features))
for i, feature in enumerate(range(n_features)):
        np_array = cp.linspace(boundary_points[:, feature].min(), boundary_points[:, feature].max(), resolution ** (n_features)).reshape(-1)
        grid_points[:, i] = np_array 

In [55]:
eval_coords = grid_points[:,0:n_features-1]
print(eval_coords.shape)
eval_values = interpolator(eval_coords.get())
add_boundary_pts = cp.hstack((eval_coords, eval_values))
print(eval_coords.shape)
print(eval_values.shape)
print(add_boundary_pts.shape)

(160000, 3)
(160000, 3)
(160000, 1)
(160000, 4)


In [56]:
arr = model2.predict_proba(add_boundary_pts)

In [57]:
average_diff = cp.mean(arr[:, 0] - arr[:, 1])
print("Average difference:", average_diff)

Average difference: -2.164031962484323e-05
