[WIP] Adjust all algorithms to work with CuPy #75

Open
wants to merge 12 commits into base: main
57 changes: 36 additions & 21 deletions dask_glm/algorithms.py
@@ -11,6 +11,7 @@
from scipy.optimize import fmin_l_bfgs_b


+ from dask.array.utils import normalize_to_array
from dask_glm.utils import dot, normalize
from dask_glm.families import Logistic
from dask_glm.regularizers import Regularizer
@@ -97,7 +98,7 @@ def gradient_descent(X, y, max_iter=100, tol=1e-14, family=Logistic, **kwargs):
stepSize = 1.0
recalcRate = 10
backtrackMult = firstBacktrackMult
- beta = np.zeros(p)
+ beta = np.zeros_like(X, shape=(p,))

for k in range(max_iter):
# how necessary is this recalculation?
@@ -161,7 +162,7 @@ def newton(X, y, max_iter=50, tol=1e-8, family=Logistic, **kwargs):
"""
gradient, hessian = family.gradient, family.hessian
n, p = X.shape
- beta = np.zeros(p) # always init to zeros?
+ beta = np.zeros_like(X, shape=(p,)) # always init to zeros?
Xbeta = dot(X, beta)

iter_count = 0
@@ -178,8 +179,10 @@ def newton(X, y, max_iter=50, tol=1e-8, family=Logistic, **kwargs):

# should this be dask or numpy?
# currently uses Python 3 specific syntax
- step, _, _, _ = np.linalg.lstsq(hess, grad)
- beta = (beta_old - step)
+ step, _, _, _ = np.linalg.lstsq(normalize_to_array(hess), normalize_to_array(grad))
+ step_like = np.empty_like(X, shape=step.shape)
+ step_like[:] = step
+ beta = (beta_old - step_like)

iter_count += 1

@@ -225,14 +228,19 @@ def admm(X, y, regularizer='l1', lamduh=0.1, rho=1, over_relax=1,
def create_local_gradient(func):
@functools.wraps(func)
def wrapped(beta, X, y, z, u, rho):
- return func(beta, X, y) + rho * (beta - z + u)
+ beta_like = np.empty_like(X, shape=beta.shape)
+ beta_like[:] = beta
+ return normalize_to_array(func(beta_like, X, y) + rho *
+ (beta_like - z + u))
return wrapped

def create_local_f(func):
@functools.wraps(func)
def wrapped(beta, X, y, z, u, rho):
- return func(beta, X, y) + (rho / 2) * np.dot(beta - z + u,
- beta - z + u)
+ beta_like = np.empty_like(X, shape=beta.shape)
+ beta_like[:] = beta
+ return normalize_to_array(func(beta_like, X, y) + (rho / 2) *
+ np.dot(beta_like - z + u, beta_like - z + u))
return wrapped

f = create_local_f(pointwise_loss)
@@ -252,23 +260,23 @@ def wrapped(beta, X, y, z, u, rho):
else:
yD = [y]

- z = np.zeros(p)
- u = np.array([np.zeros(p) for i in range(nchunks)])
- betas = np.array([np.ones(p) for i in range(nchunks)])
+ z = np.zeros_like(X, shape=(p,))
+ u = np.stack([np.zeros_like(X, shape=(p,)) for i in range(nchunks)])
+ betas = np.stack([np.ones_like(X, shape=(p,)) for i in range(nchunks)])

for k in range(max_iter):

# x-update step
new_betas = [delayed(local_update)(xx, yy, bb, z, uu, rho, f=f,
fprime=fprime) for
xx, yy, bb, uu in zip(XD, yD, betas, u)]
- new_betas = np.array(da.compute(*new_betas))
+ new_betas = np.stack(da.compute(*new_betas), axis=0)

beta_hat = over_relax * new_betas + (1 - over_relax) * z

# z-update step
zold = z.copy()
- ztilde = np.mean(beta_hat + np.array(u), axis=0)
+ ztilde = np.mean(beta_hat + u, axis=0)
z = regularizer.proximal_operator(ztilde, lamduh / (rho * nchunks))

# u-update step
@@ -295,11 +303,14 @@ def local_update(X, y, beta, z, u, rho, f, fprime, solver=fmin_l_bfgs_b):
u = u.ravel()
z = z.ravel()
solver_args = (X, y, z, u, rho)
- beta, f, d = solver(f, beta, fprime=fprime, args=solver_args,
+ beta, f, d = solver(f, normalize_to_array(beta),
+ fprime=fprime, args=solver_args,
maxiter=200,
maxfun=250)

- return beta
+ beta_like = np.empty_like(X, shape=beta.shape)
+ beta_like[:] = beta
+ return beta_like


@normalize
@@ -335,21 +346,25 @@ def lbfgs(X, y, regularizer=None, lamduh=1.0, max_iter=100, tol=1e-4,
pointwise_gradient = regularizer.add_reg_grad(pointwise_gradient, lamduh)

n, p = X.shape
- beta0 = np.zeros(p)
+ beta0 = np.zeros_like(X, shape=(p,))

def compute_loss_grad(beta, X, y):
- loss_fn = pointwise_loss(beta, X, y)
- gradient_fn = pointwise_gradient(beta, X, y)
+ beta_like = np.empty_like(X, shape=beta.shape)
+ beta_like[:] = beta
+ loss_fn = pointwise_loss(beta_like, X, y)
+ gradient_fn = pointwise_gradient(beta_like, X, y)
loss, gradient = compute(loss_fn, gradient_fn)
- return loss, gradient.copy()
+ return normalize_to_array(loss), normalize_to_array(gradient.copy())

with dask.config.set(fuse_ave_width=0): # optimizations slows this down
beta, loss, info = fmin_l_bfgs_b(
- compute_loss_grad, beta0, fprime=None,
+ compute_loss_grad, normalize_to_array(beta0), fprime=None,
args=(X, y),
iprint=(verbose > 0) - 1, pgtol=tol, maxiter=max_iter)

- return beta
+ beta_like = np.empty_like(X, shape=beta.shape)
+ beta_like[:] = beta
+ return beta_like


@normalize
@@ -384,7 +399,7 @@ def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family=Logistic,
stepSize = 1.0
recalcRate = 10
backtrackMult = firstBacktrackMult
- beta = np.zeros(p)
+ beta = np.zeros_like(X, shape=(p,))
regularizer = Regularizer.get(regularizer)

for k in range(max_iter):
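The recurring pattern in algorithms.py is a round trip between backends: coefficient arrays are allocated with np.zeros_like(X, ...) / np.empty_like(X, ...) so they live wherever X lives, and normalize_to_array converts device arrays back to NumPy just before SciPy routines such as fmin_l_bfgs_b or np.linalg.lstsq. Below is a minimal sketch of that round trip, with hypothetical helper names (init_like, to_backend) not used in the PR, assuming NumPy >= 1.17 NEP-18 dispatch and a backend whose *_like functions accept the shape keyword:

import numpy as np

def init_like(X, p):
    # np.zeros_like dispatches through __array_function__: a NumPy X gives
    # a NumPy vector, a CuPy X gives a CuPy vector, with no cupy import here.
    return np.zeros_like(X, shape=(p,))

def to_backend(X, values):
    # SciPy solvers return plain NumPy arrays; copying them into an
    # empty_like(X, ...) buffer moves the result back onto X's backend.
    out = np.empty_like(X, shape=np.shape(values))
    out[:] = values
    return out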
Empty file added dask_glm/cupy/__init__.py
114 changes: 114 additions & 0 deletions dask_glm/cupy/datasets.py
@@ -0,0 +1,114 @@
import numpy as np
import cupy


def make_classification(n_samples=1000, n_features=100, n_informative=2, scale=1.0):
"""
Generate a dummy dataset for classification tasks.

Parameters
----------
n_samples : int
number of rows in the output array
n_features : int
number of columns (features) in the output array
n_informative : int
number of features that are correlated with the outcome
scale : float
Scale the true coefficient array by this

Returns
-------
X : cupy.ndarray, size ``(n_samples, n_features)``
y : cupy.ndarray, size ``(n_samples,)``
boolean-valued array

Examples
--------
>>> X, y = make_classification()
>>> X.dtype, X.shape
(dtype('float64'), (1000, 100))
>>> y.dtype, y.shape
(dtype('bool'), (1000,))
"""
X = cupy.random.normal(0, 1, size=(n_samples, n_features))
informative_idx = cupy.random.choice(n_features, n_informative)
beta = (cupy.random.random(n_features) - 1) * scale
z0 = X[:, informative_idx].dot(beta[informative_idx])
y = cupy.random.random(z0.shape) < 1 / (1 + np.exp(-z0))
return X, y


def make_regression(n_samples=1000, n_features=100, n_informative=2, scale=1.0):
"""
Generate a dummy dataset for regression tasks.

Parameters
----------
n_samples : int
number of rows in the output array
n_features : int
number of columns (features) in the output array
n_informative : int
number of features that are correlated with the outcome
scale : float
Scale the true coefficient array by this

Returns
-------
X : cupy.ndarray, size ``(n_samples, n_features)``
y : cupy.ndarray, size ``(n_samples,)``
real-valued array

Examples
--------
>>> X, y = make_regression()
>>> X.dtype, X.shape
(dtype('float64'), (1000, 100))
>>> y.dtype, y.shape
(dtype('float64'), (1000,))
"""
X = cupy.random.normal(0, 1, size=(n_samples, n_features))
informative_idx = cupy.random.choice(n_features, n_informative)
beta = (cupy.random.random(n_features) - 1) * scale
z0 = X[:, informative_idx].dot(beta[informative_idx])
y = cupy.random.random(z0.shape)
return X, y


def make_poisson(n_samples=1000, n_features=100, n_informative=2, scale=1.0):
"""
Generate a dummy dataset for modeling count data.

Parameters
----------
n_samples : int
number of rows in the output array
n_features : int
number of columns (features) in the output array
n_informative : int
number of features that are correlated with the outcome
scale : float
Scale the true coefficient array by this

Returns
-------
X : cupy.ndarray, size ``(n_samples, n_features)``
y : cupy.ndarray, size ``(n_samples,)``
array of non-negative integer-valued data

Examples
--------
>>> X, y = make_poisson()
>>> X.dtype, X.shape
(dtype('float64'), (1000, 100))
>>> y.dtype, y.shape
(dtype('int64'), (1000,))
"""
X = cupy.random.normal(0, 1, size=(n_samples, n_features))
informative_idx = cupy.random.choice(n_features, n_informative)
beta = (cupy.random.random(n_features) - 1) * scale
z0 = X[:, informative_idx].dot(beta[informative_idx])
rate = np.exp(z0)
y = cupy.random.poisson(rate)
return X, y
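For context, a hypothetical end-to-end use of these helpers with the adjusted solvers; the import paths follow this PR's layout, and the call below is a sketch rather than a tested example:

from dask_glm.cupy.datasets import make_classification
from dask_glm.algorithms import gradient_descent

X, y = make_classification(n_samples=1000, n_features=100)  # CuPy arrays on the GPU
beta = gradient_descent(X, y, max_iter=50)  # coefficients come back as a CuPy array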
5 changes: 2 additions & 3 deletions dask_glm/datasets.py
@@ -1,6 +1,5 @@
import numpy as np
import dask.array as da
- from dask_glm.utils import exp


def make_classification(n_samples=1000, n_features=100, n_informative=2, scale=1.0,
@@ -40,7 +39,7 @@ def make_classification(n_samples=1000, n_features=100, n_informative=2, scale=1
informative_idx = np.random.choice(n_features, n_informative)
beta = (np.random.random(n_features) - 1) * scale
z0 = X[:, informative_idx].dot(beta[informative_idx])
- y = da.random.random(z0.shape, chunks=(chunksize,)) < 1 / (1 + da.exp(-z0))
+ y = da.random.random(z0.shape, chunks=(chunksize,)) < 1 / (1 + np.exp(-z0))
return X, y


@@ -122,6 +121,6 @@ def make_poisson(n_samples=1000, n_features=100, n_informative=2, scale=1.0,
informative_idx = np.random.choice(n_features, n_informative)
beta = (np.random.random(n_features) - 1) * scale
z0 = X[:, informative_idx].dot(beta[informative_idx])
- rate = exp(z0)
+ rate = np.exp(z0)
y = da.random.poisson(rate, size=1, chunks=(chunksize,))
return X, y
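The datasets change drops the dask_glm.utils.exp wrapper because NumPy ufuncs already defer to dask arrays. A short sketch of the behaviour being relied on, assuming dask.array's __array_ufunc__ support (present in current releases):

import numpy as np
import dask.array as da

z = da.random.random((1000,), chunks=(100,))
lazy = np.exp(-z)   # stays a lazy dask array, not a NumPy array
lazy.compute()      # evaluation happens only here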
5 changes: 3 additions & 2 deletions dask_glm/estimators.py
@@ -1,12 +1,13 @@
"""
Models following scikit-learn's estimator API.
"""
+ import numpy as np
from sklearn.base import BaseEstimator

from . import algorithms
from . import families
from .utils import (
- sigmoid, dot, add_intercept, mean_squared_error, accuracy_score, exp,
+ sigmoid, dot, add_intercept, mean_squared_error, accuracy_score,
poisson_deviance
)

@@ -227,7 +228,7 @@ class PoissonRegression(_GLM):

def predict(self, X):
X_ = self._maybe_add_intercept(X)
- return exp(dot(X_, self._coef))
+ return np.exp(dot(X_, self._coef))

def get_deviance(self, X, y):
return poisson_deviance(y, self.predict(X))
13 changes: 7 additions & 6 deletions dask_glm/families.py
@@ -1,6 +1,7 @@
from __future__ import absolute_import, division, print_function

- from dask_glm.utils import dot, exp, log1p, sigmoid
+ from dask_glm.utils import dot, sigmoid
+ import numpy as np


class Logistic(object):
@@ -20,8 +21,8 @@ def loglike(Xbeta, y):
Xbeta : array, shape (n_samples, n_features)
y : array, shape (n_samples)
"""
- enXbeta = exp(-Xbeta)
- return (Xbeta + log1p(enXbeta)).sum() - dot(y, Xbeta)
+ enXbeta = np.exp(-Xbeta)
+ return (Xbeta + np.log1p(enXbeta)).sum() - dot(y, Xbeta)

@staticmethod
def pointwise_loss(beta, X, y):
@@ -92,7 +93,7 @@ class Poisson(object):
"""
@staticmethod
def loglike(Xbeta, y):
- eXbeta = exp(Xbeta)
+ eXbeta = np.exp(Xbeta)
yXbeta = y * Xbeta
return (eXbeta - yXbeta).sum()

@@ -110,11 +111,11 @@ def pointwise_gradient(beta, X, y):

@staticmethod
def gradient(Xbeta, X, y):
- eXbeta = exp(Xbeta)
+ eXbeta = np.exp(Xbeta)
return dot(X.T, eXbeta - y)

@staticmethod
def hessian(Xbeta, X):
- eXbeta = exp(Xbeta)
+ eXbeta = np.exp(Xbeta)
x_diag_eXbeta = eXbeta[:, None] * X
return dot(X.T, x_diag_eXbeta)
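The same ufunc dispatch carries the families over to CuPy: np.exp and np.log1p called on a CuPy array run on the GPU and return CuPy arrays, so the custom exp/log1p helpers are no longer needed here either. A small sketch, assuming a CuPy build with __array_ufunc__ support (recent versions have it):

import numpy as np
import cupy

xbeta = cupy.linspace(-2.0, 2.0, 5)
out = np.log1p(np.exp(-xbeta))   # both ufuncs defer to CuPy via __array_ufunc__
assert isinstance(out, cupy.ndarray)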
Empty file added dask_glm/tests/__init__.py
Empty file added dask_glm/tests/cupy/__init__.py