[WIP] Adjust all algorithms to work with CuPy #75

Open · wants to merge 12 commits into main
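This PR's tests lean on NumPy's __array_function__ dispatch protocol (NEP 18): code written against the np.* API can run unchanged on CuPy arrays, which is what lets the existing dask-glm algorithms accept GPU inputs. A minimal sketch of that mechanism, assuming NumPy >= 1.17 (where the protocol is enabled by default) and CuPy >= 6:

import numpy as np
import cupy

X = cupy.random.random((4, 3))
beta = cupy.random.random(3)

# np.dot sees CuPy inputs and dispatches to CuPy's implementation,
# so the result stays on the GPU as a cupy.ndarray.
z = np.dot(X, beta)
assert isinstance(z, cupy.ndarray)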
62 changes: 62 additions & 0 deletions dask_glm/tests/cupy/test_admm.py
@@ -0,0 +1,62 @@
import pytest

from dask import persist
import numpy as np
import cupy

from dask.array.utils import normalize_to_array
from dask_glm.algorithms import admm, local_update
from dask_glm.families import Logistic, Normal
from dask_glm.regularizers import L1
from dask_glm.utils import cupy_make_y


@pytest.mark.parametrize('N', [1000, 10000])
@pytest.mark.parametrize('beta',
                         [cupy.array([-1.5, 3]),
                          cupy.array([35, 2, 0, -3.2]),
                          cupy.array([-1e-2, 1e-4, 1.0, 2e-3, -1.2])])
@pytest.mark.parametrize('family', [Logistic, Normal])
def test_local_update(N, beta, family):
    M = beta.shape[0]
    X = cupy.random.random((N, M))
    y = cupy.random.random(N) > 0.4
    u = cupy.zeros(M)
    z = cupy.random.random(M)
    rho = 1e7
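    # With rho this large, the (rho / 2) * ||beta - z + u||^2 penalty dominates
    # the local objective, so the minimizer should land (approximately) on z
    # (u is zero here); the final assert checks exactly that.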

    def create_local_gradient(func):
        def wrapped(beta, X, y, z, u, rho):
            beta_like = np.empty_like(X, shape=beta.shape)
            beta_like[:] = beta
            return normalize_to_array(func(beta_like, X, y) +
                                      rho * (beta_like - z + u))
        return wrapped

    def create_local_f(func):
        def wrapped(beta, X, y, z, u, rho):
            beta_like = np.empty_like(X, shape=beta.shape)
            beta_like[:] = beta
            return normalize_to_array(func(beta_like, X, y) + (rho / 2) *
                                      np.dot(beta_like - z + u,
                                             beta_like - z + u))
        return wrapped

    f = create_local_f(family.pointwise_loss)
    fprime = create_local_gradient(family.pointwise_gradient)

    result = local_update(X, y, beta, z, u, rho, f=f, fprime=fprime)

    assert np.allclose(result, z, atol=2e-3)


@pytest.mark.parametrize('N', [1000, 10000])
@pytest.mark.parametrize('p', [1, 5, 10])
def test_admm_with_large_lamduh(N, p):
    X = cupy.random.random((N, p))
    beta = cupy.random.random(p)
    y = cupy_make_y(X, beta=cupy.array(beta))

    X, y = persist(X, y)
    z = admm(X, y, regularizer=L1(), lamduh=1e5, rho=20, max_iter=500)

    assert np.allclose(z, np.zeros(p), atol=1e-4)
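A note on normalize_to_array above: the ADMM local_update drives a host-side SciPy solver, so the wrapped f and fprime convert device results back to NumPy. A rough stand-in for what such a helper does (the real one lives in dask.array.utils; this sketch is illustrative only):

import cupy

def normalize_to_array_sketch(arr):
    # Move a CuPy array to host memory; pass anything else through.
    if isinstance(arr, cupy.ndarray):
        return cupy.asnumpy(arr)
    return arr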
143 changes: 143 additions & 0 deletions dask_glm/tests/cupy/test_algos_families.py
@@ -0,0 +1,143 @@
import pytest

from dask import persist
import numpy as np
import cupy

from dask_glm.algorithms import (newton, lbfgs, proximal_grad,
                                 gradient_descent, admm)
from dask_glm.families import Logistic, Normal, Poisson
from dask_glm.regularizers import Regularizer
from dask_glm.utils import sigmoid, cupy_make_y


def add_l1(f, lam):
    def wrapped(beta, X, y):
        return f(beta, X, y) + lam * (np.abs(beta)).sum()
    return wrapped


def make_intercept_data(N, p, seed=20009):
    '''Given the desired number of observations (N) and
    the desired number of variables (p), creates
    random logistic data to test on.'''

    # set the seeds
    cupy.random.seed(seed)

    X = cupy.random.random((N, p + 1))
    col_sums = X.sum(axis=0)
    X = X / col_sums[None, :]
    X[:, p] = 1
    y = cupy_make_y(X, beta=cupy.random.random(p + 1))

    return X, y
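

# cupy_make_y is not defined in this diff; assuming it mirrors dask_glm's CPU
# make_y helper, it draws Bernoulli labels with success probability
# sigmoid(X.dot(beta)). A hypothetical re-creation, for orientation only:
def cupy_make_y_sketch(X, beta):
    z = X.dot(beta)
    return cupy.random.random(z.shape) < 1.0 / (1.0 + cupy.exp(-z))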


@pytest.mark.parametrize('opt',
                         [lbfgs,
                          newton,
                          gradient_descent])
@pytest.mark.parametrize('N, p, seed',
                         [(100, 2, 20009),
                          (250, 12, 90210),
                          (95, 6, 70605)])
def test_methods(N, p, seed, opt):
    X, y = make_intercept_data(N, p, seed=seed)
    coefs = opt(X, y)
    prob = sigmoid(X.dot(coefs))

    y_sum = y.sum()
    p_sum = prob.sum()
    assert np.isclose(y_sum, p_sum, atol=1e-1)


@pytest.mark.parametrize('func,kwargs', [
    (newton, {'tol': 1e-5}),
    (lbfgs, {'tol': 1e-8}),
    (gradient_descent, {'tol': 1e-7}),
])
@pytest.mark.parametrize('N', [1000])
@pytest.mark.parametrize('nchunks', [1, 10])
@pytest.mark.parametrize('family', [Logistic, Normal, Poisson])
def test_basic_unreg_descent(func, kwargs, N, nchunks, family):
    beta = cupy.random.normal(size=2)
    M = len(beta)
    X = cupy.random.random((N, M))
    y = cupy_make_y(X, beta=cupy.array(beta))

    X, y = persist(X, y)

    result = func(X, y, family=family, **kwargs)
    test_vec = cupy.random.normal(size=2)

    opt = family.pointwise_loss(result, X, y)
    test_val = family.pointwise_loss(test_vec, X, y)

    assert opt < test_val


@pytest.mark.parametrize('func,kwargs', [
    (admm, {'abstol': 1e-4}),
    (proximal_grad, {'tol': 1e-7}),
])
@pytest.mark.parametrize('N', [1000])
@pytest.mark.parametrize('nchunks', [1, 10])
@pytest.mark.parametrize('family', [Logistic, Normal, Poisson])
@pytest.mark.parametrize('lam', [0.01, 1.2, 4.05])
@pytest.mark.parametrize('reg', [r() for r in Regularizer.__subclasses__()])
def test_basic_reg_descent(func, kwargs, N, nchunks, family, lam, reg):
    beta = cupy.random.normal(size=2)
    M = len(beta)
    X = cupy.random.random((N, M))
    y = cupy_make_y(X, beta=cupy.array(beta))

    X, y = persist(X, y)

    result = func(X, y, family=family, lamduh=lam, regularizer=reg, **kwargs)
    test_vec = cupy.random.normal(size=2)

    f = reg.add_reg_f(family.pointwise_loss, lam)

    opt = f(result, X, y)
    test_val = f(test_vec, X, y)

    assert opt < test_val
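

# add_reg_f is presumably the generalized analogue of the add_l1 helper at the
# top of this file, adding lam * reg.f(beta) to the pointwise loss. A sketch
# under that assumption (not dask_glm's actual implementation):
def add_reg_f_sketch(f, lam, reg):
    def wrapped(beta, X, y):
        return f(beta, X, y) + lam * reg.f(beta)
    return wrapped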


@pytest.mark.parametrize('func,kwargs', [
    (admm, {'max_iter': 2}),
    (proximal_grad, {'max_iter': 2}),
    (newton, {'max_iter': 2}),
    (gradient_descent, {'max_iter': 2}),
])
def test_determinism(func, kwargs):
    X, y = make_intercept_data(1000, 10)

    a = func(X, y, **kwargs)
    b = func(X, y, **kwargs)

    assert (a == b).all()


try:
    from distributed import Client
    from distributed.utils_test import cluster, loop  # flake8: noqa
except ImportError:
    pass
else:
    @pytest.mark.parametrize('func,kwargs', [
        (admm, {'max_iter': 2}),
        (proximal_grad, {'max_iter': 2}),
        (newton, {'max_iter': 2}),
        (gradient_descent, {'max_iter': 2}),
    ])
    def test_determinism_distributed(func, kwargs, loop):
        with cluster() as (s, [a, b]):
            with Client(s['address'], loop=loop):
                X, y = make_intercept_data(1000, 10)

                res1 = func(X, y, **kwargs)
                res2 = func(X, y, **kwargs)

                assert (res1 == res2).all()
130 changes: 130 additions & 0 deletions dask_glm/tests/cupy/test_regularizers.py
@@ -0,0 +1,130 @@
import numpy as np
import cupy
import pytest
import cupy.testing as cpt
from dask_glm import regularizers as regs


@pytest.mark.parametrize('beta,expected', [
    (cupy.array([0, 0, 0]), 0),
    (cupy.array([1, 2, 3]), 7)
])
def test_l2_function(beta, expected):
    assert regs.L2().f(beta) == expected


@pytest.mark.parametrize('beta', [
    cupy.array([0, 0, 0]),
    cupy.array([1, 2, 3])
])
def test_l2_gradient(beta):
    cpt.assert_array_equal(regs.L2().gradient(beta), beta)


@pytest.mark.parametrize('beta', [
    cupy.array([0, 0, 0]),
    cupy.array([1, 2, 3])
])
def test_l2_hessian(beta):
    cpt.assert_array_equal(regs.L2().hessian(beta), np.eye(len(beta)))


@pytest.mark.parametrize('beta,expected', [
    (cupy.array([0, 0, 0]), cupy.array([0, 0, 0])),
    (cupy.array([1, 2, 3]), cupy.array([0.5, 1, 1.5]))
])
def test_l2_proximal_operator(beta, expected):
    cpt.assert_array_equal(regs.L2().proximal_operator(beta, 1), expected)


@pytest.mark.parametrize('beta,expected', [
    (cupy.array([0, 0, 0]), 0),
    (cupy.array([-1, 2, 3]), 6)
])
def test_l1_function(beta, expected):
    assert regs.L1().f(beta) == expected


@pytest.mark.parametrize('beta,expected', [
    (cupy.array([1, 2, 3]), cupy.array([1, 1, 1])),
    (cupy.array([-1, 2, 3]), cupy.array([-1, 1, 1]))
])
def test_l1_gradient(beta, expected):
    cpt.assert_array_equal(regs.L1().gradient(beta), expected)


@pytest.mark.parametrize('beta', [
    cupy.array([0.00000001, 1, 2]),
    cupy.array([-0.00000001, 1, 2]),
    cupy.array([0, 0, 0])
])
def test_l1_gradient_raises_near_zero(beta):
    with pytest.raises(ValueError):
        regs.L1().gradient(beta)


def test_l1_hessian():
    cpt.assert_array_equal(regs.L1().hessian(cupy.array([1, 2])),
                           cupy.array([[0, 0], [0, 0]]))


def test_l1_hessian_raises():
    with pytest.raises(ValueError):
        regs.L1().hessian(cupy.array([0, 0, 0]))


@pytest.mark.parametrize('beta,expected', [
    (cupy.array([0, 0, 0]), cupy.array([0, 0, 0])),
    (cupy.array([1, 2, 3]), cupy.array([0, 1, 2]))
])
def test_l1_proximal_operator(beta, expected):
    cpt.assert_array_equal(regs.L1().proximal_operator(beta, 1), expected)


@pytest.mark.parametrize('beta,expected', [
    (cupy.array([0, 0, 0]), 0),
    (cupy.array([1, 2, 3]), 6.5)
])
def test_elastic_net_function(beta, expected):
    assert regs.ElasticNet().f(beta) == expected


def test_elastic_net_function_zero_weight_is_l2():
    beta = cupy.array([1, 2, 3])
    assert regs.ElasticNet(weight=0).f(beta) == regs.L2().f(beta)


def test_elastic_net_function_unit_weight_is_l1():
    beta = cupy.array([1, 2, 3])
    assert regs.ElasticNet(weight=1).f(beta) == regs.L1().f(beta)


def test_elastic_net_gradient():
    beta = cupy.array([1, 2, 3])
    cpt.assert_array_equal(regs.ElasticNet(weight=0.5).gradient(beta),
                           cupy.array([1, 1.5, 2]))


def test_elastic_net_gradient_zero_weight_is_l2():
    beta = cupy.array([1, 2, 3])
    cpt.assert_array_equal(regs.ElasticNet(weight=0).gradient(beta),
                           regs.L2().gradient(beta))


def test_elastic_net_gradient_unit_weight_is_l1():
    beta = cupy.array([1, 2, 3])
    cpt.assert_array_equal(regs.ElasticNet(weight=1).gradient(beta),
                           regs.L1().gradient(beta))


def test_elastic_net_hessian():
    beta = cupy.array([1, 2, 3])
    cpt.assert_array_equal(regs.ElasticNet(weight=0.5).hessian(beta),
                           np.eye(len(beta)) * regs.ElasticNet().weight)


def test_elastic_net_hessian_raises():
    with pytest.raises(ValueError):
        regs.ElasticNet(weight=0.5).hessian(cupy.array([0, 1, 2]))


def test_elastic_net_proximal_operator():
    beta = cupy.array([1, 2, 3])
    cpt.assert_array_equal(regs.ElasticNet(weight=0.5).proximal_operator(beta, 1),
                           beta)
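The proximal-operator expectations above pin down the standard closed forms: the L1 prox is soft-thresholding and the L2 prox is a uniform shrink by 1 / (1 + t). A sketch consistent with the test cases above (not dask_glm's actual implementation):

import cupy

def l1_prox_sketch(beta, t):
    # Soft-thresholding: [1, 2, 3] with t=1 gives [0, 1, 2], as tested above.
    return cupy.sign(beta) * cupy.maximum(cupy.abs(beta) - t, 0)

def l2_prox_sketch(beta, t):
    # Uniform shrink: [1, 2, 3] with t=1 gives [0.5, 1, 1.5], as tested above.
    return beta / (1 + t)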
59 changes: 59 additions & 0 deletions dask_glm/tests/cupy/test_utils.py
@@ -0,0 +1,59 @@
import pytest
import numpy as np
import cupy
import cupy.testing as cpt

from dask_glm import utils


def test_normalize_normalizes():
    @utils.normalize
    def do_nothing(X, y):
        return cupy.array([0.0, 1.0, 2.0])
    X = cupy.array([[1, 0, 0], [1, 2, 2]])
    y = cupy.array([0, 1, 0])
    res = do_nothing(X, y)
    cpt.assert_array_equal(res, cupy.array([-3.0, 1.0, 2.0]))


def test_normalize_doesnt_normalize():
    @utils.normalize
    def do_nothing(X, y):
        return cupy.array([0.0, 1.0, 2.0])
    X = cupy.array([[1, 0, 0], [1, 2, 2]])
    y = cupy.array([0, 1, 0])
    res = do_nothing(X, y, normalize=False)
    cpt.assert_array_equal(res, cupy.array([0, 1, 2]))


def test_normalize_normalizes_if_intercept_not_present():
    @utils.normalize
    def do_nothing(X, y):
        return cupy.array([0.0, 1.0, 2.0])
    X = cupy.array([[1, 0, 0], [3, 9.0, 2]])
    y = cupy.array([0, 1, 0])
    res = do_nothing(X, y)
    cpt.assert_array_equal(res, cupy.array([0, 1 / 4.5, 2]))


def test_normalize_raises_if_multiple_constants():
    @utils.normalize
    def do_nothing(X, y):
        return cupy.array([0.0, 1.0, 2.0])
    X = cupy.array([[1, 2, 3], [1, 2, 3]])
    y = cupy.array([0, 1, 0])
    with pytest.raises(ValueError):
        do_nothing(X, y)


def test_add_intercept():
    X = cupy.zeros((4, 4))
    result = utils.add_intercept(X)
    expected = cupy.array([
        [0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1],
    ], dtype=X.dtype)
    cpt.assert_array_equal(result, expected)
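The expectation above fixes what add_intercept must do: append a column of ones so the last fitted coefficient acts as the intercept. A minimal sketch of that behavior (illustrative, not the actual dask_glm.utils code):

import cupy

def add_intercept_sketch(X):
    # Append a column of ones; the matching coefficient is the intercept.
    ones = cupy.ones((X.shape[0], 1), dtype=X.dtype)
    return cupy.concatenate([X, ones], axis=1)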