-
-
Notifications
You must be signed in to change notification settings - Fork 46
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
10 changed files
with
215 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
*.pyc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
language: python | ||
sudo: false | ||
|
||
env: | ||
matrix: | ||
- PYTHON=2.7 | ||
- PYTHON=3.5 | ||
|
||
install: | ||
# Install conda | ||
- wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh | ||
- bash miniconda.sh -b -p $HOME/miniconda | ||
- export PATH="$HOME/miniconda/bin:$PATH" | ||
- conda config --set always_yes yes --set changeps1 no | ||
- conda update conda | ||
|
||
# Install dependencies | ||
- conda create -n test-environment python=$PYTHON | ||
- source activate test-environment | ||
- conda install -c conda-forge numpy dask pytest flake8 | ||
|
||
# Install dask-glm | ||
- pip install --no-deps -e . | ||
|
||
script: | ||
- py.test dask_glm | ||
- flake8 dask_glm | ||
|
||
notifications: | ||
email: false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
Copyright (c) 2016, Continuum Analytics, Inc. and contributors | ||
All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without modification, | ||
are permitted provided that the following conditions are met: | ||
|
||
Redistributions of source code must retain the above copyright notice, | ||
this list of conditions and the following disclaimer. | ||
|
||
Redistributions in binary form must reproduce the above copyright notice, | ||
this list of conditions and the following disclaimer in the documentation | ||
and/or other materials provided with the distribution. | ||
|
||
Neither the name of Continuum Analytics nor the names of any contributors | ||
may be used to endorse or promote products derived from this software | ||
without specific prior written permission. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | ||
THE POSSIBILITY OF SUCH DAMAGE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
recursive-include dask_glm *.py | ||
recursive-include docs *.rst | ||
|
||
include setup.py | ||
include README.rst | ||
include LICENSE.txt | ||
include MANIFEST.in | ||
|
||
prune docs/_build |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
Generalized Linear Models in Dask | ||
================================= | ||
|
||
*This library is not ready for use.* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .gradient import gradient |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
# Constants | ||
|
||
import numpy as np | ||
import dask.array as da | ||
|
||
|
||
# Line-search tuning constants. ``gradient`` reads these as its defaults.
firstBacktrackMult = 0.1   # step shrink factor during the first outer iteration
nextBacktrackMult = 0.5    # step shrink factor for every later outer iteration
armijoMult = 0.1           # coefficient of the sufficient-decrease (Armijo) test
stepGrowth = 1.25          # growth applied to the step after an accepted step
stepSize = 1.0             # initial trial step length
recalcRate = 10            # rebuild X.dot(beta) from scratch every this many steps
backtrackMult = firstBacktrackMult


def gradient(X, y, max_steps=100):
    """Minimise the logistic loss by gradient descent with backtracking.

    The objective is ``sum(log1p(exp(X.dot(beta)))) - y.dot(X.dot(beta))``,
    minimised over ``beta`` starting from the zero vector.  Each outer
    iteration evaluates the gradient (through ``da.compute`` so dask inputs
    are materialised once per step) and then backtracks along the negative
    gradient until the Armijo sufficient-decrease test passes.

    Parameters
    ----------
    X : 2-D array (NumPy or dask)
        Design matrix; must provide ``.shape``, ``.dot`` and ``.T``.
    y : 1-D array (NumPy or dask)
        Response vector with one entry per row of ``X``.
        NOTE(review): presumably 0/1 labels -- confirm with callers.
    max_steps : int, optional
        Maximum number of outer iterations.

    Returns
    -------
    numpy.ndarray
        The final coefficient vector, one entry per column of ``X``.

    Notes
    -----
    A progress table is printed to stdout, one row per outer iteration.
    """
    _, n_features = X.shape

    # Per-call copies of the mutable line-search state so that the
    # module-level defaults are never modified.
    step = stepSize
    shrink = firstBacktrackMult
    beta = np.zeros(n_features)

    print('##       -f        |df/f|    |dx/x|    step')
    print('----------------------------------------------')
    for k in range(max_steps):
        # Xbeta / eXbeta / func are updated incrementally by the line search
        # below; rebuild them from scratch periodically to cancel the
        # accumulated round-off.
        if k % recalcRate == 0:
            Xbeta = X.dot(beta)
            eXbeta = da.exp(Xbeta)
            func = da.log1p(eXbeta).sum() - y.dot(Xbeta)
        e1 = eXbeta + 1.0
        grad = X.T.dot(eXbeta / e1 - y)
        steplen = (grad ** 2).sum() ** 0.5
        Xgrad = X.dot(grad)

        # Materialise everything in a single pass over the data.
        Xbeta, eXbeta, func, grad, steplen, Xgrad = da.compute(
            Xbeta, eXbeta, func, grad, steplen, Xgrad)

        obeta = beta
        oXbeta = Xbeta

        # Backtracking line search for the step size.
        lf = func
        # Defined up front so it is never unbound if every trial step is
        # rejected by the overflow guard below.
        df = 0.0
        for ii in range(100):
            beta = obeta - step * grad
            if ii and np.array_equal(beta, obeta):
                # The step is too small to change beta in floating point.
                step = 0
                break
            Xbeta = oXbeta - step * Xgrad
            # This prevents overflow in exp()
            if np.all(Xbeta < 700):
                eXbeta = np.exp(Xbeta)
                func = np.sum(np.log1p(eXbeta)) - np.dot(y, Xbeta)
                df = lf - func
                if df >= armijoMult * step * steplen ** 2:
                    break
            step *= shrink
        if step == 0:
            print('No more progress')
            break
        df /= max(func, lf)
        db = step * steplen / (np.linalg.norm(beta) + step * steplen)
        print('%2d  %.6e %9.2e  %.2e  %.1e' % (k + 1, func, df, db, step))
        if df < 1e-14:
            print('Converged')
            break
        step *= stepGrowth
        shrink = nextBacktrackMult

    return beta
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
|
||
import math | ||
|
||
import dask.array as da | ||
import numpy as np | ||
import pytest | ||
|
||
from dask_glm import gradient | ||
|
||
|
||
def logit(y):
    """Return the logistic sigmoid ``1 / (1 + exp(-y))`` of ``y``.

    NOTE: despite its name this is the inverse of the logit transform; it
    maps a linear predictor to a probability.
    """
    denominator = 1.0 + da.exp(-y)
    return 1.0 / denominator
|
||
|
||
# Size of the synthetic problem: M columns, N rows.  S is the standard
# deviation that make_y() rescales the linear predictor to.
M, N, S = 100, 100000, 2

# Random design matrix with column 1 overwritten by a constant 1.0.
X = np.random.randn(N, M)
X[:, 1] = 1.0

# Coefficients used to generate the responses (drawn after X, as before).
beta0 = np.random.randn(M)
|
||
|
||
def make_y(X, beta0=beta0):
    """Draw random boolean responses for the design matrix ``X``.

    The linear predictor ``X.dot(beta0)`` is rescaled so that its standard
    deviation equals the module constant ``S``; each response is then True
    with probability ``logit(predictor)``.

    NOTE: ``beta0`` is rescaled *in place*, so calling this with the default
    argument modifies the module-level ``beta0`` array.

    Returns a ``(y, z0)`` tuple: the boolean responses and the rescaled
    linear predictor.
    """
    n_rows, _ = X.shape
    # da.compute() ensures the predictor is a concrete NumPy array even
    # when X is a dask array.
    predictor = da.compute(X.dot(beta0))[0]
    scale = S / predictor.std()
    beta0 *= scale
    predictor *= scale
    uniform_draws = np.random.rand(n_rows)
    responses = uniform_draws < logit(predictor)
    return responses, predictor
|
||
|
||
# Simulated responses and the (rescaled) linear predictor that produced them.
y, z0 = make_y(X)

# N * log(2): the value of sum(log1p(exp(0))) - y.dot(0), i.e. the logistic
# loss at beta = 0.  Currently unused by the tests below.
L0 = N * math.log(2.0)

# dask views of the same data, split into 10 row blocks.  Floor division is
# required: under Python 3 ``N / 10`` is a float, and chunk sizes must be
# integers.
dX = da.from_array(X, chunks=(N // 10, M))
dy = da.from_array(y, chunks=(N // 10,))
|
||
|
||
@pytest.mark.parametrize('X,y', [(X, y), (dX, dy)])
def test_gradient(X, y):
    """``gradient`` runs on NumPy and dask inputs and returns a usable fit.

    The parameters shadow the module-level ``X`` / ``y`` on purpose: the
    same check is run once on the in-memory arrays and once on their dask
    counterparts.
    """
    beta = gradient(X, y)
    # One finite coefficient per column of the design matrix.
    assert np.shape(beta) == (X.shape[1],)
    assert np.all(np.isfinite(beta))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
dask[array] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#!/usr/bin/env python | ||
|
||
from os.path import exists | ||
from setuptools import setup | ||
import versioneer | ||
|
||
|
||
# Read the metadata files up front so the file handles are closed promptly.
if exists('README.rst'):
    with open('README.rst') as readme_file:
        long_description = readme_file.read()
else:
    long_description = ''

with open('requirements.txt') as requirements_file:
    install_requires = requirements_file.read().strip().split('\n')

setup(name='dask-glm',
      version=versioneer.get_version(),
      cmdclass=versioneer.get_cmdclass(),
      description='Generalized Linear Models with Dask',
      url='http://github.com/dask/dask-glm/',
      maintainer='Matthew Rocklin',
      maintainer_email='mrocklin@gmail.com',
      license='BSD',
      keywords='dask,glm',
      # The trailing comma here was missing, which made the file a SyntaxError.
      packages=['dask_glm'],
      long_description=long_description,
      install_requires=install_requires,
      zip_safe=False)