Skip to content
This repository has been archived by the owner on Jun 18, 2023. It is now read-only.

Commit

Permalink
Build estimator pickles on install #70
Browse files Browse the repository at this point in the history
Pickles always depend on the installed versions of the
libraries they reference, so shipping just one pre-built set
was never a good idea. It now also doesn't work because
some pickles can contain Numba JIT'd functions.
  • Loading branch information
ceholden committed Dec 7, 2015
1 parent 42beb39 commit d9b4b80
Show file tree
Hide file tree
Showing 11 changed files with 104 additions and 17 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -1,3 +1,7 @@
# YATSM ignores
yatsm/regression/pickles/*.pkl
yatsm/regression/pickles/*.json

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
26 changes: 14 additions & 12 deletions setup.py
@@ -1,12 +1,11 @@
import glob
import logging
import os
import sys

from setuptools import setup
from setuptools import find_packages, setup
from setuptools.extension import Extension

logging.basicConfig()
logging.basicConfig(level=logging.INFO)
log = logging.getLogger()

# Get version
Expand All @@ -22,6 +21,12 @@
with open('README.md') as f:
readme = f.read()

# Build pickles
here = os.path.dirname(__file__)
sys.path.append(os.path.join(here, 'yatsm', 'regression', 'pickles'))
from yatsm.regression.pickles import serialize as serialize_pickles # flake8: noqa
serialize_pickles.make_pickles()

# Installation requirements
install_requires = [
'numpy',
Expand Down Expand Up @@ -65,18 +70,15 @@

# Pre-packaged regression algorithms included in installation
package_data = {
'yatsm': [os.path.join('regression', 'pickles', '*.pkl')]
'yatsm': [
os.path.join('regression', 'pickles', 'pickles.json'),
os.path.join('regression', 'pickles', '*.pkl')
]
}

# Setup
packages = ['yatsm',
'yatsm.algorithms',
'yatsm.cli',
'yatsm.classifiers',
'yatsm.mapping',
'yatsm.phenology',
'yatsm.regression',
'yatsm.segment']
packages = find_packages(exclude=['tests', 'yatsm.regression.pickles'])
packages.sort()

entry_points = '''
[console_scripts]
Expand Down
18 changes: 13 additions & 5 deletions yatsm/regression/packaged.py
@@ -1,14 +1,22 @@
""" Module for finding regression algorithms packaged with YATSM
"""
import json
import logging
import os
import pkg_resources

logger = logging.getLogger('yatsm')

packaged_regressions = ['OLS', 'sklearn_Lasso20', 'glmnet_Lasso20',
'glmnet_LassoCV_n50', 'glmnet_LassoCV_n100',
'rlm_maxiter10']
# packaged_regressions = ['OLS', 'sklearn_Lasso20', 'glmnet_Lasso20',
# 'glmnet_LassoCV_n50', 'glmnet_LassoCV_n100',
# 'rlm_maxiter10']
packaged_regressions = []
_packaged = pkg_resources.resource_filename(
__package__, os.path.join('pickles', 'pickles.json'))
if pkg_resources.resource_exists(__package__,
os.path.join('pickles', 'pickles.json')):
with open(_packaged, 'r') as f:
packaged_regressions = json.load(f).keys()


def find_packaged_regressor(name):
Expand All @@ -18,7 +26,7 @@ def find_packaged_regressor(name):
available pre-packaged regressors
Args:
name (str): name of packaged regression object. Check
name (str): name of packaged regression object
Returns:
str: path to packaged regression method
Expand All @@ -37,7 +45,7 @@ def find_packaged_regressor(name):
raise IOError('Cannot find packaged regressors in %s. Did you install '
'YATSM via setuptools?' % path)

resource = os.path.join('pickles', name + '.pkl')
resource = os.path.join('pickles', name)
if not pkg_resources.resource_exists(__name__, resource):
raise IOError('Cannot find packaged regression method %s, but package '
'directory exists. Check the contents of %s if possible'
Expand Down
Binary file removed yatsm/regression/pickles/OLS.pkl
Binary file not shown.
5 changes: 5 additions & 0 deletions yatsm/regression/pickles/__init__.py
@@ -0,0 +1,5 @@
""" Setup pickles for distribution -- NOT A MODULE INCLUDED IN YATSM
Note that pickles are generated at time of installation. Upgrading or altering
the libraries that the pickles are dependent upon may cause errors.
"""
Binary file removed yatsm/regression/pickles/glmnet_Lasso20.pkl
Binary file not shown.
Binary file removed yatsm/regression/pickles/glmnet_LassoCV_n100.pkl
Binary file not shown.
Binary file removed yatsm/regression/pickles/glmnet_LassoCV_n50.pkl
Binary file not shown.
Binary file removed yatsm/regression/pickles/rlm_maxiter10.pkl
Binary file not shown.
68 changes: 68 additions & 0 deletions yatsm/regression/pickles/serialize.py
@@ -0,0 +1,68 @@
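""" Setup script to pickle various statistical estimators for distribution

Available pickles to build:
    * glmnet_Lasso20.pkl (only if "glmnet" can be imported)
    * glmnet_LassoCV_n50.pkl (only if "glmnet" can be imported)
    * OLS.pkl
    * sklearn_Lasso20.pkl
    * sklearn_LassoCV_n50.pkl
    * rlm_maxiter10.pkl
"""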
from __future__ import print_function

import json
import logging
import os
import traceback

# Don't alias to ``np``: https://github.com/numba/numba/issues/1559
import numpy
import sklearn.linear_model
from sklearn.externals import joblib as jl
import six

logger = logging.getLogger()


# GLMNET pickles -- optional: if "glmnet" cannot be imported, or its
# estimators cannot be constructed, an empty mapping is used instead
try:
    import glmnet
    _glmnet_pickles = {
        'glmnet_Lasso20.pkl': glmnet.Lasso(lambdas=20),
        # NOTE(review): numpy.logspace interprets its first two arguments as
        # exponents, so this grid runs from 10 ** 1e-4 to 10 ** 35 -- confirm
        # that this is the intended range of lambdas
        'glmnet_LassoCV_n50.pkl': glmnet.LassoCV(
            lambdas=numpy.logspace(1e-4, 35, 50)),
    }
except Exception:
    # Catch ``Exception`` instead of using a bare ``except`` so that
    # KeyboardInterrupt / SystemExit still abort the installation
    logger.error('Could not produce pickles from package "glmnet". '
                 'Check if it is installed')
    # Show the underlying failure so the user can tell a missing package
    # apart from a broken one
    print(traceback.format_exc())
    _glmnet_pickles = {}

# scikit-learn pickles: maps each pickle filename to the unfitted estimator
# that will be serialized under that name
_sklearn_pickles = {}
_sklearn_pickles['OLS.pkl'] = sklearn.linear_model.LinearRegression()
_sklearn_pickles['sklearn_Lasso20.pkl'] = sklearn.linear_model.Lasso(
    alpha=20.0)
_sklearn_pickles['sklearn_LassoCV_n50.pkl'] = sklearn.linear_model.LassoCV(
    alphas=numpy.logspace(1e-4, 35, 50))

# YATSM pickles: estimators implemented within YATSM itself
from ..robust_fit import RLM  # flake8: noqa
_yatsm_pickles = {}
_yatsm_pickles['rlm_maxiter10.pkl'] = RLM(maxiter=10)

# Pickles and their JSON index are written to the directory of this file
here = os.path.dirname(__file__)
pickles_json = os.path.join(here, 'pickles.json')
# Every group of estimators that ``make_pickles`` serializes, in order
pickles = [_glmnet_pickles, _sklearn_pickles, _yatsm_pickles]


def make_pickles():
    """ Serialize all configured estimators and write an index of them

    Every estimator in each mapping of the module-level ``pickles`` list is
    dumped (compressed) into the directory ``here`` under its mapping key.
    An index mapping each pickle filename to the class name of its estimator
    is then written as JSON to ``pickles_json``.
    """
    logger.info('Serializing estimators to pickles...')

    index = {}
    for estimators in pickles:
        for filename, estimator in six.iteritems(estimators):
            destination = os.path.join(here, filename)
            jl.dump(estimator, destination, compress=5)
            # Record which class each pickle file holds
            index[filename] = estimator.__class__.__name__

    with open(pickles_json, 'w') as index_file:
        json.dump(index, index_file, indent=4)
        logger.info('Wrote pickles.json to %s' % pickles_json)
Binary file removed yatsm/regression/pickles/sklearn_Lasso20.pkl
Binary file not shown.

0 comments on commit d9b4b80

Please sign in to comment.