diff --git a/README.md b/README.md index e5bbbf390..8d199c2b7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -Bambi ------- +# Bambi + BAyesian Model-Building Interface in Python [![Build Status](https://travis-ci.org/bambinos/bambi.svg?branch=master)](https://travis-ci.org/bambinos/bambi) @@ -26,7 +26,6 @@ Alternatively, if you want the bleeding edge version of the package you can inst Bambi requires working versions of numpy, pandas, matplotlib, patsy, pymc3, and theano. Dependencies are listed in `requirements.txt`, and should all be installed by the Bambi installer; no further action should be required. - ## Documentation The Bambi documentation can be found in the [official docs](https://bambinos.github.io/bambi/index.html) diff --git a/bambi/backends/__init__.py b/bambi/backends/__init__.py index 6dfcc67d0..7456771db 100644 --- a/bambi/backends/__init__.py +++ b/bambi/backends/__init__.py @@ -1,6 +1,4 @@ from .pymc import PyMC3BackEnd -__all__ = [ - 'PyMC3BackEnd' -] +__all__ = ["PyMC3BackEnd"] diff --git a/bambi/backends/base.py b/bambi/backends/base.py index 41299799a..1b66fa30c 100644 --- a/bambi/backends/base.py +++ b/bambi/backends/base.py @@ -2,10 +2,8 @@ class BackEnd: + """Base class for BackEnd hierarchy.""" - ''' - Base class for BackEnd hierarchy. - ''' __metaclass__ = ABCMeta @abstractmethod diff --git a/bambi/backends/pymc.py b/bambi/backends/pymc.py index 474b26a3f..ff984d879 100644 --- a/bambi/backends/pymc.py +++ b/bambi/backends/pymc.py @@ -1,17 +1,15 @@ import numpy as np +import pymc3 as pm import theano from arviz import from_pymc3 -import pymc3 as pm + from bambi.priors import Prior from .base import BackEnd class PyMC3BackEnd(BackEnd): - - """ - PyMC3 model-fitting back-end. 
- """ + """PyMC3 model-fitting back-end.""" # Available link functions links = { @@ -25,7 +23,6 @@ class PyMC3BackEnd(BackEnd): dists = {"HalfFlat": pm.Bound(pm.Flat, lower=0)} def __init__(self): - self.reset() # Attributes defined elsewhere @@ -35,15 +32,13 @@ def __init__(self): self.advi_params = None # build() def reset(self): - """ - Reset PyMC3 model and all tracked distributions and parameters. - """ + """Reset PyMC3 model and all tracked distributions and parameters.""" self.model = pm.Model() self.mu = None self.par_groups = {} def _build_dist(self, spec, label, dist, **kwargs): - """ Build and return a PyMC3 Distribution. """ + """Build and return a PyMC3 Distribution.""" if isinstance(dist, str): if hasattr(pm, dist): dist = getattr(pm, dist) @@ -77,8 +72,7 @@ def _expand_args(key, value, label): return dist(label, **kwargs) def build(self, spec, reset=True): # pylint: disable=arguments-differ - """ - Compile the PyMC3 model from an abstract model specification. + """Compile the PyMC3 model from an abstract model specification. Parameters ---------- @@ -122,8 +116,7 @@ def build(self, spec, reset=True): # pylint: disable=arguments-differ # pylint: disable=arguments-differ, inconsistent-return-statements def run(self, start=None, method="mcmc", init="auto", n_init=50000, **kwargs): - """ - Run the PyMC3 MCMC sampler. + """Run the PyMC3 MCMC sampler. Parameters ---------- @@ -168,9 +161,9 @@ def run(self, start=None, method="mcmc", init="auto", n_init=50000, **kwargs): def _laplace(model): - """ - Fit a model using a laplace approximation. Mainly for pedagogical use. ``mcmc`` and ``advi`` - are better approximations + """Fit a model using a laplace approximation. + + Mainly for pedagogical use. ``mcmc`` and ``advi`` are better approximations. 
Parameters ---------- diff --git a/bambi/external/patsy.py b/bambi/external/patsy.py index 4dbd6bf5d..8c966bea6 100644 --- a/bambi/external/patsy.py +++ b/bambi/external/patsy.py @@ -1,14 +1,16 @@ -import numpy as np import re -from patsy.util import safe_scalar_isnan + +import numpy as np + from patsy import PatsyError +from patsy.util import safe_scalar_isnan class Custom_NA: - """ - Custom patsy.missing.NAAction class. Similar to patsy drop/raise defaults, - but changes the raised message and logs which rows (if any) are dropped. - See Patsy code/API for NAAction documentation. + """Custom patsy.missing.NAAction class. + + Similar to patsy drop/raise defaults, but changes the raised message and logs which rows (if + any) are dropped. See Patsy code/API for NAAction documentation. """ def __init__(self, dropna=False, NA_types=["None", "NaN"]): diff --git a/bambi/models.py b/bambi/models.py index 0990f688d..2d6cdd8d8 100644 --- a/bambi/models.py +++ b/bambi/models.py @@ -4,23 +4,21 @@ from collections import OrderedDict from copy import deepcopy -import pandas as pd import numpy as np -from patsy import dmatrices, dmatrix +import pandas as pd +import pymc3 as pm import statsmodels.api as sm from arviz.plots import plot_posterior -import pymc3 as pm - +from patsy import dmatrices, dmatrix +from .backends import PyMC3BackEnd from .external.patsy import Custom_NA -from .priors import PriorFactory, PriorScaler, Prior +from .priors import Prior, PriorFactory, PriorScaler from .utils import listify -from .backends import PyMC3BackEnd class Model: - """ - Specification of model class + """Specification of model class. Parameters ---------- @@ -104,9 +102,7 @@ def __init__( self.built = False # build() def reset(self): - """ - Reset list of terms and y-variable. 
- """ + """Reset list of terms and y-variable.""" self.terms = OrderedDict() self.y = None self.backend = None @@ -116,7 +112,6 @@ def reset(self): self.clean_data = None def _set_backend(self, backend): - backend = backend.lower() if backend.startswith("pymc"): @@ -140,7 +135,6 @@ def build(self, backend=None): supported. If None, assume that `fit()` has already been called (possibly without building) and look in self._backend_name. """ - # retain only the complete cases n_total = len(self.data.index) if self.completes: @@ -316,7 +310,6 @@ def fit( backend : str The name of the BackEnd to use. Currently only 'pymc' backen is supported. """ - if fixed is not None or random is not None: self.add( fixed=fixed, @@ -332,7 +325,6 @@ def fit( if backend is None: backend = "pymc" if self._backend_name is None else self._backend_name - if run: if not self.built or backend != self._backend_name: self.build(backend) @@ -351,8 +343,7 @@ def add( categorical=None, append=True, ): - """ - Adds one or more terms to the model via an R-like formula syntax. + """Add one or more terms to the model via an R-like formula syntax. Parameters ---------- @@ -385,7 +376,6 @@ def add( If True, terms are appended to the existing model rather than replacing any existing terms. This allows formula-based specification of the model in stages. """ - data = self.data # Primitive values (floats, strs) can be overwritten with Prior objects @@ -446,7 +436,6 @@ def _add( Runs during Model.build() """ - # use cleaned data with NAs removed (if user requested) data = self.clean_data # alter this pandas flag to avoid false positive SettingWithCopyWarnings @@ -613,13 +602,11 @@ def _add_y(self, variable, prior=None, family="gaussian", link=None, *args, **kw self.built = False def _match_derived_terms(self, name): - """ - Returns all (random) terms whose named are derived from the - specified string. 
For example, 'condition|subject' should match the - terms with names '1|subject', 'condition[T.1]|subject', and so on. - Only works for strings with grouping operator ('|'). - """ + """Return all (random) terms whose names are derived from the specified string. + For example, 'condition|subject' should match the terms with names '1|subject', + 'condition[T.1]|subject', and so on. Only works for strings with grouping operator ('|'). + """ if "|" not in name: return None @@ -679,7 +666,6 @@ def _set_priors(self, priors=None, fixed=None, random=None, match_derived_names= Runs during Model.build(). """ - targets = {} if fixed is not None: @@ -705,16 +691,15 @@ def _set_priors(self, priors=None, fixed=None, random=None, match_derived_names= for name, prior in targets.items(): self.terms[name].prior = prior - # helper function to correctly set default priors, auto_scaling, etc. def _prepare_prior(self, prior, _type): - """ + """Helper function to correctly set default priors, auto_scaling, etc. + Parameters ---------- prior : Prior object, or float, or None. _type : string accepted values are: 'intercept, 'fixed', or 'random'. 
""" - if prior is None and not self.auto_scale: prior = self.default_priors.get(term=_type + "_flat") @@ -736,8 +721,7 @@ def plot_priors(self, var_names=None): raise ValueError("Cannot plot priors until model is built!") with pm.Model(): - # get priors for fixed fx, separately for each level of each - # predictor + # get priors for fixed fx, separately for each level of each predictor dists = [] for fixed_term in self.fixed_terms.values(): if var_names is not None and fixed_term.name not in var_names: @@ -811,7 +795,6 @@ class Term: random = False def __init__(self, name, data, categorical=False, prior=None, constant=None): - self.name = name self.categorical = categorical self._reduced_data = None diff --git a/bambi/priors.py b/bambi/priors.py index 3e503eeca..c2273c999 100644 --- a/bambi/priors.py +++ b/bambi/priors.py @@ -71,7 +71,6 @@ def update(self, **kwargs): kwargs : dict Optional keyword arguments to add to prior args. """ - # Backends expect numpy arrays, so make sure all numeric values are represented as such. kwargs = {k: (np.array(v) if isinstance(v, (int, float)) else v) for k, v in kwargs.items()} self.args.update(kwargs) @@ -114,7 +113,6 @@ class PriorFactory: """ def __init__(self, defaults=None, dists=None, terms=None, families=None): - if defaults is None: defaults = join(dirname(__file__), "config", "priors.json") @@ -138,7 +136,6 @@ def __init__(self, defaults=None, dists=None, terms=None, families=None): self.families = defaults["families"] def _get_prior(self, spec, **kwargs): - if isinstance(spec, str): spec = re.sub(r"^\#", "", spec) return self._get_prior(self.dists[spec]) @@ -217,10 +214,13 @@ def __init__(self, model, taylor): def _get_slope_stats(self, exog, predictor, sigma_corr, full_mod=None, points=4): """ - Args: - full_mod: statsmodels GLM to replace MLE model. For when 'predictor' - is not in the fixed part of the model. - points: number of points to use for LL approximation. 
+ Parameters + ---------- + full_mod : statsmodels.genmod.generalized_linear_model.GLM + Statsmodels GLM to replace MLE model. For when 'predictor' is not in the fixed part + of the model. + points : int + Number of points to use for LL approximation. """ if full_mod is None: diff --git a/bambi/tests/test_built_models.py b/bambi/tests/test_built_models.py index 433eb5cab..41fd76922 100644 --- a/bambi/tests/test_built_models.py +++ b/bambi/tests/test_built_models.py @@ -1,11 +1,13 @@ -import pytest -from bambi.models import Term, Model -from bambi.priors import Prior -import theano.tensor as tt -import pandas as pd -import numpy as np import re + import arviz as az +import numpy as np +import pandas as pd +import pytest +import theano.tensor as tt + +from bambi.models import Model, Term +from bambi.priors import Prior @pytest.fixture(scope="module") @@ -377,7 +379,6 @@ def test_logistic_regression(crossed_data): model1.build(backend="pymc3") model1.fit(tune=0, samples=1) - # build model using fit, pymc3 and theano link function model3 = Model(crossed_data) fitted3 = model3.fit( @@ -445,7 +446,6 @@ def test_poisson_regression(crossed_data): model1.build(backend="pymc3") model1.fit(tune=0, samples=1, init=None) - # check that term names agree assert set(model0.term_names) == set(model1.term_names) @@ -474,6 +474,7 @@ def dicts_close(a, b): assert all([dicts_close(priors0[x], priors1[x]) for x in priors0.keys()]) + def test_laplace(): data = pd.DataFrame(np.repeat((0, 1), (30, 60)), columns=["w"]) model = Model(data=data) diff --git a/bambi/tests/test_model_construction.py b/bambi/tests/test_model_construction.py index ef0293182..12d55e0fa 100644 --- a/bambi/tests/test_model_construction.py +++ b/bambi/tests/test_model_construction.py @@ -1,17 +1,19 @@ -import pytest from os.path import dirname, join -import pandas as pd + import numpy as np -from bambi.models import Term, Model +import pandas as pd +import pytest + +from bambi.models import Model, Term 
@pytest.fixture(scope="module") def diabetes_data(): - data_dir = join(dirname(__file__), 'data') - data = pd.read_csv(join(data_dir, 'diabetes.txt'), sep='\t') - data['age_grp'] = 0 - data.loc[data['AGE'] > 40, 'age_grp'] = 1 - data.loc[data['AGE'] > 60, 'age_grp'] = 2 + data_dir = join(dirname(__file__), "data") + data = pd.read_csv(join(data_dir, "diabetes.txt"), sep="\t") + data["age_grp"] = 0 + data.loc[data["AGE"] > 40, "age_grp"] = 1 + data.loc[data["AGE"] > 60, "age_grp"] = 2 return data @@ -22,9 +24,9 @@ def base_model(diabetes_data): def test_term_init(diabetes_data): model = Model(diabetes_data) - term = Term('BMI', diabetes_data['BMI']) + term = Term("BMI", diabetes_data["BMI"]) # Test that all defaults are properly initialized - assert term.name == 'BMI' + assert term.name == "BMI" assert term.categorical == False assert not term.random assert term.levels is not None @@ -34,96 +36,97 @@ def test_term_init(diabetes_data): def test_distribute_random_effect_over(diabetes_data): # Random slopes model = Model(diabetes_data) - model.add('BP ~ 1') - model.add(random='C(age_grp)|BMI') - model.build(backend='pymc') - assert model.terms['C(age_grp)[T.1]|BMI'].data.shape == (442, 163) + model.add("BP ~ 1") + model.add(random="C(age_grp)|BMI") + model.build(backend="pymc") + assert model.terms["C(age_grp)[T.1]|BMI"].data.shape == (442, 163) # Nested or crossed random intercepts model.reset() - model.add('BP ~ 1') - model.add(random='0+C(age_grp)|BMI') - model.build(backend='pymc') - assert model.terms['C(age_grp)[0]|BMI'].data.shape == (442, 163) + model.add("BP ~ 1") + model.add(random="0+C(age_grp)|BMI") + model.build(backend="pymc") + assert model.terms["C(age_grp)[0]|BMI"].data.shape == (442, 163) # 163 unique levels of BMI in diabetes_data def test_model_init_from_filename(): from os.path import dirname, join - data_dir = join(dirname(__file__), 'data') - filename = join(data_dir, 'diabetes.txt') + + data_dir = join(dirname(__file__), "data") + filename = 
join(data_dir, "diabetes.txt") model = Model(filename) assert isinstance(model.data, pd.DataFrame) assert model.data.shape == (442, 11) - assert 'BMI' in model.data.columns + assert "BMI" in model.data.columns def test_model_term_names_property(diabetes_data): model = Model(diabetes_data) - model.add('BMI ~ age_grp') - model.add('BP') - model.add('S1') - model.build(backend='pymc') - assert model.term_names == ['Intercept', 'age_grp', 'BP', 'S1'] + model.add("BMI ~ age_grp") + model.add("BP") + model.add("S1") + model.build(backend="pymc") + assert model.term_names == ["Intercept", "age_grp", "BP", "S1"] def test_add_to_model(diabetes_data): model = Model(diabetes_data) - model.add('BP ~ BMI') - model.build(backend='pymc') - assert isinstance(model.terms['BMI'], Term) - model.add('age_grp') - model.build(backend='pymc') - assert set(model.terms.keys()) == {'Intercept' ,'BMI', 'age_grp'} + model.add("BP ~ BMI") + model.build(backend="pymc") + assert isinstance(model.terms["BMI"], Term) + model.add("age_grp") + model.build(backend="pymc") + assert set(model.terms.keys()) == {"Intercept", "BMI", "age_grp"} # Test that arguments are passed appropriately onto Term initializer - model.add(random='C(age_grp)|BP') - model.build(backend='pymc') - assert isinstance(model.terms['C(age_grp)[T.1]|BP'], Term) - assert 'BP[108.0]' in model.terms['C(age_grp)[T.1]|BP'].levels + model.add(random="C(age_grp)|BP") + model.build(backend="pymc") + assert isinstance(model.terms["C(age_grp)[T.1]|BP"], Term) + assert "BP[108.0]" in model.terms["C(age_grp)[T.1]|BP"].levels def test_one_shot_formula_fit(diabetes_data): model = Model(diabetes_data) - model.fit('S3 ~ S1 + S2', samples=50, run=False) - model.build(backend='pymc3') + model.fit("S3 ~ S1 + S2", samples=50, run=False) + model.build(backend="pymc3") nv = model.backend.model.named_vars - targets = ['S3', 'S1', 'Intercept'] + targets = ["S3", "S1", "Intercept"] assert len(set(nv.keys()) & set(targets)) == 3 def 
test_invalid_chars_in_random_effect(diabetes_data): model = Model(diabetes_data) with pytest.raises(ValueError): - model.fit(random=['1+BP|age_grp']) + model.fit(random=["1+BP|age_grp"]) def test_add_formula_append(diabetes_data): model = Model(diabetes_data) - model.add('S3 ~ 0') - model.add('S1') - model.build(backend='pymc') - assert hasattr(model, 'y') and model.y is not None and model.y.name == 'S3' - assert 'S1' in model.terms - model.add('S2', append=False) + model.add("S3 ~ 0") + model.add("S1") + model.build(backend="pymc") + assert hasattr(model, "y") and model.y is not None and model.y.name == "S3" + assert "S1" in model.terms + model.add("S2", append=False) assert model.y is None - model.add('S3 ~ 0') - model.build(backend='pymc') - assert 'S2' in model.terms - assert 'S1' not in model.terms + model.add("S3 ~ 0") + model.build(backend="pymc") + assert "S2" in model.terms + assert "S1" not in model.terms def test_derived_term_search(diabetes_data): model = Model(diabetes_data) - model.add('BMI ~ 1', random='age_grp|BP', categorical=['age_grp']) - model.build(backend='pymc') - terms = model._match_derived_terms('age_grp|BP') + model.add("BMI ~ 1", random="age_grp|BP", categorical=["age_grp"]) + model.build(backend="pymc") + terms = model._match_derived_terms("age_grp|BP") names = set([t.name for t in terms]) - assert names == {'1|BP', 'age_grp[T.1]|BP', 'age_grp[T.2]|BP'} + assert names == {"1|BP", "age_grp[T.1]|BP", "age_grp[T.2]|BP"} - term = model._match_derived_terms('1|BP')[0] - assert term.name == '1|BP' + term = model._match_derived_terms("1|BP")[0] + assert term.name == "1|BP" # All of these should find nothing - assert model._match_derived_terms('1|ZZZ') is None - assert model._match_derived_terms('ZZZ|BP') is None - assert model._match_derived_terms('BP') is None - assert model._match_derived_terms('BP') is None + assert model._match_derived_terms("1|ZZZ") is None + assert model._match_derived_terms("ZZZ|BP") is None + assert 
model._match_derived_terms("BP") is None + assert model._match_derived_terms("BP") is None diff --git a/bambi/tests/test_priors.py b/bambi/tests/test_priors.py index 17f6f9242..b4d98d40c 100644 --- a/bambi/tests/test_priors.py +++ b/bambi/tests/test_priors.py @@ -1,10 +1,12 @@ -import pytest -from bambi.models import Model -from bambi.priors import Prior, Family, PriorFactory -from os.path import dirname, join import json -import pandas as pd +from os.path import dirname, join + import numpy as np +import pandas as pd +import pytest + +from bambi.models import Model +from bambi.priors import Family, Prior, PriorFactory @pytest.fixture(scope="module") @@ -130,7 +132,9 @@ def test_update_term_priors_after_init(diabetes_data): model.set_priors({"age_grp|BP": 0.5}) model.build(backend="pymc") assert model.terms["age_grp[T.1]|BP"].prior.scale == 0.5 - assert np.isclose(model.terms["age_grp[T.1]|BP"].prior.args["sigma"].args["sigma"], 94, rtol=0.2) + assert np.isclose( + model.terms["age_grp[T.1]|BP"].prior.args["sigma"].args["sigma"], 94, rtol=0.2 + ) assert model.terms["1|BP"].prior.scale == 0.5 diff --git a/bambi/tests/test_utils.py b/bambi/tests/test_utils.py index 4523d4976..ed212a936 100644 --- a/bambi/tests/test_utils.py +++ b/bambi/tests/test_utils.py @@ -1,6 +1,7 @@ from bambi.utils import listify + def test_listify(): assert listify(None) == [] assert listify([1, 2, 3]) == [1, 2, 3] - assert listify('giraffe') == ['giraffe'] \ No newline at end of file + assert listify("giraffe") == ["giraffe"] diff --git a/bambi/utils.py b/bambi/utils.py index 39dddfc0a..a5aeee825 100644 --- a/bambi/utils.py +++ b/bambi/utils.py @@ -1,6 +1,8 @@ def listify(obj): - """ Wraps all non-list or tuple objects in a list; provides a simple - way to accept flexible arguments. """ + """Wrap all non-list or tuple objects in a list. + + Provides a simple way to accept flexible arguments. 
+ """ if obj is None: return [] else: diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..8075794dd --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[tool.black] +line-length = 100 +target-version = ['py37', 'py38'] diff --git a/requirements-dev.txt b/requirements-dev.txt index 0a9135d90..0defb5eab 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,4 +5,4 @@ pytest-cov>=2.6.1 seaborn>=0.9.0 sphinx>=1.8 nbsphinx>=0.4.2 -black; python_version == '3.7' +black==19.10b0 diff --git a/scripts/create_testenv.sh b/scripts/create_testenv.sh index 33d7eaa4d..1db6675d1 100644 --- a/scripts/create_testenv.sh +++ b/scripts/create_testenv.sh @@ -21,7 +21,7 @@ if [[ $* != *--global* ]]; then fi fi -conda install --yes pip pytest numpy matplotlib pandas patsy statsmodels +conda install --yes pip pytest numpy matplotlib pandas patsy statsmodels black pip install --upgrade pip diff --git a/scripts/lint.sh b/scripts/lint.sh index 2c6277ffb..c360c7981 100755 --- a/scripts/lint.sh +++ b/scripts/lint.sh @@ -1,7 +1,10 @@ #!/bin/bash - set -ex # fail on first error, print commands +echo "Checking code style with black...." +python -m black bambi --check +echo "Success!" + echo "Checking code style with pylint..." python -m pylint bambi/ echo "Success!" diff --git a/scripts/test.sh b/scripts/test.sh index 6840a70be..d9f7af136 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -1,10 +1,7 @@ #!/bin/bash - set -ex # fail on first error, print commands -echo "Checking code style with pylint..." -python -m pylint bambi/ -echo "Success!" +scripts/lint.sh echo "Running unit tests..." python -m pytest -vx --cov=bambi