diff --git a/.circleci/config.yml b/.circleci/config.yml index 65a872c227..72b250b88a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -8,7 +8,7 @@ jobs: - checkout - run: pyenv local 2.7.15 - run: make installdeps lint - - run: tox -e py27 + - run: make test "python-3.5": working_directory: ~/featuretools @@ -18,7 +18,7 @@ jobs: - checkout - run: pyenv local 3.5.6 - run: make installdeps lint - - run: tox -e py35 + - run: make test "python-3.6": working_directory: ~/featuretools @@ -30,7 +30,8 @@ jobs: - run: pyenv local 3.6.6 - run: make installdeps lint - run: make -C docs/ -e "SPHINXOPTS=-W" clean html - - run: tox -e clean,py36 && codecov + - run: make test + - run: coverage erase && coverage && codecov "python-3.7": working_directory: ~/featuretools @@ -40,7 +41,7 @@ jobs: - checkout - run: pyenv local 3.7.0 - run: make installdeps lint - - run: tox -e py37 + - run: make test workflows: version: 2 diff --git a/.gitignore b/.gitignore index 6378506740..f6a654cadf 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ featuretools/tests/integration_data/products.gzip featuretools/tests/integration_data/regions.gzip featuretools/tests/integration_data/sessions.gzip featuretools/tests/integration_data/stores.gzip +*.dirlock # Byte-compiled / optimized / DLL files __pycache__/ @@ -127,4 +128,4 @@ ENV/ .pytest_cache #IDE -.vscode \ No newline at end of file +.vscode diff --git a/Makefile b/Makefile index 2572581b91..2d3015038b 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,3 @@ -TEST_CMD=setup.py test --addopts --boxed clean: find . -name '*.pyo' -delete find . 
-name '*.pyc' -delete @@ -9,7 +8,7 @@ lint: flake8 featuretools && isort --check-only --recursive featuretools test: lint - python $(TEST_CMD) + pytest featuretools/tests installdeps: pip install --upgrade pip diff --git a/dev-requirements.txt b/dev-requirements.txt index c355c23f45..b03c589538 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -10,4 +10,3 @@ matplotlib==2.2.3; python_version<'3' nbsphinx==0.3.5 Sphinx==1.8.1 sphinx_rtd_theme==0.4.2 -tox==3.5.3 diff --git a/featuretools/__init__.py b/featuretools/__init__.py index 1d90c77fe8..85b717c408 100644 --- a/featuretools/__init__.py +++ b/featuretools/__init__.py @@ -5,7 +5,7 @@ from .entityset.api import * from . import primitives from .synthesis.api import * -from .primitives import Feature, list_primitives +from .primitives import list_primitives, install_primitives, Feature from .computational_backends.api import * from . import tests from .utils.pickle_utils import * diff --git a/featuretools/__main__.py b/featuretools/__main__.py new file mode 100644 index 0000000000..eca4558f8d --- /dev/null +++ b/featuretools/__main__.py @@ -0,0 +1,39 @@ +import click +import pandas as pd + +import featuretools +from featuretools.primitives import install_primitives +from featuretools.primitives.install import get_featuretools_root + + +@click.group() +def cli(): + pass + + +@click.command() +def info(): + print("Featuretools version: %s" % featuretools.__version__) + print("Featuretools installation directory: %s" % get_featuretools_root()) + + +@click.command() +@click.option('--prompt/--no-prompt', default=True, help='Confirm primitives before installing') +@click.argument('directory') +def install(prompt, directory): + install_primitives(directory, prompt) + + +@click.command() +def list_primitives(): + with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.max_colwidth', -1, 'display.width', 1000): + print(featuretools.list_primitives()) + + +cli.add_command(install) 
+cli.add_command(list_primitives) +cli.add_command(info) + + +if __name__ == "__main__": + cli() diff --git a/featuretools/computational_backends/feature_tree.py b/featuretools/computational_backends/feature_tree.py index 7625fec6e1..4d7453a90f 100644 --- a/featuretools/computational_backends/feature_tree.py +++ b/featuretools/computational_backends/feature_tree.py @@ -8,7 +8,7 @@ from featuretools import variable_types from featuretools.exceptions import UnknownFeature -from featuretools.primitives import ( +from featuretools.primitives.base import ( AggregationPrimitive, DirectFeature, IdentityFeature, diff --git a/featuretools/computational_backends/pandas_backend.py b/featuretools/computational_backends/pandas_backend.py index 959a35d3b5..ce5719553e 100644 --- a/featuretools/computational_backends/pandas_backend.py +++ b/featuretools/computational_backends/pandas_backend.py @@ -14,7 +14,7 @@ from featuretools import variable_types from featuretools.exceptions import UnknownFeature -from featuretools.primitives import ( +from featuretools.primitives.base import ( AggregationPrimitive, DirectFeature, IdentityFeature, diff --git a/featuretools/computational_backends/utils.py b/featuretools/computational_backends/utils.py index bb7c2fc679..c4e3c24951 100644 --- a/featuretools/computational_backends/utils.py +++ b/featuretools/computational_backends/utils.py @@ -10,7 +10,7 @@ from distributed import Client, LocalCluster from pandas.tseries.frequencies import to_offset -from featuretools.primitives import AggregationPrimitive, DirectFeature +from featuretools.primitives.base import AggregationPrimitive, DirectFeature from featuretools.utils.wrangle import _check_timedelta logger = logging.getLogger('featuretools.computational_backend') diff --git a/featuretools/primitives/api.py b/featuretools/primitives/api.py index 20747f3919..20ef1bc1f7 100644 --- a/featuretools/primitives/api.py +++ b/featuretools/primitives/api.py @@ -1,13 +1,12 @@ # flake8: noqa -from 
.aggregation_primitive_base import * -from .aggregation_primitives import * -from .binary_transform import * -from .cum_transform_feature import * -from .direct_feature import * -from .primitive_base import * -from .transform_primitive import * +from .base import Feature +from .install import install_primitives +from .installed import * +from .standard.aggregation_primitives import * +from .standard.binary_transform import * +from .standard.cum_transform_feature import * +from .standard.transform_primitive import * from .utils import ( - apply_dual_op_from_feat, get_aggregation_primitives, get_transform_primitives, list_primitives diff --git a/featuretools/primitives/base/__init__.py b/featuretools/primitives/base/__init__.py new file mode 100644 index 0000000000..7a3213ec20 --- /dev/null +++ b/featuretools/primitives/base/__init__.py @@ -0,0 +1,2 @@ +# flake8: noqa +from .api import * diff --git a/featuretools/primitives/aggregation_primitive_base.py b/featuretools/primitives/base/aggregation_primitive_base.py similarity index 100% rename from featuretools/primitives/aggregation_primitive_base.py rename to featuretools/primitives/base/aggregation_primitive_base.py diff --git a/featuretools/primitives/base/api.py b/featuretools/primitives/base/api.py new file mode 100644 index 0000000000..826ae121ca --- /dev/null +++ b/featuretools/primitives/base/api.py @@ -0,0 +1,12 @@ +# flake8: noqa +from .aggregation_primitive_base import ( + AggregationPrimitive, + make_agg_primitive +) +from .primitive_base import ( + DirectFeature, + Feature, + IdentityFeature, + PrimitiveBase +) +from .transform_primitive_base import TransformPrimitive, make_trans_primitive diff --git a/featuretools/primitives/primitive_base.py b/featuretools/primitives/base/primitive_base.py similarity index 83% rename from featuretools/primitives/primitive_base.py rename to featuretools/primitives/base/primitive_base.py index 7ce984adf0..1b729924b6 100644 --- a/featuretools/primitives/primitive_base.py 
+++ b/featuretools/primitives/base/primitive_base.py @@ -102,7 +102,6 @@ def entity(self): # P TODO: this should get refactored to return_type @property def variable_type(self): - from . import direct_feature feature = self return_type = self.return_type @@ -120,7 +119,7 @@ def variable_type(self): # direct features should keep the Id return type, but all other features should get # converted to Categorical - if not isinstance(feature, direct_feature.DirectFeature) and return_type == Id: + if not isinstance(feature, DirectFeature) and return_type == Id: return_type = Categorical feature = base_feature @@ -161,7 +160,7 @@ def __eq__(self, other_feature_or_val): See also: :meth:`PrimitiveBase.equal_to` """ - from .binary_transform import Equals + from featuretools.primitives import Equals return Equals(self, other_feature_or_val) def __ne__(self, other_feature_or_val): @@ -170,7 +169,7 @@ def __ne__(self, other_feature_or_val): See also: :meth:`PrimitiveBase.not_equal_to` """ - from .binary_transform import NotEquals + from featuretools.primitives import NotEquals return NotEquals(self, other_feature_or_val) def __gt__(self, other_feature_or_val): @@ -179,7 +178,7 @@ def __gt__(self, other_feature_or_val): See also: :meth:`PrimitiveBase.GT` """ - from .binary_transform import GreaterThan + from featuretools.primitives import GreaterThan return GreaterThan(self, other_feature_or_val) def __ge__(self, other_feature_or_val): @@ -188,7 +187,7 @@ def __ge__(self, other_feature_or_val): See also: :meth:`PrimitiveBase.greater_than_equal_to` """ - from .binary_transform import GreaterThanEqualTo + from featuretools.primitives import GreaterThanEqualTo return GreaterThanEqualTo(self, other_feature_or_val) def __lt__(self, other_feature_or_val): @@ -197,7 +196,7 @@ def __lt__(self, other_feature_or_val): See also: :meth:`PrimitiveBase.less_than` """ - from .binary_transform import LessThan + from featuretools.primitives import LessThan return LessThan(self, 
other_feature_or_val) def __le__(self, other_feature_or_val): @@ -206,16 +205,16 @@ def __le__(self, other_feature_or_val): See also: :meth:`PrimitiveBase.less_than_equal_to` """ - from .binary_transform import LessThanEqualTo + from featuretools.primitives import LessThanEqualTo return LessThanEqualTo(self, other_feature_or_val) def __add__(self, other_feature_or_val): """Add other_feature_or_val""" - from .binary_transform import Add + from featuretools.primitives import Add return Add(self, other_feature_or_val) def __radd__(self, other): - from .binary_transform import Add + from featuretools.primitives import Add return Add(other, self) def __sub__(self, other_feature_or_val): @@ -224,11 +223,11 @@ def __sub__(self, other_feature_or_val): See also: :meth:`PrimitiveBase.subtract` """ - from .binary_transform import Subtract + from featuretools.primitives import Subtract return Subtract(self, other_feature_or_val) def __rsub__(self, other): - from .binary_transform import Subtract + from featuretools.primitives import Subtract return Subtract(other, self) def __div__(self, other_feature_or_val): @@ -237,18 +236,18 @@ def __div__(self, other_feature_or_val): See also: :meth:`PrimitiveBase.divide` """ - from .binary_transform import Divide + from featuretools.primitives import Divide return Divide(self, other_feature_or_val) def __truediv__(self, other_feature_or_val): return self.__div__(other_feature_or_val) def __rtruediv__(self, other_feature_or_val): - from .binary_transform import Divide + from featuretools.primitives import Divide return Divide(other_feature_or_val, self) def __rdiv__(self, other_feature_or_val): - from .binary_transform import Divide + from featuretools.primitives import Divide return Divide(other_feature_or_val, self) def __mul__(self, other_feature_or_val): @@ -257,11 +256,11 @@ def __mul__(self, other_feature_or_val): See also: :meth:`PrimitiveBase.multiply` """ - from .binary_transform import Multiply + from featuretools.primitives 
import Multiply return Multiply(self, other_feature_or_val) def __rmul__(self, other): - from .binary_transform import Multiply + from featuretools.primitives import Multiply return Multiply(other, self) def __mod__(self, other_feature_or_val): @@ -270,64 +269,64 @@ def __mod__(self, other_feature_or_val): See also: :meth:`PrimitiveBase.modulo` """ - from .binary_transform import Mod + from featuretools.primitives import Mod return Mod(self, other_feature_or_val) def __and__(self, other): return self.AND(other) def __rand__(self, other): - from .binary_transform import And + from featuretools.primitives import And return And(other, self) def __or__(self, other): return self.OR(other) def __ror__(self, other): - from .binary_transform import Or + from featuretools.primitives import Or return Or(other, self) def __not__(self, other): return self.NOT(other) def __abs__(self): - from .transform_primitive import Absolute + from featuretools.primitives import Absolute return Absolute(self) def __neg__(self): - from .binary_transform import Negate + from featuretools.primitives import Negate return Negate(self) def AND(self, other_feature): """Logical AND with other_feature""" - from .binary_transform import And + from featuretools.primitives import And return And(self, other_feature) def OR(self, other_feature): """Logical OR with other_feature""" - from .binary_transform import Or + from featuretools.primitives import Or return Or(self, other_feature) def NOT(self): """Creates inverse of feature""" - from .transform_primitive import Not - from .binary_transform import Compare + from featuretools.primitives import Not + from featuretools.primitives import Compare if isinstance(self, Compare): return self.invert() return Not(self) def LIKE(self, like_string, case_sensitive=False): - from .transform_primitive import Like + from featuretools.primitives import Like return Like(self, like_string, case_sensitive=case_sensitive) def isin(self, list_of_output): - from 
class DirectFeature(PrimitiveBase):
    """Feature on a child entity that takes its value from a feature of a
    parent entity.

    When the forward path from ``child_entity`` to the base feature's entity
    has more than one element, ``base_feature`` is first wrapped in another
    ``DirectFeature`` defined on the entity referenced by the second path
    element; that wrapper then becomes the single base feature of this one.
    """
    input_types = [Variable]
    return_type = None

    def __init__(self, base_feature, child_entity):
        base_feature = self._check_feature(base_feature)
        if base_feature.expanding:
            self.expanding = True

        forward_path = child_entity.entityset.find_forward_path(
            child_entity.id, base_feature.entity.id)
        if len(forward_path) <= 1:
            parent_feature = base_feature
        else:
            # Longer path: recurse through the entity named by the second
            # path element before attaching to child_entity.
            next_entity_id = forward_path[1].child_entity.id
            next_entity = child_entity.entityset[next_entity_id]
            parent_feature = DirectFeature(base_feature, next_entity)

        self.parent_entity = parent_feature.entity
        self._variable_type = parent_feature.variable_type
        super(DirectFeature, self).__init__(child_entity, [parent_feature])

    @property
    def default_value(self):
        """Default value of the wrapped (first) base feature."""
        wrapped = self.base_features[0]
        return wrapped.default_value

    @property
    def variable(self):
        """``variable`` attribute of the wrapped base feature, or None if it
        has none."""
        wrapped = self.base_features[0]
        return getattr(wrapped, 'variable', None)

    def generate_name(self):
        """Name formatted as ``<parent entity id>.<base feature name>``."""
        parent_id = self.parent_entity.id
        wrapped_name = self.base_features[0].get_name()
        return u"%s.%s" % (parent_id, wrapped_name)
class TransformPrimitive(PrimitiveBase):
    """Feature for an entity that is computed from one or more other
    features of that same entity."""
    rolling_function = False

    def __init__(self, *base_features):
        # Keep this in sync with new_class_init inside make_trans_primitive,
        # which repeats the same steps for generated classes.
        checked = [self._check_feature(f) for f in base_features]
        self.base_features = checked

        for feature in checked:
            if feature.expanding:
                self.expanding = True
                break

        entities = set(feature.entity for feature in checked)
        assert len(entities) == 1, \
            "More than one entity for base features"

        super(TransformPrimitive, self).__init__(checked[0].entity, checked)

    def generate_name(self):
        """Name formatted as ``NAME(base1, base2, ...)``."""
        prefix = self.name.upper()
        base_names = u", ".join(f.get_name() for f in self.base_features)
        return u"{}({})".format(prefix, base_names)

    @property
    def default_value(self):
        """Default value of the first base feature."""
        first = self.base_features[0]
        return first.default_value
some + transformation to it, returning an array. + + input_types (list[Variable]): Variable types of the inputs. + + return_type (Variable): Variable type of return. + + name (str): Name of the primitive. If no name is provided, the name + of `function` will be used. + + description (str): Description of primitive. + + cls_attributes (dict[str -> anytype]): Custom attributes to be added to + class. Key is attribute name, value is the attribute value. + + uses_calc_time (bool): If True, the cutoff time the feature is being + calculated at will be passed to the function as the keyword + argument 'time'. + + commutative (bool): If True, will only make one feature per unique set + of base features. + + Example: + .. ipython :: python + + from featuretools.primitives import make_trans_primitive + from featuretools.variable_types import Variable, Boolean + + def pd_is_in(array, list_of_outputs=None): + if list_of_outputs is None: + list_of_outputs = [] + return pd.Series(array).isin(list_of_outputs) + + def isin_generate_name(self): + return u"%s.isin(%s)" % (self.base_features[0].get_name(), + str(self.kwargs['list_of_outputs'])) + + IsIn = make_trans_primitive( + function=pd_is_in, + input_types=[Variable], + return_type=Boolean, + name="is_in", + description="For each value of the base feature, checks " + "whether it is in a list that provided.", + cls_attributes={"generate_name": isin_generate_name}) + ''' + # dictionary that holds attributes for class + cls = {"__doc__": description} + if cls_attributes is not None: + cls.update(cls_attributes) + + # creates the new class and set name and types + name = name or function.__name__ + new_class = type(name, (TransformPrimitive,), cls) + new_class.name = name + new_class.input_types = input_types + new_class.return_type = return_type + new_class.commutative = commutative + new_class, default_kwargs = inspect_function_args(new_class, + function, + uses_calc_time) + + if len(default_kwargs) > 0: + new_class.default_kwargs 
try:
    # Python 3: getfullargspec also reports keyword-only parameters.
    from inspect import getfullargspec as getargspec
except ImportError:
    # Python 2.7 has no getfullargspec.
    from inspect import getargspec


def inspect_function_args(new_class, function, uses_calc_time):
    """Collect the default keyword arguments of ``function``.

    Args:
        new_class (type): Primitive class being built. ``uses_calc_time`` is
            set to True on it when ``function`` has a ``time`` parameter.
        function (callable): Function whose signature is inspected.
        uses_calc_time (bool): Whether ``function`` is allowed to declare a
            ``time`` parameter.

    Returns:
        tuple: ``(new_class, kwargs)`` where ``kwargs`` maps every parameter
        that has a default (positional-or-keyword and, on Python 3,
        keyword-only) to that default.

    Raises:
        ValueError: If ``function`` declares ``time`` while
            ``uses_calc_time`` is False.
    """
    argspec = getargspec(function)
    positional = list(argspec.args)
    # Python 2's ArgSpec has no keyword-only fields, hence the getattr.
    kwonly = list(getattr(argspec, 'kwonlyargs', None) or [])
    kwonly_defaults = getattr(argspec, 'kwonlydefaults', None) or {}

    if 'time' in positional or 'time' in kwonly:
        if not uses_calc_time:
            raise ValueError("'time' is a restricted keyword. Please"
                             " use a different keyword.")
        new_class.uses_calc_time = True

    kwargs = {}
    defaults = argspec.defaults or ()
    if defaults:
        # Defaults always belong to the trailing positional parameters.
        kwargs.update(zip(positional[-len(defaults):], defaults))
    kwargs.update(kwonly_defaults)
    return new_class, kwargs
def install_primitives(directory_or_archive, prompt=True):
    """Install primitive files from a directory or a tar archive.

    Args:
        directory_or_archive (str): Local directory, local archive file, or
            anything else, which is treated as a remote archive and passed
            to ``download_archive``.
        prompt (bool): If True, ask for confirmation on stdin before copying.
            Only the exact answers ``Y`` (install) and ``n`` (abort) are
            accepted; any other answer repeats the question.

    The temporary directory is removed when the function exits, including
    when the user declines or an error is raised.
    """
    tmp_dir = get_installation_temp_dir()
    try:
        # Anything that is not an existing local path must be a remote archive.
        is_local = (os.path.isdir(directory_or_archive) or
                    os.path.isfile(directory_or_archive))
        if not is_local:
            directory_or_archive = download_archive(directory_or_archive)

        # A file is an archive and has to be unpacked first.
        if os.path.isfile(directory_or_archive):
            directory = extract_archive(directory_or_archive)
        else:
            directory = directory_or_archive

        # Load every candidate file to learn which primitive it defines.
        all_primitives = {}
        files_to_copy = []
        for filepath in list_primitive_files(directory):
            primitive_name, primitive_obj = load_primitive_from_file(filepath)
            files_to_copy.append(filepath)
            all_primitives[primitive_name] = primitive_obj

        primitives_list = ", ".join(all_primitives.keys())
        if prompt:
            while True:
                resp = input("Install primitives: %s? (Y/n) " % primitives_list)
                if resp == "Y":
                    break
                elif resp == "n":
                    return
        else:
            print("Installing primitives: %s" % primitives_list)

        installation_dir = get_installation_dir()
        for to_copy in tqdm(files_to_copy):
            shutil.copy2(to_copy, installation_dir)
    finally:
        # Always drop downloaded/extracted leftovers, even on abort or error.
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)


def get_featuretools_root():
    """Return the directory that contains the featuretools package."""
    return os.path.dirname(featuretools.__file__)


def get_installation_dir():
    """Return the ``primitives/installed`` directory inside featuretools."""
    return os.path.join(get_featuretools_root(), "primitives", "installed")


def get_installation_temp_dir():
    """Return the ``.tmp/`` directory inside the installation directory.

    The directory is created if it does not exist yet.
    """
    tmp_dir = os.path.join(get_installation_dir(), ".tmp/")
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)
    return tmp_dir


def download_archive(uri):
    """Download the archive at ``uri`` into the temporary directory.

    Falls back to an anonymous ``s3fs`` connection when opening ``uri``
    raises ``NoCredentialsError``.

    Returns:
        str: Path of the downloaded local file.
    """
    filename = os.path.basename(urlparse(uri).path)
    local_archive = os.path.join(get_installation_temp_dir(), filename)

    with open(local_archive, 'wb') as f:
        try:
            remote_archive = smart_open(uri, 'rb', ignore_extension=True)
        except NoCredentialsError:
            # fallback to anonymous using s3fs
            s3 = s3fs.S3FileSystem(anon=True)
            remote_archive = s3.open(uri, 'rb')

        try:
            shutil.copyfileobj(remote_archive, f)
        finally:
            # Close the remote handle even if the copy fails part-way.
            remote_archive.close()

    return local_archive


def extract_archive(filepath):
    """Extract the primitive files of a tar archive into the temp directory.

    Only members accepted by ``check_valid_archive_member`` are extracted.

    Returns:
        str: Directory, inside the temp directory, that holds the first
        extracted primitive file.

    Raises:
        RuntimeError: If ``filepath`` does not end in ``tar`` or ``tar.gz``,
            or if the archive contains no extractable primitive file.
    """
    if filepath.endswith("tar.gz"):
        mode = "r:gz"
    elif filepath.endswith("tar"):
        mode = "r:"
    else:
        e = "Cannot extract archive from %s." % filepath
        e += " Must provide archive ending in .tar or .tar.gz"
        raise RuntimeError(e)

    tmp_dir = get_installation_temp_dir()
    with tarfile.open(filepath, mode=mode) as tar:
        members = [m for m in tar.getmembers()
                   if check_valid_archive_member(m, tmp_dir)]
        if not members:
            raise RuntimeError("No primitive files found in archive %s" % filepath)
        tar.extractall(tmp_dir, members=members)

    # figure out the directory name from any file in archive
    return os.path.join(tmp_dir, os.path.dirname(members[0].path))


def list_primitive_files(directory):
    """Return the paths in ``directory`` that might contain primitives.

    Entries are checked by their full path, so sub-directories are skipped
    regardless of the current working directory. The result is sorted by
    file name.
    """
    candidates = (os.path.join(directory, name)
                  for name in sorted(os.listdir(directory)))
    return [path for path in candidates if check_valid_primitive_path(path)]


def _is_primitive_filename(filename):
    """True for a non-empty ``*.py`` name not starting with ``__`` or ``.``."""
    if not filename:
        return False
    if filename.startswith("__") or filename.startswith("."):
        return False
    return filename.endswith(".py")


def check_valid_primitive_path(path):
    """Return True if ``path`` is not a directory and names a primitive file.

    A primitive file is a ``.py`` file whose name does not start with ``__``
    or ``.``. An empty file name is rejected.
    """
    if os.path.isdir(path):
        return False
    return _is_primitive_filename(os.path.basename(path))


def check_valid_archive_member(member, directory):
    """Return True if a tar member may be extracted into ``directory``.

    The member has to be a regular file (no links, devices or directories),
    has to be named like a primitive file, and has to resolve to a location
    inside ``directory`` so that ``..`` components or absolute names cannot
    write elsewhere.
    """
    if not member.isfile():
        return False
    if not _is_primitive_filename(os.path.basename(member.name)):
        return False
    root = os.path.realpath(directory)
    target = os.path.realpath(os.path.join(root, member.name))
    return target.startswith(root + os.sep)


def load_primitive_from_file(filepath):
    """Load the single primitive class defined in a python file.

    Returns:
        tuple: ``(name, class)`` of the one ``PrimitiveBase`` subclass found
        among the module's attributes. ``PrimitiveBase`` itself is ignored,
        so importing it in the file does not count as a primitive.

    Raises:
        RuntimeError: If the file exposes no primitive or more than one.
    """
    # The module is registered under the file name without its ".py" suffix.
    module_name = os.path.basename(filepath)[:-3]
    if IS_PY2:
        # for python 2.7
        module = imp.load_source(module_name, filepath)
    else:
        # for python >3.5
        spec = importlib.util.spec_from_file_location(module_name, filepath)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

    primitives = []
    for primitive_name, primitive_class in vars(module).items():
        if not isclass(primitive_class):
            continue
        if primitive_class is PrimitiveBase:
            continue
        if issubclass(primitive_class, PrimitiveBase):
            primitives.append((primitive_name, primitive_class))

    if len(primitives) == 0:
        raise RuntimeError("No primitive defined in file %s" % filepath)
    elif len(primitives) > 1:
        raise RuntimeError("More than one primitive defined in file %s" % filepath)

    return primitives[0]
..base.aggregation_primitive_base import ( AggregationPrimitive, make_agg_primitive ) @@ -356,39 +356,6 @@ def pd_trend(y, x): return pd_trend -# # TODO: Not implemented yet -# class ConseqPos(AggregationPrimitive): -# name = "conseq_pos" -# input_types = [(variable_types.Numeric,), -# (variable_types.Ordinal,)] -# return_type = variable_types.Numeric -# max_stack_depth = 1 -# stack_on = [] -# stack_on_exclude = [] - -# def get_function(self): -# raise NotImplementedError("This feature has not been implemented") - - -# # TODO: Not implemented yet -# class ConseqSame(AggregationPrimitive): -# name = "conseq_same" -# input_types = [(variable_types.Categorical,), -# (variable_types.Ordinal,), -# (variable_types.Numeric,)] -# return_type = variable_types.Numeric -# max_stack_depth = 1 -# stack_on = [] -# stack_on_exclude = [] - -# def get_function(self): -# raise NotImplementedError("This feature has not been implemented") - - -# # TODO: Not implemented yet -# class TimeSinceLast(AggregationPrimitive): - - def convert_datetime_to_floats(x): first = int(x.iloc[0].value * 1e-9) x = pd.to_numeric(x).astype(np.float64).values diff --git a/featuretools/primitives/binary_transform.py b/featuretools/primitives/standard/binary_transform.py similarity index 98% rename from featuretools/primitives/binary_transform.py rename to featuretools/primitives/standard/binary_transform.py index aba3f33c95..e82a91e7bd 100644 --- a/featuretools/primitives/binary_transform.py +++ b/featuretools/primitives/standard/binary_transform.py @@ -3,8 +3,8 @@ import numpy as np -from .primitive_base import PrimitiveBase -from .transform_primitive import TransformPrimitive +from ..base.primitive_base import PrimitiveBase +from ..base.transform_primitive_base import TransformPrimitive from .utils import apply_dual_op_from_feat from featuretools.variable_types import ( diff --git a/featuretools/primitives/cum_transform_feature.py b/featuretools/primitives/standard/cum_transform_feature.py similarity 
index 98% rename from featuretools/primitives/cum_transform_feature.py rename to featuretools/primitives/standard/cum_transform_feature.py index d4aa9eefe3..1a85372a89 100644 --- a/featuretools/primitives/cum_transform_feature.py +++ b/featuretools/primitives/standard/cum_transform_feature.py @@ -4,9 +4,9 @@ import numpy as np import pandas as pd +from ..base.primitive_base import IdentityFeature, PrimitiveBase +from ..base.transform_primitive_base import TransformPrimitive from .aggregation_primitives import Count, Max, Mean, Min, Sum -from .primitive_base import IdentityFeature, PrimitiveBase -from .transform_primitive import TransformPrimitive from .utils import apply_dual_op_from_feat from featuretools.utils import is_string diff --git a/featuretools/primitives/transform_primitive.py b/featuretools/primitives/standard/transform_primitive.py similarity index 68% rename from featuretools/primitives/transform_primitive.py rename to featuretools/primitives/standard/transform_primitive.py index 80c6345ebd..4d761d53de 100644 --- a/featuretools/primitives/transform_primitive.py +++ b/featuretools/primitives/standard/transform_primitive.py @@ -1,16 +1,14 @@ from __future__ import division -import copy -import datetime -import functools -import os from builtins import str import numpy as np import pandas as pd -from .primitive_base import PrimitiveBase -from .utils import inspect_function_args +from ..base.transform_primitive_base import ( + TransformPrimitive, + make_trans_primitive +) from featuretools.variable_types import ( Boolean, @@ -26,131 +24,6 @@ Variable ) -current_path = os.path.dirname(os.path.realpath(__file__)) -FEATURE_DATASETS = os.path.join(os.path.join(current_path, '..'), - 'feature_datasets') - - -class TransformPrimitive(PrimitiveBase): - """Feature for entity that is a based off one or more other features - in that entity.""" - rolling_function = False - - def __init__(self, *base_features): - # Any edits made to this method should also be made to 
the - # new_class_init method in make_trans_primitive - self.base_features = [self._check_feature(f) for f in base_features] - if any(bf.expanding for bf in self.base_features): - self.expanding = True - assert len(set([f.entity for f in self.base_features])) == 1, \ - "More than one entity for base features" - super(TransformPrimitive, self).__init__(self.base_features[0].entity, - self.base_features) - - def generate_name(self): - name = u"{}(".format(self.name.upper()) - name += u", ".join(f.get_name() for f in self.base_features) - name += u")" - return name - - @property - def default_value(self): - return self.base_features[0].default_value - - -def make_trans_primitive(function, input_types, return_type, name=None, - description='A custom transform primitive', - cls_attributes=None, uses_calc_time=False, - commutative=False): - '''Returns a new transform primitive class - - Args: - function (function): Function that takes in an array and applies some - transformation to it, returning an array. - - input_types (list[Variable]): Variable types of the inputs. - - return_type (Variable): Variable type of return. - - name (str): Name of the primitive. If no name is provided, the name - of `function` will be used. - - description (str): Description of primitive. - - cls_attributes (dict[str -> anytype]): Custom attributes to be added to - class. Key is attribute name, value is the attribute value. - - uses_calc_time (bool): If True, the cutoff time the feature is being - calculated at will be passed to the function as the keyword - argument 'time'. - - commutative (bool): If True, will only make one feature per unique set - of base features. - - Example: - .. 
ipython :: python - - from featuretools.primitives import make_trans_primitive - from featuretools.variable_types import Variable, Boolean - - def pd_is_in(array, list_of_outputs=None): - if list_of_outputs is None: - list_of_outputs = [] - return pd.Series(array).isin(list_of_outputs) - - def isin_generate_name(self): - return u"%s.isin(%s)" % (self.base_features[0].get_name(), - str(self.kwargs['list_of_outputs'])) - - IsIn = make_trans_primitive( - function=pd_is_in, - input_types=[Variable], - return_type=Boolean, - name="is_in", - description="For each value of the base feature, checks " - "whether it is in a list that provided.", - cls_attributes={"generate_name": isin_generate_name}) - ''' - # dictionary that holds attributes for class - cls = {"__doc__": description} - if cls_attributes is not None: - cls.update(cls_attributes) - - # creates the new class and set name and types - name = name or function.__name__ - new_class = type(name, (TransformPrimitive,), cls) - new_class.name = name - new_class.input_types = input_types - new_class.return_type = return_type - new_class.commutative = commutative - new_class, default_kwargs = inspect_function_args(new_class, - function, - uses_calc_time) - - if len(default_kwargs) > 0: - new_class.default_kwargs = default_kwargs - - def new_class_init(self, *args, **kwargs): - self.kwargs = copy.deepcopy(self.default_kwargs) - self.base_features = [self._check_feature(f) for f in args] - if any(bf.expanding for bf in self.base_features): - self.expanding = True - assert len(set([f.entity for f in self.base_features])) == 1, \ - "More than one entity for base features" - self.kwargs.update(kwargs) - self.partial = functools.partial(function, **self.kwargs) - self.partial.__name__ = name - - super(TransformPrimitive, self).__init__( - self.base_features[0].entity, self.base_features) - new_class.__init__ = new_class_init - new_class.get_function = lambda self: self.partial - else: - # creates a lambda function that returns 
function every time - new_class.get_function = lambda self, f=function: f - - return new_class - class IsNull(TransformPrimitive): """For each value of base feature, return 'True' if value is null.""" @@ -383,27 +256,7 @@ def word_counter(array): # return pd_like -# class TimeSince(TransformPrimitive): -# """ -# For each value of the base feature, compute the timedelta between it and -# a datetime -# """ -# name = "time_since" -# input_types = [[DatetimeTimeIndex], [Datetime]] -# return_type = Timedelta -# uses_calc_time = True - -# def get_function(self): -# def pd_time_since(array, time): -# if time is None: -# time = datetime.now() -# return (time - pd.DatetimeIndex(array)).values -# return pd_time_since - - def pd_time_since(array, time): - if time is None: - time = datetime.now() return (time - pd.DatetimeIndex(array)).values @@ -426,8 +279,6 @@ class DaysSince(TransformPrimitive): def get_function(self): def pd_days_since(array, time): - if time is None: - time = datetime.now() return pd_time_unit('days')(time - pd.DatetimeIndex(array)) return pd_days_since diff --git a/featuretools/primitives/standard/utils.py b/featuretools/primitives/standard/utils.py new file mode 100644 index 0000000000..b0ae2128b8 --- /dev/null +++ b/featuretools/primitives/standard/utils.py @@ -0,0 +1,55 @@ +import pandas as pd + +from ..base.primitive_base import PrimitiveBase + +from featuretools.utils import is_string + + +def apply_dual_op_from_feat(f, array_1, array_2=None): + left = f.left + right = f.right + left_array = array_1 + if array_2 is not None: + right_array = array_2 + else: + right_array = array_1 + to_op = None + other = None + if isinstance(left, PrimitiveBase): + left = pd.Series(left_array) + other = right + to_op = left + op = f._get_op() + if isinstance(right, PrimitiveBase): + right = pd.Series(right_array) + other = right + if to_op is None: + other = left + to_op = right + op = f._get_rop() + to_op, other = ensure_compatible_dtype(to_op, other) + op = 
getattr(to_op, op) + + assert op is not None, \ + "Need at least one feature for dual op, found 2 scalars" + return op(other) + + +def ensure_compatible_dtype(left, right): + # Pandas converts dtype to float + # if all nans. If the actual values are + # strings/objects though, future features + # that depend on these values may error + # unless we explicitly set the dtype to object + if isinstance(left, pd.Series) and isinstance(right, pd.Series): + if left.dtype != object and right.dtype == object: + left = left.astype(object) + elif right.dtype != object and left.dtype == object: + right = right.astype(object) + elif isinstance(left, pd.Series): + if left.dtype != object and is_string(right): + left = left.astype(object) + elif isinstance(right, pd.Series): + if right.dtype != object and is_string(left): + right = right.astype(object) + return left, right diff --git a/featuretools/primitives/utils.py b/featuretools/primitives/utils.py index 84e8b47ef5..450da624a7 100644 --- a/featuretools/primitives/utils.py +++ b/featuretools/primitives/utils.py @@ -2,47 +2,7 @@ import pandas as pd -from .primitive_base import PrimitiveBase - -import featuretools.primitives -from featuretools.utils import is_string - -try: - # python 3.7 deprecated getargspec - from inspect import getfullargspec as getargspec -except ImportError: - # python 2.7 - 3.6 backwards compatibility import - from inspect import getargspec - - -def apply_dual_op_from_feat(f, array_1, array_2=None): - left = f.left - right = f.right - left_array = array_1 - if array_2 is not None: - right_array = array_2 - else: - right_array = array_1 - to_op = None - other = None - if isinstance(left, PrimitiveBase): - left = pd.Series(left_array) - other = right - to_op = left - op = f._get_op() - if isinstance(right, PrimitiveBase): - right = pd.Series(right_array) - other = right - if to_op is None: - other = left - to_op = right - op = f._get_rop() - to_op, other = ensure_compatible_dtype(to_op, other) - op = 
getattr(to_op, op) - - assert op is not None, \ - "Need at least one feature for dual op, found 2 scalars" - return op(other) +import featuretools def get_aggregation_primitives(): @@ -80,42 +40,3 @@ def list_primitives(): agg_df['type'] = 'aggregation' return pd.concat([agg_df, transform_df], ignore_index=True)[['name', 'type', 'description']] - - -def ensure_compatible_dtype(left, right): - # Pandas converts dtype to float - # if all nans. If the actual values are - # strings/objects though, future features - # that depend on these values may error - # unless we explicitly set the dtype to object - if isinstance(left, pd.Series) and isinstance(right, pd.Series): - if left.dtype != object and right.dtype == object: - left = left.astype(object) - elif right.dtype != object and left.dtype == object: - right = right.astype(object) - elif isinstance(left, pd.Series): - if left.dtype != object and is_string(right): - left = left.astype(object) - elif isinstance(right, pd.Series): - if right.dtype != object and is_string(left): - right = right.astype(object) - return left, right - - -def inspect_function_args(new_class, function, uses_calc_time): - # inspect function to see if there are keyword arguments - argspec = getargspec(function) - kwargs = {} - if argspec.defaults is not None: - lowest_kwargs_position = len(argspec.args) - len(argspec.defaults) - - for i, arg in enumerate(argspec.args): - if arg == 'time': - if not uses_calc_time: - raise ValueError("'time' is a restricted keyword. 
Please" - " use a different keyword.") - else: - new_class.uses_calc_time = True - if argspec.defaults is not None and i >= lowest_kwargs_position: - kwargs[arg] = argspec.defaults[i - lowest_kwargs_position] - return new_class, kwargs diff --git a/featuretools/synthesis/deep_feature_synthesis.py b/featuretools/synthesis/deep_feature_synthesis.py index 9a41c9fd60..3e3f17f1e6 100644 --- a/featuretools/synthesis/deep_feature_synthesis.py +++ b/featuretools/synthesis/deep_feature_synthesis.py @@ -5,15 +5,14 @@ import featuretools.primitives.api as ftypes from featuretools import variable_types from featuretools.primitives.api import ( - AggregationPrimitive, BinaryFeature, Compare, - DirectFeature, Discrete, Equals, IdentityFeature, TimeSince ) +from featuretools.primitives.base import AggregationPrimitive, DirectFeature from featuretools.utils import is_string from featuretools.variable_types import Boolean, Categorical, Numeric, Ordinal @@ -28,7 +27,7 @@ class DeepFeatureSynthesis(object): entityset (EntitySet): Entityset for which to build features. - agg_primitives (list[str or :class:`.primitives.AggregationPrimitive`], optional): + agg_primitives (list[str or :class:`.primitives.`], optional): list of Aggregation Feature types to apply. 
Default: ["sum", "std", "max", "skew", "min", "mean", "count", "percent_true", "num_unique", "mode"] diff --git a/featuretools/tests/computational_backend/test_calculate_feature_matrix.py b/featuretools/tests/computational_backend/test_calculate_feature_matrix.py index b39c82113c..7f62acabd9 100644 --- a/featuretools/tests/computational_backend/test_calculate_feature_matrix.py +++ b/featuretools/tests/computational_backend/test_calculate_feature_matrix.py @@ -26,15 +26,11 @@ get_next_chunk, n_jobs_to_workers ) -from featuretools.primitives import ( +from featuretools.primitives import Count, Max, Min, Percentile, Sum +from featuretools.primitives.base import ( AggregationPrimitive, - Count, DirectFeature, - IdentityFeature, - Max, - Min, - Percentile, - Sum + IdentityFeature ) diff --git a/featuretools/tests/computational_backend/test_pandas_backend.py b/featuretools/tests/computational_backend/test_pandas_backend.py index ecb4a4c344..cf8bdff0c0 100644 --- a/featuretools/tests/computational_backend/test_pandas_backend.py +++ b/featuretools/tests/computational_backend/test_pandas_backend.py @@ -15,7 +15,6 @@ from featuretools.primitives import ( And, Count, - DirectFeature, Equals, GreaterThan, GreaterThanEqualTo, @@ -30,6 +29,7 @@ Sum, Trend ) +from featuretools.primitives.base import DirectFeature @pytest.fixture(scope='module') diff --git a/featuretools/tests/dfs_tests/test_deep_feature_synthesis.py b/featuretools/tests/dfs_tests/test_deep_feature_synthesis.py index 40a6513183..727ce747d7 100644 --- a/featuretools/tests/dfs_tests/test_deep_feature_synthesis.py +++ b/featuretools/tests/dfs_tests/test_deep_feature_synthesis.py @@ -14,19 +14,21 @@ from featuretools.primitives import ( Absolute, Add, - AggregationPrimitive, Count, CumMean, Diff, - DirectFeature, - Feature, Hour, - IdentityFeature, Last, Mean, Mode, Sum, - TimeSincePrevious, + TimeSincePrevious +) +from featuretools.primitives.base import ( + AggregationPrimitive, + DirectFeature, + Feature, + 
IdentityFeature, TransformPrimitive, make_agg_primitive ) diff --git a/featuretools/tests/primitive_tests/bad_primitive_files/__init__.py b/featuretools/tests/primitive_tests/bad_primitive_files/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/featuretools/tests/primitive_tests/bad_primitive_files/multiple_primitives.py b/featuretools/tests/primitive_tests/bad_primitive_files/multiple_primitives.py new file mode 100644 index 0000000000..d815b547c1 --- /dev/null +++ b/featuretools/tests/primitive_tests/bad_primitive_files/multiple_primitives.py @@ -0,0 +1,12 @@ +from featuretools.primitives import make_agg_primitive +from featuretools.variable_types import Numeric + +CustomMax = make_agg_primitive(lambda x: max(x), + name="CustomMax", + input_types=[Numeric], + return_type=Numeric) + +CustomSum = make_agg_primitive(lambda x: sum(x), + name="CustomSum", + input_types=[Numeric], + return_type=Numeric) diff --git a/featuretools/tests/primitive_tests/bad_primitive_files/no_primitives.py b/featuretools/tests/primitive_tests/bad_primitive_files/no_primitives.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/featuretools/tests/primitive_tests/primitives_to_install.tar.gz b/featuretools/tests/primitive_tests/primitives_to_install.tar.gz new file mode 100644 index 0000000000..efed9137ab Binary files /dev/null and b/featuretools/tests/primitive_tests/primitives_to_install.tar.gz differ diff --git a/featuretools/tests/primitive_tests/primitives_to_install/__init__.py b/featuretools/tests/primitive_tests/primitives_to_install/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/featuretools/tests/primitive_tests/primitives_to_install/custom_max.py b/featuretools/tests/primitive_tests/primitives_to_install/custom_max.py new file mode 100644 index 0000000000..f1dfbbb241 --- /dev/null +++ b/featuretools/tests/primitive_tests/primitives_to_install/custom_max.py @@ -0,0 +1,7 @@ +from featuretools.primitives.base import 
make_agg_primitive +from featuretools.variable_types import Numeric + +CustomMax = make_agg_primitive(lambda x: max(x), + name="CustomMax", + input_types=[Numeric], + return_type=Numeric) diff --git a/featuretools/tests/primitive_tests/primitives_to_install/custom_mean.py b/featuretools/tests/primitive_tests/primitives_to_install/custom_mean.py new file mode 100644 index 0000000000..08229bc948 --- /dev/null +++ b/featuretools/tests/primitive_tests/primitives_to_install/custom_mean.py @@ -0,0 +1,7 @@ +from featuretools.primitives.base import make_agg_primitive +from featuretools.variable_types import Numeric + +CustomMean = make_agg_primitive(lambda x: sum(x) / len(x), + name="CustomMean", + input_types=[Numeric], + return_type=Numeric) diff --git a/featuretools/tests/primitive_tests/primitives_to_install/custom_sum.py b/featuretools/tests/primitive_tests/primitives_to_install/custom_sum.py new file mode 100644 index 0000000000..144101976f --- /dev/null +++ b/featuretools/tests/primitive_tests/primitives_to_install/custom_sum.py @@ -0,0 +1,7 @@ +from featuretools.primitives.base import make_agg_primitive +from featuretools.variable_types import Numeric + +CustomSum = make_agg_primitive(lambda x: sum(x), + name="CustomSum", + input_types=[Numeric], + return_type=Numeric) diff --git a/featuretools/tests/primitive_tests/test_agg_feats.py b/featuretools/tests/primitive_tests/test_agg_feats.py index 5c33c536bc..2f869e9470 100644 --- a/featuretools/tests/primitive_tests/test_agg_feats.py +++ b/featuretools/tests/primitive_tests/test_agg_feats.py @@ -10,16 +10,18 @@ import featuretools as ft from featuretools.primitives import ( - AggregationPrimitive, Count, - Feature, Mean, Median, NMostCommon, NumTrue, Sum, TimeSinceLast, - get_aggregation_primitives, + get_aggregation_primitives +) +from featuretools.primitives.base import ( + AggregationPrimitive, + Feature, make_agg_primitive ) from featuretools.synthesis.deep_feature_synthesis import ( diff --git 
a/featuretools/tests/primitive_tests/test_direct_features.py b/featuretools/tests/primitive_tests/test_direct_features.py index 6f448bd8de..100e53dc27 100644 --- a/featuretools/tests/primitive_tests/test_direct_features.py +++ b/featuretools/tests/primitive_tests/test_direct_features.py @@ -3,7 +3,7 @@ from ..testing_utils import make_ecommerce_entityset from featuretools.computational_backends import PandasBackend -from featuretools.primitives import DirectFeature +from featuretools.primitives.base import DirectFeature @pytest.fixture(scope='module') diff --git a/featuretools/tests/primitive_tests/test_install_primitives.py b/featuretools/tests/primitive_tests/test_install_primitives.py new file mode 100644 index 0000000000..0e87c78e02 --- /dev/null +++ b/featuretools/tests/primitive_tests/test_install_primitives.py @@ -0,0 +1,110 @@ +import os +import subprocess + +import pytest + +import featuretools +from featuretools.primitives.base import PrimitiveBase +from featuretools.primitives.install import ( + extract_archive, + get_installation_dir, + list_primitive_files, + load_primitive_from_file +) + +try: + from builtins import reload +except Exception: + from importlib import reload + + +@pytest.fixture(scope='module') +def this_dir(): + return os.path.dirname(os.path.abspath(__file__)) + + +@pytest.fixture(scope='module') +def primitives_to_install_dir(this_dir): + return os.path.join(this_dir, "primitives_to_install") + + +@pytest.fixture(scope='module') +def bad_primitives_files_dir(this_dir): + return os.path.join(this_dir, "bad_primitive_files") + + +@pytest.mark.parametrize("install_path", [ + primitives_to_install_dir(this_dir()), + os.path.join(this_dir(), "primitives_to_install.tar.gz"), + "s3://featuretools-static/primitives_to_install.tar.gz", + "https://s3.amazonaws.com/featuretools-static/primitives_to_install.tar.gz", + "INSTALL_VIA_CLI", + "INSTALL_VIA_MODULE", +]) +def test_install_primitives(install_path): + installation_dir = 
get_installation_dir() + custom_max_file = os.path.join(installation_dir, "custom_max.py") + custom_mean_file = os.path.join(installation_dir, "custom_mean.py") + custom_sum_file = os.path.join(installation_dir, "custom_sum.py") + + # make sure primitive files aren't there e.g from a failed run + for p in [custom_max_file, custom_mean_file, custom_sum_file]: + try: + os.unlink(p) + except Exception: + pass + + # handle install via command line as a special case + if install_path == "INSTALL_VIA_CLI": + subprocess.check_output(['featuretools', 'install', '--no-prompt', primitives_to_install_dir(this_dir())]) + elif install_path == "INSTALL_VIA_MODULE": + subprocess.check_output(['python', '-m', 'featuretools', 'install', '--no-prompt', primitives_to_install_dir(this_dir())]) + else: + featuretools.primitives.install.install_primitives(install_path, prompt=False) + + # must reload submodule for it to work + reload(featuretools.primitives.installed) + from featuretools.primitives.installed import CustomMax, CustomSum, CustomMean # noqa: F401 + + files = list_primitive_files(installation_dir) + assert set(files) == {custom_max_file, custom_mean_file, custom_sum_file} + + files = list_primitive_files(installation_dir) + # then delete to clean up + for f in files: + os.unlink(f) + + +def test_list_primitive_files(primitives_to_install_dir): + files = list_primitive_files(primitives_to_install_dir) + custom_max_file = os.path.join(primitives_to_install_dir, "custom_max.py") + custom_mean_file = os.path.join(primitives_to_install_dir, "custom_mean.py") + custom_sum_file = os.path.join(primitives_to_install_dir, "custom_sum.py") + assert set(files) == {custom_max_file, custom_mean_file, custom_sum_file} + + +def test_load_primitive_from_file(primitives_to_install_dir): + primitve_file = os.path.join(primitives_to_install_dir, "custom_max.py") + primitive_name, primitive_obj = load_primitive_from_file(primitve_file) + assert issubclass(primitive_obj, PrimitiveBase) + + +def 
test_errors_more_than_one_primitive_in_file(bad_primitives_files_dir): + primitive_file = os.path.join(bad_primitives_files_dir, "multiple_primitives.py") + error_text = 'More than one primitive defined in file %s' % primitive_file + with pytest.raises(RuntimeError, match=error_text): + load_primitive_from_file(primitive_file) + + +def test_errors_no_primitive_in_file(bad_primitives_files_dir): + primitive_file = os.path.join(bad_primitives_files_dir, "no_primitives.py") + error_text = 'No primitive defined in file %s' % primitive_file + with pytest.raises(RuntimeError, match=error_text): + load_primitive_from_file(primitive_file) + + +def test_extract_non_archive_errors(bad_primitives_files_dir): + primitive_file = os.path.join(bad_primitives_files_dir, "no_primitives.py") + error_text = "Cannot extract archive from %s. Must provide archive ending in .tar or .tar.gz" % primitive_file + with pytest.raises(RuntimeError, match=error_text): + extract_archive(primitive_file) diff --git a/featuretools/tests/primitive_tests/test_primitive_base.py b/featuretools/tests/primitive_tests/test_primitive_base.py index c2168b4c6e..a834f4fd84 100644 --- a/featuretools/tests/primitive_tests/test_primitive_base.py +++ b/featuretools/tests/primitive_tests/test_primitive_base.py @@ -3,7 +3,8 @@ from ..testing_utils import make_ecommerce_entityset -from featuretools.primitives import Feature, IdentityFeature, Last, Mode, Sum +from featuretools.primitives import Last, Mode, Sum +from featuretools.primitives.base import Feature, IdentityFeature from featuretools.variable_types import Categorical, Datetime, Id, Numeric diff --git a/featuretools/tests/primitive_tests/test_transform_features.py b/featuretools/tests/primitive_tests/test_transform_features.py index 7e07a422aa..4bf7a21c23 100644 --- a/featuretools/tests/primitive_tests/test_transform_features.py +++ b/featuretools/tests/primitive_tests/test_transform_features.py @@ -19,10 +19,8 @@ CumSum, Day, Diff, - DirectFeature, Divide, 
Equals, - Feature, GreaterThan, GreaterThanEqualTo, Haversine, @@ -45,7 +43,11 @@ Percentile, Subtract, Sum, - get_transform_primitives, + get_transform_primitives +) +from featuretools.primitives.base import ( + DirectFeature, + Feature, make_trans_primitive ) from featuretools.synthesis.deep_feature_synthesis import match diff --git a/requirements.txt b/requirements.txt index 613c070f19..7e2137323e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ +boto3>=1.9.51 +botocore>=1.12.51 numpy>=1.13.3 pandas>=0.23.0 -s3fs>=0.1.2 tqdm>=4.19.2 toolz>=0.8.2 pyyaml>=3.12 @@ -9,3 +10,6 @@ future>=0.16.0 dask>=0.19.4 distributed>=1.24.2 psutil>=5.4.8 +click>=7.0.0 +smart_open>=1.7.1 +s3fs>=0.1.5 diff --git a/setup.py b/setup.py index 78d5ab1916..d9df52acca 100644 --- a/setup.py +++ b/setup.py @@ -44,6 +44,11 @@ def finalize_options(self): tests_require=open('test-requirements.txt').readlines(), keywords='feature engineering data science machine learning', include_package_data=True, + entry_points={ + 'console_scripts': [ + 'featuretools = featuretools.__main__:cli' + ] + }, long_description=long_description, long_description_content_type='text/markdown' ) diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 5e6ab93812..0000000000 --- a/tox.ini +++ /dev/null @@ -1,14 +0,0 @@ -[tox] -skipsdist = {env:TOXBUILD:false} -envlist = clean,py27,py35,py36,py37 - -[testenv] -commands= py.test --cov=featuretools -deps= - -rtest-requirements.txt - -[testenv:clean] -commands= - coverage erase -deps= - coverage