Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up unused methods #293

Merged
merged 9 commits into from Oct 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -25,7 +25,10 @@
)

from featuretools.primitives import AggregationPrimitive, PrimitiveBase
from featuretools.utils.gen_utils import make_tqdm_iterator
from featuretools.utils.gen_utils import (
get_relationship_variable_id,
make_tqdm_iterator
)
from featuretools.utils.wrangle import _check_time_type
from featuretools.variable_types import DatetimeTimeIndex, NumericTimeIndex

Expand Down Expand Up @@ -435,7 +438,8 @@ def approximate_features(features, cutoff_time, window, entityset, backend,
cutoffs_with_approx_e_ids[target_instance_colname])

if frames is not None:
rvar = entityset.gen_relationship_var(target_entity.id, approx_entity_id)
path = entityset.find_path(approx_entity_id, target_entity.id)
rvar = get_relationship_variable_id(path)
parent_instance_frame = frames[approx_entity_id][target_entity.id]
cutoffs_with_approx_e_ids[rvar] = \
cutoffs_with_approx_e_ids.merge(parent_instance_frame[[rvar]],
Expand Down
8 changes: 5 additions & 3 deletions featuretools/computational_backends/pandas_backend.py
Expand Up @@ -15,15 +15,17 @@
from .feature_tree import FeatureTree

from featuretools import variable_types
from featuretools.entityset.relationship import Relationship
from featuretools.exceptions import UnknownFeature
from featuretools.primitives import (
AggregationPrimitive,
DirectFeature,
IdentityFeature,
TransformPrimitive
)
from featuretools.utils.gen_utils import make_tqdm_iterator
from featuretools.utils.gen_utils import (
get_relationship_variable_id,
make_tqdm_iterator
)

standard_library.install_aliases()
warnings.simplefilter('ignore', np.RankWarning)
Expand Down Expand Up @@ -389,7 +391,7 @@ def _calculate_agg_features(self, features, entity_frames):
relationship_path = self.entityset.find_backward_path(entity.id,
child_entity.id)

groupby_var = Relationship._get_link_variable_name(relationship_path)
groupby_var = get_relationship_variable_id(relationship_path)

# if the use_previous property exists on this feature, include only the
# instances from the child entity included in that Timedelta
Expand Down
14 changes: 0 additions & 14 deletions featuretools/entityset/entity.py
Expand Up @@ -303,20 +303,6 @@ def convert_variable_data(self, column_id, new_type, **kwargs):
raise Exception("Cannot convert column %s to %s" %
(column_id, new_type))

def is_child_of(self, entity_id):
    '''
    Check whether this entity is the child in a relationship whose
    parent is ``entity_id``.

    Args:
        entity_id (str): Id of the candidate parent entity.

    Returns:
        bool: True if any backward relationship of ``entity_id`` has
            this entity as its child entity, False otherwise.
    '''
    backward = self.entityset.get_backward_relationships(entity_id)
    child_ids = (rel.child_entity.id for rel in backward)
    return self.id in child_ids

def is_parent_of(self, entity_id):
    '''
    Check whether this entity is the parent in a relationship whose
    child is ``entity_id``.

    Args:
        entity_id (str): Id of the candidate child entity.

    Returns:
        bool: True if any backward relationship of this entity has
            ``entity_id`` as its child entity, False otherwise.
    '''
    backward = self.entityset.get_backward_relationships(self.id)
    child_ids = (rel.child_entity.id for rel in backward)
    return entity_id in child_ids

def query_by_values(self, instance_vals, variable_id=None, columns=None,
time_last=None, training_window=None):
"""Query instances that have variable with given value
Expand Down
48 changes: 9 additions & 39 deletions featuretools/entityset/entityset.py
Expand Up @@ -621,22 +621,6 @@ def get_backward_relationships(self, entity_id):
"""
return [r for r in self.relationships if r.parent_entity.id == entity_id]

def get_relationship(self, eid_1, eid_2):
    """Get relationship, if any, between eid_1 and eid_2

    The two ids may be given in either order: a relationship matches
    when one id is its child entity and the other is its parent entity.
    The first match found in ``self.relationships`` is returned.

    Args:
        eid_1 (str): Id of first entity to get relationships for.
        eid_2 (str): Id of second entity to get relationships for.

    Returns:
        :class:`.Relationship`: Relationship or None
    """
    matching = (
        rel for rel in self.relationships
        if (rel.child_entity.id == eid_1 and rel.parent_entity.id == eid_2) or
        (rel.parent_entity.id == eid_1 and rel.child_entity.id == eid_2)
    )
    # next() with a default yields None when nothing links the two entities
    return next(matching, None)

def _is_backward_relationship(self, rel, prev_ent):
if prev_ent == rel.parent_entity.id:
return True
Expand Down Expand Up @@ -1082,15 +1066,12 @@ def related_instances(self, start_entity_id, final_entity_id,
pd.DataFrame : Dataframe of related instances on the final_entity_id
"""
# Load the filtered dataframe for the first entity
training_window_is_dict = isinstance(training_window, dict)
window = training_window
start_estore = self.entity_dict[start_entity_id]
# This check might be brittle
if instance_ids is not None and not hasattr(instance_ids, '__iter__'):
instance_ids = [instance_ids]

if training_window_is_dict:
window = training_window.get(start_estore.id)
df = start_estore.query_by_values(instance_vals=instance_ids,
time_last=time_last,
training_window=window)
Expand All @@ -1108,16 +1089,20 @@ def related_instances(self, start_entity_id, final_entity_id,

# Walk down the path of entities and take related instances at each step
for i, r in enumerate(path):
new_entity_id = r.get_other_entity(prev_entity_id)
rvar_old = r.get_entity_variable(prev_entity_id)
rvar_new = r.get_entity_variable(new_entity_id)
if r.child_entity.id == prev_entity_id:
new_entity_id = r.parent_entity.id
rvar_old = r.child_variable.id
rvar_new = r.parent_variable.id
else:
new_entity_id = r.child_entity.id
rvar_old = r.parent_variable.id
rvar_new = r.child_variable.id

all_ids = df[rvar_old]

# filter the next entity by the values found in the previous
# entity's relationship column
entity_store = self.entity_dict[new_entity_id]
if training_window_is_dict:
window = training_window.get(entity_store.id)
df = entity_store.query_by_values(all_ids,
variable_id=rvar_new,
time_last=time_last,
Expand All @@ -1127,17 +1112,6 @@ def related_instances(self, start_entity_id, final_entity_id,

return df

def gen_relationship_var(self, child_eid, parent_eid):
    """Build the dotted link-variable name joining two entities.

    The path is looked up with ``self.find_path(parent_eid, child_eid)``.
    The name starts as the child variable id of the first relationship on
    that path; every further relationship prepends its parent entity id
    and a ``.`` separator.

    Args:
        child_eid: Id of the child entity.
        parent_eid: Id of the parent entity.

    Returns:
        str: The generated variable name.
    """
    relationships = self.find_path(parent_eid, child_eid)
    # the first relationship is removed from the returned list, so only
    # the remaining ones are visited by the loop below
    link_name = relationships.pop(0).child_variable.id
    for rel in relationships:
        link_name = '%s.%s' % (rel.parent_entity.id, link_name)
    return link_name

###########################################################################
# Private methods ######################################################
###########################################################################
Expand Down Expand Up @@ -1332,7 +1306,3 @@ def _load_dummy_entity_data_and_variable_types(cls, metadata):
columns.append(vid)
df = pd.DataFrame({c: [d] for c, d in zip(columns, defaults)}).head(0)
return df, variable_types


def make_index_variable_name(entity_id):
    """Return the index variable name for an entity.

    Args:
        entity_id (str): Id of the entity.

    Returns:
        str: ``entity_id`` followed by the suffix ``_id``.
    """
    suffix = "_id"
    return entity_id + suffix
34 changes: 0 additions & 34 deletions featuretools/entityset/relationship.py
Expand Up @@ -65,37 +65,3 @@ def parent_variable(self):
def child_variable(self):
"""Instance of variable in child entity"""
return self.child_entity[self._child_variable_id]

def get_entity_variable(self, entity_id):
    """Return the id of the variable this relationship uses on an entity.

    Args:
        entity_id: Id of the child or parent entity of this relationship.

    Returns:
        The child variable id if ``entity_id`` is the child entity,
        otherwise the parent variable id.

    Raises:
        AttributeError: If ``entity_id`` is neither the child nor the
            parent entity of this relationship.
    """
    is_child = self._child_entity_id == entity_id
    if not is_child and not (self._parent_entity_id == entity_id):
        raise AttributeError("Entity '%s' is not part of relationship" %
                             entity_id)
    return self._child_variable_id if is_child else self._parent_variable_id

def get_other_entity(self, entity_id):
    """Return the id of the entity on the opposite side of the relationship.

    Args:
        entity_id: Id of the child or parent entity of this relationship.

    Returns:
        The parent entity id if ``entity_id`` is the child entity, or the
        child entity id if ``entity_id`` is the parent entity.

    Raises:
        AttributeError: If ``entity_id`` is neither the child nor the
            parent entity of this relationship.
    """
    # (this side, opposite side); the child side is checked first
    sides = (
        (self._child_entity_id, self._parent_entity_id),
        (self._parent_entity_id, self._child_entity_id),
    )
    for own_id, opposite_id in sides:
        if own_id == entity_id:
            return opposite_id
    raise AttributeError("Entity '%s' is not part of relationship" %
                         entity_id)

def get_other_variable(self, variable_id):
    """Return the id of the variable on the opposite side of the relationship.

    Args:
        variable_id: Id of the child or parent variable of this
            relationship.

    Returns:
        The parent variable id if ``variable_id`` is the child variable,
        or the child variable id if ``variable_id`` is the parent variable.

    Raises:
        AttributeError: If ``variable_id`` is neither the child nor the
            parent variable of this relationship.
    """
    if self._child_variable_id == variable_id:
        counterpart = self._parent_variable_id
    elif self._parent_variable_id == variable_id:
        counterpart = self._child_variable_id
    else:
        raise AttributeError("Variable '%s' is not part of relationship" %
                             variable_id)
    return counterpart

@classmethod
def _get_link_variable_name(cls, path):
    """Build the dotted link-variable name for a relationship path.

    The name starts as the child variable id of the first relationship in
    ``path``. Each following relationship prepends the id of the entity
    that owns its parent variable, separated by ``.``.

    Args:
        path (list): Non-empty sequence of relationships.

    Returns:
        str: The generated variable name.
    """
    link_name = path[0].child_variable.id
    for rel in path[1:]:
        link_name = '%s.%s' % (rel.parent_variable.entity.id, link_name)
    return link_name
1 change: 0 additions & 1 deletion featuretools/tests/entityset_tests/test_es.py
Expand Up @@ -844,7 +844,6 @@ def test_normalize_entity_new_time_index(entityset):
make_time_index=True,
new_entity_time_index="value_time")

assert entityset['log'].is_child_of('values')
assert entityset['values'].time_index == 'value_time'
assert 'value_time' in entityset['values'].df.columns
assert len(entityset['values'].df.columns) == 2
Expand Down
30 changes: 10 additions & 20 deletions featuretools/utils/gen_utils.py
@@ -1,5 +1,4 @@
import sys
from builtins import object

from pympler.asizeof import asizeof as getsize # noqa
from tqdm import tqdm
Expand Down Expand Up @@ -31,25 +30,6 @@ def session_type():
return "ipython"


class RedirectStdStreams(object):
    """Context manager that temporarily replaces ``sys.stdout``/``sys.stderr``.

    On entry the current streams are flushed and remembered, then swapped
    for the replacements. On exit the replacements are flushed and the
    remembered streams are put back.

    Args:
        stdout: Stream to install as ``sys.stdout``. A falsy value keeps
            the stream that is current when the object is constructed.
        stderr: Stream to install as ``sys.stderr``. A falsy value keeps
            the stream that is current when the object is constructed.
    """

    def __init__(self, stdout=None, stderr=None):
        self._stdout = stdout or sys.stdout
        self._stderr = stderr or sys.stderr

    def __enter__(self):
        self.old_stdout, self.old_stderr = sys.stdout, sys.stderr
        # flush pending output so it is not interleaved with redirected text
        self.old_stdout.flush()
        self.old_stderr.flush()
        sys.stdout, sys.stderr = self._stdout, self._stderr
        # return the manager so ``with RedirectStdStreams(...) as r`` works
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        try:
            self._stdout.flush()
            self._stderr.flush()
        finally:
            # always restore the original streams, even if flushing the
            # replacement streams raises
            sys.stdout = self.old_stdout
            sys.stderr = self.old_stderr


def make_tqdm_iterator(**kwargs):
options = {
"file": sys.stdout,
Expand Down Expand Up @@ -88,3 +68,13 @@ def is_string(test_value):
except NameError:
python_string = str
return isinstance(test_value, python_string)


def get_relationship_variable_id(path):
    """Build the dotted variable id that links the entities along a path.

    The id starts as the child variable id of the first relationship in
    ``path``. Each following relationship prepends its parent entity id,
    separated by ``.``.

    Args:
        path (list): Non-empty sequence of relationships.

    Returns:
        str: The generated variable id.
    """
    link_id = path[0].child_variable.id
    for relationship in path[1:]:
        link_id = '%s.%s' % (relationship.parent_entity.id, link_id)
    return link_id
39 changes: 0 additions & 39 deletions featuretools/utils/wrangle.py
Expand Up @@ -88,45 +88,6 @@ def _check_timedelta(td, entity_id=None, related_entity_id=None):
return Timedelta(value, unit, entity=entity_id)


def _check_variable_list(variables, entity, ignore_unknown=False):
    """Ensures a list of values representing variables is
    a list of variable instances

    Args:
        variables (list): Values to convert; each one is passed to
            ``_check_variable``.
        entity: Entity the variables are resolved against.
        ignore_unknown (bool): If True, falsy results from
            ``_check_variable`` are dropped from the returned list.

    Returns:
        list: The converted variables; an empty list when ``variables``
            is empty.
    """
    if len(variables) == 0:
        return []

    checked = [_check_variable(v, entity, ignore_unknown=ignore_unknown)
               for v in variables]
    if ignore_unknown:
        # _check_variable returns None for unknown variables in this mode
        return [v for v in checked if v]
    return checked


def _check_variable(variable, entity, ignore_unknown=False):
    """Ensures a value representing a variable is
    a variable instance

    Args:
        variable: A ``variable_types.Variable`` instance, or a value used
            to look the variable up with ``entity[variable]``.
        entity: Entity the variable is resolved against.
        ignore_unknown (bool): If True, return None instead of resolving
            a variable that is not found on ``entity``.

    Returns:
        The variable instance, or None when ``ignore_unknown`` is set and
        the variable is not found on ``entity``.
    """
    if isinstance(variable, variable_types.Variable):
        # already an instance: only check membership when asked to
        if ignore_unknown and variable.id not in entity:
            return None
        return variable

    if ignore_unknown and variable not in entity.variables:
        return None
    return entity[variable]


def _check_entity(entity, entityset):
    """Ensures a value representing an entity is
    an entity instance

    Args:
        entity: A ``BaseEntity`` instance, or a key used to look the
            entity up with ``entityset[entity]``.
        entityset: Container the entity is looked up in when ``entity``
            is not already a ``BaseEntity``.

    Returns:
        ``entity`` itself if it is a ``BaseEntity``, otherwise
        ``entityset[entity]``.
    """
    from featuretools.entityset.base_entity import BaseEntity
    return entity if isinstance(entity, BaseEntity) else entityset[entity]


def _check_time_against_column(time, time_column):
'''
Check to make sure that time is compatible with time_column,
Expand Down
8 changes: 0 additions & 8 deletions featuretools/variable_types/variable.py
Expand Up @@ -113,14 +113,6 @@ def __init__(self, id, entity, name=None):
super(Discrete, self).__init__(id, entity, name)
self._interesting_values = []

@property
def percent_unique(self):
    """Ratio of ``nunique`` to ``count`` for this variable.

    Returns:
        None if either ``nunique`` or ``count`` is None; 0 if ``count``
        is not positive; otherwise ``nunique / count``.
    """
    stats_missing = self.nunique is None or self.count is None
    if stats_missing:
        return None
    return self.nunique / self.count if self.count > 0 else 0

@property
def interesting_values(self):
return self._interesting_values
Expand Down