Skip to content

Commit

Permalink
replace latlong during entity creation
Browse files Browse the repository at this point in the history
  • Loading branch information
thehomebrewnerd committed Jun 2, 2020
1 parent 2289df6 commit 5cc780d
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 11 deletions.
8 changes: 7 additions & 1 deletion featuretools/entityset/entity.py
Expand Up @@ -11,7 +11,8 @@
convert_all_variable_data,
convert_variable_data,
get_linked_vars,
infer_variable_types
infer_variable_types,
replace_latlong_nan
)
from featuretools.utils.wrangle import (
_check_time_type,
Expand Down Expand Up @@ -82,6 +83,11 @@ def __init__(self, id, df, entityset, variable_types=None,

self.set_secondary_time_index(secondary_time_index)

# Replace each bare `NaN` entry in a LatLong variable with a `(np.nan, np.nan)` tuple
latlongs = [k for k, v in self.variable_types.items() if v == vtypes.LatLong]
for latlong in latlongs:
self.df[latlong] = replace_latlong_nan(self.df[latlong])

def __repr__(self):
repr_out = u"Entity: {}\n".format(self.id)
repr_out += u" Variables:"
Expand Down
14 changes: 5 additions & 9 deletions featuretools/primitives/standard/transform_primitive.py
Expand Up @@ -5,6 +5,7 @@
TransformPrimitive
)
from featuretools.utils import convert_time_units
from featuretools.utils.entity_utils import replace_latlong_nan
from featuretools.variable_types import (
Boolean,
DateOfBirth,
Expand Down Expand Up @@ -511,7 +512,7 @@ class Latitude(TransformPrimitive):
def get_function(self):
    """Return the function that pulls the first element out of each LatLong value.

    The returned callable takes a pandas Series of two-element values and
    returns a new Series built from element ``[0]`` of every entry.
    """
    def latitude(latlong):
        # A bare NaN cannot be indexed with x[0], so swap such entries for a
        # (NaN, NaN) tuple first. Only the shared helper is called: running
        # the old private helper as well would hand its ndarray result to a
        # function that calls `.isnull()` on its argument.
        if latlong.hasnans:
            latlong = replace_latlong_nan(latlong)
        return pd.Series(x[0] for x in latlong)
    return latitude

Expand All @@ -534,7 +535,7 @@ class Longitude(TransformPrimitive):
def get_function(self):
    """Return the function that pulls the second element out of each LatLong value.

    The returned callable takes a pandas Series of two-element values and
    returns a new Series built from element ``[1]`` of every entry.
    """
    def longitude(latlong):
        # A bare NaN cannot be indexed with x[1], so swap such entries for a
        # (NaN, NaN) tuple first. Only the shared helper is called: running
        # the old private helper as well would hand its ndarray result to a
        # function that calls `.isnull()` on its argument.
        if latlong.hasnans:
            latlong = replace_latlong_nan(latlong)
        return pd.Series(x[1] for x in latlong)
    return longitude

Expand Down Expand Up @@ -577,9 +578,9 @@ def __init__(self, unit='miles'):
def get_function(self):
def haversine(latlong1, latlong2):
if latlong1.hasnans:
latlong1 = _replace_latlong_nan(latlong1)
latlong1 = replace_latlong_nan(latlong1)
if latlong2.hasnans:
latlong2 = _replace_latlong_nan(latlong2)
latlong2 = replace_latlong_nan(latlong2)
lat_1s = np.array([x[0] for x in latlong1])
lon_1s = np.array([x[1] for x in latlong1])
lat_2s = np.array([x[0] for x in latlong2])
Expand Down Expand Up @@ -635,8 +636,3 @@ def get_function(self):
def age(x, time=None):
return (time - x).dt.days / 365
return age


def _replace_latlong_nan(values):
    """Replace a single `NaN` value with a tuple: `(np.nan, np.nan)`.

    Private alias kept for existing callers. The logic lives in
    `replace_latlong_nan` (imported from `featuretools.utils.entity_utils`
    at the top of this module), so the two cannot drift apart.
    """
    return replace_latlong_nan(values)
8 changes: 8 additions & 0 deletions featuretools/tests/entityset_tests/test_entity.py
Expand Up @@ -223,3 +223,11 @@ def test_passing_strings_to_variable_types_dfs():
features = ft.dfs(entities, relationships, target_entity="teams", features_only=True)
name_class = features[0].entity['name'].__class__
assert name_class == variable_types['text']


def test_replace_latlong_nan_during_entity_creation(es):
    """Creating an Entity turns a bare NaN in a LatLong column into a NaN tuple."""
    nan_es = ft.EntitySet("latlong_nan")
    df = es['log'].df.copy()
    # Write through one .loc call. The chained form `df['latlong'][0] = ...`
    # may assign into a temporary copy and leave `df` unchanged.
    # NOTE(review): assumes the log index contains the label 0 - confirm.
    df.loc[0, 'latlong'] = np.nan
    entity = ft.Entity(
        id="nan_latlong_entity",
        df=df,
        entityset=nan_es,
        variable_types=es['log'].variable_types,
    )
    # Tuple equality checks element identity before equality, so two tuples
    # holding the same `np.nan` object compare equal even though nan != nan.
    assert entity.df['latlong'][0] == (np.nan, np.nan)
7 changes: 7 additions & 0 deletions featuretools/tests/primitive_tests/test_transform_features.py
Expand Up @@ -622,6 +622,13 @@ def test_haversine_with_nan(es):
assert np.allclose(values, real, atol=0.0001, equal_nan=True)


def test_hav(es):
    """Smoke test: run DFS with the LatLong primitives over an all-NaN column.

    There are no assertions; the test passes as long as nothing raises.
    """
    log_df = es['log'].df
    log_df['latlong'] = np.nan
    es['log'].update_data(log_df)

    dfs_kwargs = dict(
        entityset=es,
        target_entity='sessions',
        trans_primitives=['latitude', 'longitude', 'haversine'],
        agg_primitives=['last', 'first'],
    )
    fm, features = ft.dfs(**dfs_kwargs)


def test_text_primitives(es):
words = ft.Feature(es['log']['comments'], primitive=NumWords)
chars = ft.Feature(es['log']['comments'], primitive=NumCharacters)
Expand Down
12 changes: 11 additions & 1 deletion featuretools/tests/utils_tests/test_entity_utils.py
@@ -1,3 +1,4 @@
import numpy as np
import pandas as pd

import featuretools as ft
Expand All @@ -6,7 +7,8 @@
convert_all_variable_data,
convert_variable_data,
get_linked_vars,
infer_variable_types
infer_variable_types,
replace_latlong_nan
)


Expand Down Expand Up @@ -163,3 +165,11 @@ def test_get_linked_vars():

customers_linked_vars = get_linked_vars(es['customers'])
assert customers_linked_vars == ['customer_id']


def test_replace_latlong_nan():
    """Only the bare NaN entry is replaced; tuple entries pass through unchanged."""
    values = pd.Series([(np.nan, np.nan), np.nan, (10, 5)])
    result = replace_latlong_nan(values)

    # Expected value at each position: untouched, replaced, untouched.
    expected = [values[0], (np.nan, np.nan), values[2]]
    for position, wanted in enumerate(expected):
        assert result[position] == wanted
5 changes: 5 additions & 0 deletions featuretools/utils/entity_utils.py
Expand Up @@ -191,3 +191,8 @@ def col_is_datetime(col):
return True

return False


def replace_latlong_nan(values):
    """Replace a single `NaN` value with a tuple: `(np.nan, np.nan)`.

    Args:
        values: one-dimensional pandas Series or object ndarray whose entries
            are either two-element tuples or a bare `NaN`.

    Returns:
        numpy.ndarray: object array of the same length in which every bare
        `NaN` has been replaced by `(np.nan, np.nan)`; other entries are
        passed through untouched.
    """
    # pd.isnull accepts both Series and ndarrays, so this function can be fed
    # its own (ndarray) output; `values.isnull()` only exists on pandas objects.
    missing = pd.isnull(values)
    # Explicit object dtype keeps the filler well defined for empty input.
    filler = pd.Series([(np.nan, np.nan)] * len(values), dtype=object)
    return np.where(missing, filler, values)

0 comments on commit 5cc780d

Please sign in to comment.