Skip to content

Commit

Permalink
Revert changes to transform prim components for testing
Browse files Browse the repository at this point in the history
  • Loading branch information
Tamar Grey committed Feb 28, 2023
1 parent 1a364c8 commit 57dda43
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,17 @@ def transform(self, X, y=None):
es = self._make_entity_set(X_ww)
features = ft.calculate_feature_matrix(features=self._features, entityset=es)

# Convert to object dtype so that pd.NA is converted to np.nan
# until sklearn imputer can handle pd.NA in release 1.1
# FT returns these as string types, currently there isn't much difference
# in terms of performance between object and string
# see https://pandas.pydata.org/docs/user_guide/text.html#text-data-types
# "Currently, the performance of object dtype arrays of strings
# and arrays.StringArray are about the same."
features = features.astype(object, copy=False)
features.index = X_ww.index
features.ww.init(logical_types={col_: "categorical" for col_ in features})

X_ww = X_ww.ww.drop(self._columns)
X_ww = ww.concat_columns([X_ww, features])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,9 @@ def make_answer_email_fit_transform(df_with_url_and_email):
["gmail.com", "yahoo.com", "abalone.com", "hotmail.com", "email.org"],
dtype="category",
)
expected.ww["IS_FREE_EMAIL_DOMAIN(email)"] = ww.init_series(
pd.Series(
[True, True, False, True, True],
),
logical_type="BooleanNullable",
expected.ww["IS_FREE_EMAIL_DOMAIN(email)"] = pd.Series(
[True, True, False, True, True],
dtype="category",
)
expected.ww.drop(["email"], inplace=True)
return expected
Expand Down Expand Up @@ -95,11 +93,11 @@ def make_answer_email_fit_transform_missing_values(df_with_url_and_email):
)
expected.ww["IS_FREE_EMAIL_DOMAIN(email)"] = pd.Series(
[None, None, False, True, True],
dtype="boolean",
dtype="category",
)
expected.ww["IS_FREE_EMAIL_DOMAIN(email_2)"] = pd.Series(
[None, None, False, True, None],
dtype="boolean",
dtype="category",
)
return expected

Expand Down Expand Up @@ -143,7 +141,7 @@ def make_expected_logical_types_email_fit_transform():
return {
"categorical": ww.logical_types.Categorical(),
"numeric": ww.logical_types.Double(),
"IS_FREE_EMAIL_DOMAIN(email)": ww.logical_types.BooleanNullable(),
"IS_FREE_EMAIL_DOMAIN(email)": ww.logical_types.Categorical(),
"EMAIL_ADDRESS_TO_DOMAIN(email)": ww.logical_types.Categorical(),
"integer": ww.logical_types.Integer(),
"boolean": ww.logical_types.Boolean(),
Expand Down Expand Up @@ -172,8 +170,8 @@ def make_expected_logical_types_email_fit_transform_missing_values():
"numeric": ww.logical_types.Double(),
"EMAIL_ADDRESS_TO_DOMAIN(email)": ww.logical_types.Categorical(),
"EMAIL_ADDRESS_TO_DOMAIN(email_2)": ww.logical_types.Categorical(),
"IS_FREE_EMAIL_DOMAIN(email)": ww.logical_types.BooleanNullable(),
"IS_FREE_EMAIL_DOMAIN(email_2)": ww.logical_types.BooleanNullable(),
"IS_FREE_EMAIL_DOMAIN(email)": ww.logical_types.Categorical(),
"IS_FREE_EMAIL_DOMAIN(email_2)": ww.logical_types.Categorical(),
"integer": ww.logical_types.Integer(),
"boolean": ww.logical_types.Boolean(),
"nat_lang": ww.logical_types.NaturalLanguage(),
Expand Down

0 comments on commit 57dda43

Please sign in to comment.