Skip to content

Commit

Permalink
Use Featuretools Primitives in DateTimeFeaturizer (#3081)
Browse files: browse the repository at this point in the history
* Use FT Primitives

* RL

* Fix encode as categories logic

* Remove pd row
  • Loading branch information
jeremyliweishih committed Nov 19, 2021
1 parent 26d38a4 commit 72b8048
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 28 deletions.
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
Expand Up @@ -13,6 +13,7 @@ Release Notes
* Fixed bug where components with tuned integer hyperparameters could not get converted to JSON format :pr:`3049`
* Changes
* Delete ``predict_uses_y`` estimator attribute :pr:`3069`
* Change ``DateTimeFeaturizer`` to use corresponding Featuretools primitives :pr:`3081`
* Documentation Changes
* Updated docs to use data check action methods rather than manually cleaning data :pr:`3050`
* Testing Changes
Expand Down
@@ -1,40 +1,40 @@
"""Transformer that can automatically extract features from datetime columns."""
import numpy as np
import pandas as pd
import woodwork as ww
from featuretools.primitives import Hour, Month, Weekday, Year

from evalml.pipelines.components.transformers import Transformer
from evalml.utils import infer_feature_types


def _extract_year(col, encode_as_categories=False):
return col.dt.year, None


_month_to_int_mapping = {
"January": 0,
"February": 1,
"March": 2,
"April": 3,
"May": 4,
"June": 5,
"July": 6,
"August": 7,
"September": 8,
"October": 9,
"November": 10,
"December": 11,
return Year()(col), None


_int_to_month_mapping = {
0: "January",
1: "February",
2: "March",
3: "April",
4: "May",
5: "June",
6: "July",
7: "August",
8: "September",
9: "October",
10: "November",
11: "December",
}


def _extract_month(col, encode_as_categories=False):
months = col.dt.month_name()
months_unique = months.unique()
months_encoded = months.map(lambda m: _month_to_int_mapping.get(m, np.nan))
month = Month()
months = month(col) - 1
months_unique = pd.Series(months.unique())
if encode_as_categories:
months_encoded = ww.init_series(months_encoded, logical_type="Categorical")
return months_encoded, {
m: _month_to_int_mapping.get(m, np.nan) for m in months_unique
}
months = ww.init_series(months, logical_type="Categorical")
return months, {_int_to_month_mapping.get(m, np.nan): m for m in months_unique}


_day_to_int_mapping = {
Expand All @@ -48,17 +48,29 @@ def _extract_month(col, encode_as_categories=False):
}


_int_to_day_mapping = {
0: "Sunday",
1: "Monday",
2: "Tuesday",
3: "Wednesday",
4: "Thursday",
5: "Friday",
6: "Saturday",
}


def _extract_day_of_week(col, encode_as_categories=False):
days = col.dt.day_name()
wd = Weekday()
days = wd(col) + 1
days = days.replace(7, 0)
days_unique = days.unique()
days_encoded = days.map(lambda d: _day_to_int_mapping.get(d, np.nan))
if encode_as_categories:
days_encoded = ww.init_series(days_encoded, logical_type="Categorical")
return days_encoded, {d: _day_to_int_mapping.get(d, np.nan) for d in days_unique}
days = ww.init_series(days, logical_type="Categorical")
return days, {_int_to_day_mapping.get(d, np.nan): d for d in days_unique}


def _extract_hour(col, encode_as_categories=False):
return col.dt.hour, None
return Hour()(col), None


class DateTimeFeaturizer(Transformer):
Expand Down

0 comments on commit 72b8048

Please sign in to comment.