Skip to content

Commit

Permalink
Implementing Country Code and Sub Region Codes as variable types (#430)
Browse files Browse the repository at this point in the history
* Implementing Country Code and Sub Region Codes as variable types

* linting

* linting

* Update api_reference.rst

* Update mock_ds.py

* Update mock_ds.py

* addressing comments

changed _dtype_repr to include underscore

* lint
  • Loading branch information
jxwolstenholme authored and kmax12 committed Feb 14, 2019
1 parent 3f69e70 commit 1998ec7
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 2 deletions.
2 changes: 2 additions & 0 deletions docs/source/api_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,8 @@ Variable types
ZIPCode
IPAddress
EmailAddress
CountryCode
SubRegionCode


Feature Selection
Expand Down
17 changes: 16 additions & 1 deletion featuretools/tests/testing_utils/mock_ds.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,18 @@ def make_ecommerce_files(with_integer_time_index=False, base_path=None, file_loc
['12345-6789'] * 2 +
[np.nan] +
[''] * 2)

countrycodes = list(['US'] * 5 +
['AL'] * 4 +
[np.nan] * 2 +
[''] * 3 +
['ALB'] * 2 +
['USA'])
subregioncodes = list(['US-AZ'] * 5 +
['US-MT'] * 4 +
[np.nan] * 2 +
[''] +
['UG-219'] * 2 +
['ZM-06'] * 3)
log_df = pd.DataFrame({
'id': range(17),
'session_id': [0] * 5 + [1] * 4 + [2] * 1 + [3] * 2 + [4] * 3 + [5] * 2,
Expand All @@ -137,6 +148,8 @@ def make_ecommerce_files(with_integer_time_index=False, base_path=None, file_loc
'latlong': latlong,
'latlong2': latlong2,
'zipcode': zipcodes,
'countrycode': countrycodes,
'subregioncode': subregioncodes,
'value_many_nans': values_many_nans,
'priority_level': [0] * 2 + [1] * 5 + [0] * 6 + [2] * 2 + [1] * 2,
'purchased': [True] * 11 + [False] * 4 + [True, False],
Expand Down Expand Up @@ -274,6 +287,8 @@ def make_variable_types(with_integer_time_index=False):
'latlong': variable_types.LatLong,
'latlong2': variable_types.LatLong,
'zipcode': variable_types.ZIPCode,
'countrycode': variable_types.CountryCode,
'subregioncode': variable_types.SubRegionCode,
'value_many_nans': variable_types.Numeric,
'priority_level': variable_types.Ordinal,
'purchased': variable_types.Boolean,
Expand Down
23 changes: 22 additions & 1 deletion featuretools/variable_types/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,10 +257,31 @@ class EmailAddress(Variable):
_default_pandas_dtype = str


class CountryCode(Categorical):
    """Represents an ISO 3166-1 country code.

    Only ISO 3166-1 (country-level) codes are supported, and values
    should be in the Alpha-2 (two-letter) format.
    e.g. United States of America = US
    """
    # NOTE(review): presumably the short string name used for this variable
    # type; how it is consumed is defined outside this view -- confirm.
    _dtype_repr = "country_code"
    # Default pandas dtype for columns of this type: plain strings.
    _default_pandas_dtype = str


class SubRegionCode(Categorical):
    """Represents an ISO 3166-2 sub-region (subdivision) code.

    Only ISO 3166-2 (sub-region) codes are supported. Each value should
    be the country's Alpha-2 code, a hyphen, and the sub-region code.
    e.g. United States of America, Arizona = US-AZ
    """
    # NOTE(review): presumably the short string name used for this variable
    # type; how it is consumed is defined outside this view -- confirm.
    _dtype_repr = "subregion_code"
    # Default pandas dtype for columns of this type: plain strings.
    _default_pandas_dtype = str


ALL_VARIABLE_TYPES = [Datetime, Numeric, Timedelta,
Categorical, Text, Ordinal,
Boolean, LatLong, ZIPCode, IPAddress,
EmailAddress]
EmailAddress, CountryCode,
SubRegionCode]


DEFAULT_DTYPE_VALUES = {
Expand Down

0 comments on commit 1998ec7

Please sign in to comment.