Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix sklearn pipeline visualization failing to print OneHotEncoder #765

Merged
merged 16 commits into from Nov 17, 2022

Conversation

Zeroto521
Copy link
Owner

@Zeroto521 Zeroto521 commented Nov 10, 2022

pl_x = make_pipeline(
    GetTF(features),
    ReplaceTF({"normal": 1, "important": 2, "strategic": 3}),
    make_union(
        make_pipeline(
            GetTF(features_category),
            OneHotEncoder(),
        ),
        make_pipeline(
            GetTF(features_number),
            MinMaxScaler(),
        ),
    ),
)
pl_x
stack trace
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/IPython/core/formatters.py:972, in MimeBundleFormatter.__call__(self, obj, include, exclude)
    969     method = get_real_method(obj, self.print_method)
    971     if method is not None:
--> 972         return method(include=include, exclude=exclude)
    973     return None
    974 else:

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/sklearn/base.py:629, in BaseEstimator._repr_mimebundle_(self, **kwargs)
    627 def _repr_mimebundle_(self, **kwargs):
    628     """Mime bundle used by jupyter kernels to display estimator"""
--> 629     output = {"text/plain": repr(self)}
    630     if get_config()["display"] == "diagram":
    631         output["text/html"] = estimator_html_repr(self)

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/sklearn/base.py:279, in BaseEstimator.__repr__(self, N_CHAR_MAX)
    271 # use ellipsis for sequences with a lot of elements
    272 pp = _EstimatorPrettyPrinter(
    273     compact=True,
    274     indent=1,
    275     indent_at_name=True,
    276     n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW,
    277 )
--> 279 repr_ = pp.pformat(self)
    281 # Use bruteforce ellipsis when there are a lot of non-blank characters
    282 n_nonblank = len("".join(repr_.split()))

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/pprint.py:158, in PrettyPrinter.pformat(self, object)
    156 def pformat(self, object):
    157     sio = _StringIO()
--> 158     self._format(object, sio, 0, 0, {}, 0)
    159     return sio.getvalue()

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/pprint.py:175, in PrettyPrinter._format(self, object, stream, indent, allowance, context, level)
    173     self._readable = False
    174     return
--> 175 rep = self._repr(object, context, level)
    176 max_width = self._width - indent - allowance
    177 if len(rep) > max_width:

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/pprint.py:455, in PrettyPrinter._repr(self, object, context, level)
    454 def _repr(self, object, context, level):
--> 455     repr, readable, recursive = self.format(object, context.copy(),
    456                                             self._depth, level)
    457     if not readable:
    458         self._readable = False

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/sklearn/utils/_pprint.py:189, in _EstimatorPrettyPrinter.format(self, object, context, maxlevels, level)
    188 def format(self, object, context, maxlevels, level):
--> 189     return _safe_repr(
    190         object, context, maxlevels, level, changed_only=self._changed_only
    191     )

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/sklearn/utils/_pprint.py:452, in _safe_repr(object, context, maxlevels, level, changed_only)
    448 for k, v in items:
    449     krepr, kreadable, krecur = saferepr(
    450         k, context, maxlevels, level, changed_only=changed_only
    451     )
--> 452     vrepr, vreadable, vrecur = saferepr(
    453         v, context, maxlevels, level, changed_only=changed_only
    454     )
    455     append("%s=%s" % (krepr.strip("'"), vrepr))
    456     readable = readable and kreadable and vreadable

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/sklearn/utils/_pprint.py:419, in _safe_repr(object, context, maxlevels, level, changed_only)
    417 level += 1
    418 for o in object:
--> 419     orepr, oreadable, orecur = _safe_repr(
    420         o, context, maxlevels, level, changed_only=changed_only
    421     )
    422     append(orepr)
    423     if not oreadable:

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/sklearn/utils/_pprint.py:419, in _safe_repr(object, context, maxlevels, level, changed_only)
    417 level += 1
    418 for o in object:
--> 419     orepr, oreadable, orecur = _safe_repr(
    420         o, context, maxlevels, level, changed_only=changed_only
    421     )
    422     append(orepr)
    423     if not oreadable:

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/sklearn/utils/_pprint.py:452, in _safe_repr(object, context, maxlevels, level, changed_only)
    448 for k, v in items:
    449     krepr, kreadable, krecur = saferepr(
    450         k, context, maxlevels, level, changed_only=changed_only
    451     )
--> 452     vrepr, vreadable, vrecur = saferepr(
    453         v, context, maxlevels, level, changed_only=changed_only
    454     )
    455     append("%s=%s" % (krepr.strip("'"), vrepr))
    456     readable = readable and kreadable and vreadable

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/sklearn/utils/_pprint.py:419, in _safe_repr(object, context, maxlevels, level, changed_only)
    417 level += 1
    418 for o in object:
--> 419     orepr, oreadable, orecur = _safe_repr(
    420         o, context, maxlevels, level, changed_only=changed_only
    421     )
    422     append(orepr)
    423     if not oreadable:

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/sklearn/utils/_pprint.py:419, in _safe_repr(object, context, maxlevels, level, changed_only)
    417 level += 1
    418 for o in object:
--> 419     orepr, oreadable, orecur = _safe_repr(
    420         o, context, maxlevels, level, changed_only=changed_only
    421     )
    422     append(orepr)
    423     if not oreadable:

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/sklearn/utils/_pprint.py:452, in _safe_repr(object, context, maxlevels, level, changed_only)
    448 for k, v in items:
    449     krepr, kreadable, krecur = saferepr(
    450         k, context, maxlevels, level, changed_only=changed_only
    451     )
--> 452     vrepr, vreadable, vrecur = saferepr(
    453         v, context, maxlevels, level, changed_only=changed_only
    454     )
    455     append("%s=%s" % (krepr.strip("'"), vrepr))
    456     readable = readable and kreadable and vreadable

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/sklearn/utils/_pprint.py:419, in _safe_repr(object, context, maxlevels, level, changed_only)
    417 level += 1
    418 for o in object:
--> 419     orepr, oreadable, orecur = _safe_repr(
    420         o, context, maxlevels, level, changed_only=changed_only
    421     )
    422     append(orepr)
    423     if not oreadable:

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/sklearn/utils/_pprint.py:419, in _safe_repr(object, context, maxlevels, level, changed_only)
    417 level += 1
    418 for o in object:
--> 419     orepr, oreadable, orecur = _safe_repr(
    420         o, context, maxlevels, level, changed_only=changed_only
    421     )
    422     append(orepr)
    423     if not oreadable:

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/sklearn/utils/_pprint.py:440, in _safe_repr(object, context, maxlevels, level, changed_only)
    438 recursive = False
    439 if changed_only:
--> 440     params = _changed_params(object)
    441 else:
    442     params = object.get_params(deep=False)

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/sklearn/utils/_pprint.py:93, in _changed_params(estimator)
     89 def _changed_params(estimator):
     90     """Return dict (param_name: value) of parameters that were given to
     91     estimator with non-default values."""
---> 93     params = estimator.get_params(deep=False)
     94     init_func = getattr(estimator.__init__, "deprecated_original", estimator.__init__)
     95     init_params = inspect.signature(init_func).parameters

File ~/checkouts/readthedocs.org/user_builds/my-data-toolkit/conda/latest/lib/python3.11/site-packages/sklearn/base.py:211, in BaseEstimator.get_params(self, deep)
    209 out = dict()
    210 for key in self._get_param_names():
--> 211     value = getattr(self, key)
    212     if deep and hasattr(value, "get_params") and not isinstance(value, type):
    213         deep_items = value.get_params().items()

AttributeError: 'OneHotEncoder' object has no attribute 'sparse_output'

@codecov
Copy link

codecov bot commented Nov 10, 2022

Codecov Report

Merging #765 (7c157a9) into main (853dc3d) will increase coverage by 0.00%.
The diff coverage is 100.00%.

@@           Coverage Diff           @@
##             main     #765   +/-   ##
=======================================
  Coverage   99.60%   99.60%           
=======================================
  Files         111      111           
  Lines        1525     1526    +1     
  Branches      346      346           
=======================================
+ Hits         1519     1520    +1     
  Misses          1        1           
  Partials        5        5           
Impacted Files Coverage Δ
dtoolkit/transformer/sklearn/OneHotEncoder.py 100.00% <100.00%> (ø)

📣 We’re building smart automated test selection to slash your CI/CD build times. Learn more

@@ -106,6 +106,9 @@ def __init__(
)
self.categories_with_parent = categories_with_parent

if not SKLEARN_GE_12:
self.sparse_output = sparse_output
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lower sklearn versions don't have the `sparse_output` parameter.
So it needs to be set manually as an attribute in `__init__`.

@@ -343,7 +344,7 @@
"metadata": {},
"outputs": [],
"source": [
"df_x.replace({\"normal\": 1, \"important\": 2, \"strategic\": 3}, inplace=True)\n",
"df_x = df_x.replace({\"normal\": 1, \"important\": 2, \"strategic\": 3})\n",
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This avoids the following SettingWithCopyWarning:

/tmp/ipykernel_3108/3317886993.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_x.replace({"normal": 1, "important": 2, "strategic": 3}, inplace=True)

@Zeroto521 Zeroto521 merged commit 0ab1c19 into main Nov 17, 2022
@Zeroto521 Zeroto521 deleted the bug/onehotencoder branch November 17, 2022 04:35
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

1 participant