Skip to content
This repository has been archived by the owner on Jan 9, 2024. It is now read-only.

Commit

Permalink
Remove boxcox (#177)
Browse files Browse the repository at this point in the history
* Turn off BoxCox transformer because it may lead to potential bugs

* Relax the decimal-place comparison in the serialization test (places=3 to places=2) due to the randomness in TPOT
  • Loading branch information
jzhang-gp committed Nov 21, 2019
1 parent 7b0d201 commit a3e0bfd
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 26 deletions.
10 changes: 8 additions & 2 deletions foreshadow/smart/all.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
TfidfVectorizer,
)
from foreshadow.concrete.internals import (
BoxCox,
ConvertFinancial,
DummyEncoder,
FancyImputer,
Expand Down Expand Up @@ -85,7 +84,14 @@ def pick_transformer(self, X, y=None, **fit_params):
best_dist = best_dist if p_vals[best_dist] >= self.p_val else None
if best_dist is None:
return SerializablePipeline(
[("box_cox", BoxCox()), ("robust_scaler", RobustScaler())]
[
# The BoxCox transformer is turned off because, if the test
# dataset has an even smaller negative minimum, it will
# break the pipeline.
# TODO: add a different transformer if necessary.
# ("box_cox", BoxCox()),
("robust_scaler", RobustScaler())
]
)
else:
return distributions[best_dist]
Expand Down
2 changes: 1 addition & 1 deletion foreshadow/tests/test_foreshadow.py
Original file line number Diff line number Diff line change
Expand Up @@ -847,7 +847,7 @@ def test_foreshadow_serialization_adults_small_classification():
# converge. The test here aims to evaluate if both cases have
# produced a reasonable score and the difference is small.
# assert score1 > 0.76 and score2 > 0.76
assertions.assertAlmostEqual(score1, score2, places=3)
assertions.assertAlmostEqual(score1, score2, places=2)


def test_foreshadow_serialization_adults_small_classification_override():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,23 +61,6 @@ def test_dummy_encoder_other():
assert check.equals(df)


def test_box_cox():
    """Check that BoxCox makes log-normal samples pass a normality test.

    Draws 100 log-normal samples with a fixed seed, fits and applies the
    BoxCox transformer, and asserts that (a) the Shapiro-Wilk p-value of
    the transformed data exceeds 0.05 and (b) ``inverse_transform``
    recovers the original values up to floating-point tolerance.
    """
    import numpy as np
    import pandas as pd
    import scipy.stats as ss

    from foreshadow.concrete import BoxCox

    # Seed the global NumPy RNG so the sampled data is reproducible.
    np.random.seed(0)
    samples = ss.lognorm.rvs(size=100, s=0.954)
    frame = pd.DataFrame(samples)

    transformer = BoxCox()
    transformed = transformer.fit_transform(frame)

    # Index 1 of the Shapiro-Wilk result is the p-value.
    p_value = ss.shapiro(transformed)[1]
    assert p_value > 0.05

    # Round trip: inverting the transform must give back the input.
    restored = transformer.inverse_transform(transformed)
    original_flat = frame.values.ravel()
    restored_flat = restored.values.ravel()
    assert np.allclose(original_flat, restored_flat)


@pytest.mark.parametrize("deep", [True, False])
def test_label_encoder_get_params_keys(deep):
"""Test that the desired keys show up for the LabelEncoder object.
Expand Down
8 changes: 2 additions & 6 deletions foreshadow/tests/test_transformers/test_smart/test_smart.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,7 @@ def test_smart_get_params_default(smart_child, smart_params, deep):
assert smart_params == params


@pytest.mark.parametrize(
"initial_transformer", [None, "BoxCox", "StandardScaler"]
)
@pytest.mark.parametrize("initial_transformer", [None, "StandardScaler"])
def test_smart_get_params_deep(smart_child, smart_params, initial_transformer):
"""Test that smart.get_params(deep=True) functions as desired.
Expand All @@ -78,9 +76,7 @@ def test_smart_get_params_deep(smart_child, smart_params, initial_transformer):
assert smart.get_params(True) == smart_params


@pytest.mark.parametrize(
"initial_transformer", [None, "BoxCox", "StandardScaler"]
)
@pytest.mark.parametrize("initial_transformer", [None, "StandardScaler"])
def test_smart_set_params_default(smart_child, initial_transformer):
"""Test setting both transformer and its parameters simultaneously works.
Expand Down

0 comments on commit a3e0bfd

Please sign in to comment.