Release v0.67.0 (#3968)

* Release v0.67.0
* Lint fix.
* Lint fix.
alteryx · Feb 1, 2023 · d12659d
1 parent aee70ba
commit d12659d
Show file tree

Hide file tree

Showing 54 changed files with 16 additions and 131 deletions.
diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -3,19 +3,24 @@ Release Notes
 **Future Releases**
     * Enhancements
     * Fixes
-        * Re-added ``TimeSeriesPipeline.should_skip_featurization`` to fix bug where data would get featurized unnecessarily :pr:`3964`
-        * Allow float categories to be passed into CatBoost estimators :pr:`3966`
     * Changes
-        * Update pyproject.toml to correctly specify the data filepaths :pr:`3967`
     * Documentation Changes
-        * Added demo for prediction intervals :pr:`3954`
     * Testing Changes
 
 .. warning::
 
     **Breaking Changes**
 
 
+**v0.67.0 Jan. 31, 2023**
+    * Fixes
+        * Re-added ``TimeSeriesPipeline.should_skip_featurization`` to fix bug where data would get featurized unnecessarily :pr:`3964`
+        * Allow float categories to be passed into CatBoost estimators :pr:`3966`
+    * Changes
+        * Update pyproject.toml to correctly specify the data filepaths :pr:`3967`
+    * Documentation Changes
+        * Added demo for prediction intervals :pr:`3954`
+
 **v0.66.1 Jan. 26, 2023**
     * Fixes
         * Updated ``LabelEncoder`` to store the original typing information :pr:`3960`

diff --git a/docs/source/user_guide/data_checks.ipynb b/docs/source/user_guide/data_checks.ipynb
@@ -805,7 +805,6 @@
     "\n",
     "\n",
     "class MyCustomDataChecks(DataChecks):\n",
-    "\n",
     "    data_checks = [\n",
     "        NullDataCheck,\n",
     "        InvalidTargetDataCheck,\n",

diff --git a/docs/source/user_guide/model_understanding.ipynb b/docs/source/user_guide/model_understanding.ipynb
@@ -783,7 +783,6 @@
     "\n",
     "\n",
     "def hinge_loss(y_true, y_pred_proba):\n",
-    "\n",
     "    probabilities = np.clip(y_pred_proba.iloc[:, 1], 0.001, 0.999)\n",
     "    y_true[y_true == 0] = -1\n",
     "\n",

diff --git a/evalml/__init__.py b/evalml/__init__.py
@@ -23,4 +23,4 @@
 warnings.filterwarnings("ignore", category=FutureWarning)
 warnings.filterwarnings("ignore", category=DeprecationWarning)
 
-__version__ = "0.66.1"
+__version__ = "0.67.0"
diff --git a/evalml/data_checks/null_data_check.py b/evalml/data_checks/null_data_check.py
@@ -280,7 +280,6 @@ def validate(self, X, y=None):
             )
 
         if moderately_null_cols:
-
             impute_strategies_dict = {}
             for col in moderately_null_cols:
                 col_in_df = X.ww[col]

diff --git a/evalml/model_understanding/feature_explanations.py b/evalml/model_understanding/feature_explanations.py
@@ -43,7 +43,6 @@ def readable_explanation(
         )
 
     if importance_method == "permutation":
-
         if objective == "auto":
             objective = evalml.automl.get_default_primary_search_objective(
                 pipeline.problem_type,

diff --git a/evalml/pipelines/component_graph.py b/evalml/pipelines/component_graph.py
@@ -585,7 +585,6 @@ def _get_feature_provenance(self, input_feature_names):
         for component_instance in transformers:
             component_provenance = component_instance._get_feature_provenance()
             for component_input, component_output in component_provenance.items():
-
                 # Case 1: The transformer created features from one of the original features
                 if component_input in provenance:
                     provenance[component_input] = provenance[component_input].union(

diff --git a/evalml/pipelines/components/transformers/encoders/onehot_encoder.py b/evalml/pipelines/components/transformers/encoders/onehot_encoder.py
@@ -277,7 +277,6 @@ def _get_feature_names(self):
             unique_encoded_columns = []
             encoded_features_to_drop = []
             for cat_index, category in enumerate(column_categories):
-
                 # Drop categories specified by the user
                 if (
                     self._encoder.drop_idx_ is not None

diff --git a/evalml/pipelines/components/transformers/feature_selection/rf_regressor_feature_selector.py b/evalml/pipelines/components/transformers/feature_selection/rf_regressor_feature_selector.py
@@ -47,7 +47,6 @@ def __init__(
         random_seed=0,
         **kwargs,
     ):
-
         parameters = {
             "number_features": number_features,
             "n_estimators": n_estimators,

diff --git a/evalml/pipelines/components/transformers/preprocessing/time_series_featurizer.py b/evalml/pipelines/components/transformers/preprocessing/time_series_featurizer.py
@@ -241,7 +241,6 @@ def _compute_delays(self, X_ww, y):
                 X_ww[categorical_columns],
             )
             for col_name in cols_to_delay:
-
                 col = X_ww[col_name]
                 if col_name in categorical_columns:
                     col = X_categorical[col_name]

diff --git a/evalml/pipelines/components/transformers/preprocessing/transform_primitive_components.py b/evalml/pipelines/components/transformers/preprocessing/transform_primitive_components.py
@@ -9,7 +9,6 @@
 
 
 class _ExtractFeaturesWithTransformPrimitives(Transformer):
-
     hyperparameter_ranges = {}
     """{}"""
 

diff --git a/evalml/pipelines/time_series_pipeline_base.py b/evalml/pipelines/time_series_pipeline_base.py
@@ -108,7 +108,6 @@ def _add_training_data_to_X_Y(self, X, y, X_train, y_train):
             )
             and self.gap
         ):
-
             # The training data does not have the gap dates so don't need to include them
             last_row_of_training -= self.gap
 

diff --git a/evalml/tests/automl_tests/parallel_tests/test_cf_engine.py b/evalml/tests/automl_tests/parallel_tests/test_cf_engine.py
@@ -166,7 +166,6 @@ def test_submit_evaluate_job_single(
     pool = get_pool(pool_type, thread_pool, process_pool)
 
     with CFClient(pool) as client:
-
         pipeline = BinaryClassificationPipeline(
             component_graph=["Logistic Regression Classifier"],
             parameters={"Logistic Regression Classifier": {"n_jobs": 1}},
@@ -232,7 +231,6 @@ def test_submit_evaluate_jobs_multiple(
     pool = get_pool(pool_type, thread_pool, process_pool)
 
     with CFClient(pool) as client:
-
         pipelines = [
             BinaryClassificationPipeline(
                 component_graph=["Logistic Regression Classifier"],
@@ -296,7 +294,6 @@ def test_submit_scoring_job_single(
     pool = get_pool(pool_type, thread_pool, process_pool)
 
     with CFClient(pool) as client:
-
         pipeline = BinaryClassificationPipeline(
             component_graph=["Logistic Regression Classifier"],
             parameters={"Logistic Regression Classifier": {"n_jobs": 1}},
@@ -343,7 +340,6 @@ def test_submit_scoring_jobs_multiple(
     pool = get_pool(pool_type, thread_pool, process_pool)
 
     with CFClient(pool) as client:
-
         pipelines = [
             BinaryClassificationPipeline(
                 component_graph=["Logistic Regression Classifier"],

diff --git a/evalml/tests/automl_tests/parallel_tests/test_dask_engine.py b/evalml/tests/automl_tests/parallel_tests/test_dask_engine.py
@@ -116,7 +116,6 @@ def test_submit_evaluate_job_single(X_y_binary_cls):
     )
 
     with DaskEngine() as engine:
-
         # Verify that engine evaluates a pipeline
         pipeline_future = engine.submit_evaluation_job(
             X=X,

diff --git a/evalml/tests/automl_tests/test_automl.py b/evalml/tests/automl_tests/test_automl.py
@@ -3077,7 +3077,6 @@ def test_automl_rerun(AutoMLTestEnv, X_y_binary, caplog):
 
 
 def test_timeseries_baseline_init_with_correct_gap_max_delay(AutoMLTestEnv, ts_data):
-
     X, _, y = ts_data()
     automl = AutoMLSearch(
         X_train=X,
@@ -3130,7 +3129,6 @@ def test_automl_does_not_include_positive_only_objectives_by_default(
     problem_type,
     X_y_regression,
 ):
-
     X, y = X_y_regression
 
     only_positive = []
@@ -3156,7 +3154,6 @@ def test_automl_does_not_include_positive_only_objectives_by_default(
 
 @pytest.mark.parametrize("non_core_objective", get_non_core_objectives())
 def test_automl_validate_objective(non_core_objective, X_y_regression):
-
     X, y = X_y_regression
 
     with pytest.raises(ValueError, match="is not allowed in AutoML!"):
@@ -3528,7 +3525,6 @@ def test_train_batch_works(
     stackable_classifiers,
     caplog,
 ):
-
     exceptions_to_check = [
         str(e) for e in pipeline_fit_side_effect if isinstance(e, Exception)
     ]
@@ -3653,7 +3649,6 @@ def test_score_batch_works(
     stackable_classifiers,
     caplog,
 ):
-
     exceptions_to_check = []
     expected_scores = {}
     for i, e in enumerate(pipeline_score_side_effect):
@@ -3700,7 +3695,6 @@ def test_score_batch_works(
     def score_batch_and_check():
         caplog.clear()
         with env.test_context(mock_score_side_effect=pipeline_score_side_effect):
-
             scores = automl.score_pipelines(
                 pipelines,
                 X,
@@ -3854,7 +3848,6 @@ def test_automl_supports_float_targets_for_classification(
     ],
 )
 def test_automl_issues_beta_warning_for_time_series(problem_type, X_y_binary):
-
     X, y = X_y_binary
 
     with warnings.catch_warnings(record=True) as warn:
@@ -4872,7 +4865,6 @@ def test_automl_with_iterative_algorithm_puts_ts_estimators_first(
     AutoMLTestEnv,
     is_using_windows,
 ):
-
     X, _, y = ts_data()
 
     env = AutoMLTestEnv("time series regression")
@@ -5004,7 +4996,6 @@ def test_automl_does_not_restrict_use_covariates_if_user_specified(
     is_using_windows,
     X_y_binary,
 ):
-
     X, y = X_y_binary
     X = pd.DataFrame(X)
     X["Date"] = pd.date_range("2010-01-01", periods=X.shape[0])

diff --git a/evalml/tests/automl_tests/test_automl_search_classification.py b/evalml/tests/automl_tests/test_automl_search_classification.py
@@ -611,7 +611,6 @@ def test_max_time_units(X_y_binary):
 
 
 def test_plot_iterations_max_iterations(X_y_binary, go):
-
     X, y = X_y_binary
 
     automl = AutoMLSearch(

diff --git a/evalml/tests/automl_tests/test_automl_search_regression.py b/evalml/tests/automl_tests/test_automl_search_regression.py
@@ -143,7 +143,6 @@ def test_categorical_regression(X_y_categorical_regression):
 
 
 def test_plot_iterations_max_iterations(X_y_regression, go):
-
     X, y = X_y_regression
 
     automl = AutoMLSearch(
@@ -167,7 +166,6 @@ def test_plot_iterations_max_iterations(X_y_regression, go):
 
 
 def test_plot_iterations_max_time(AutoMLTestEnv, X_y_regression, go):
-
     X, y = X_y_regression
 
     automl = AutoMLSearch(

diff --git a/evalml/tests/component_tests/decomposer_tests/test_decomposer.py b/evalml/tests/component_tests/decomposer_tests/test_decomposer.py
@@ -43,7 +43,6 @@ def test_set_time_index(decomposer_child_class):
     decomposer_list,
 )
 def test_decomposer_init_raises_error_if_degree_not_int(decomposer_child_class):
-
     with pytest.raises(TypeError, match="Received str"):
         decomposer_child_class(degree="1")
 
@@ -459,7 +458,6 @@ def test_decomposer_determine_periodicity(
     synthetic_data,
     generate_seasonal_data,
 ):
-
     X, y = generate_seasonal_data(real_or_synthetic=synthetic_data)(
         period,
         trend_degree=trend_degree,

diff --git a/evalml/tests/component_tests/decomposer_tests/test_polynomial_decomposer.py b/evalml/tests/component_tests/decomposer_tests/test_polynomial_decomposer.py
@@ -42,7 +42,6 @@ def test_polynomial_decomposer_get_trend_dataframe(
     ts_data_quadratic_trend,
     ts_data_cubic_trend,
 ):
-
     if degree == 1:
         X_input, _, y_input = ts_data()
     elif degree == 2:

diff --git a/evalml/tests/component_tests/decomposer_tests/test_stl_decomposer.py b/evalml/tests/component_tests/decomposer_tests/test_stl_decomposer.py
@@ -69,7 +69,6 @@ def test_stl_fit_transform_in_sample(
     synthetic_data,
     generate_seasonal_data,
 ):
-
     X, y = generate_seasonal_data(real_or_synthetic=synthetic_data)(
         period,
         freq_str=freq,
@@ -201,7 +200,6 @@ def test_stl_decomposer_get_trend_dataframe(
     fit_before_decompose,
     variateness,
 ):
-
     period = 7
     X, y = generate_seasonal_data(real_or_synthetic="synthetic")(
         period=period,
@@ -232,7 +230,6 @@ def test_stl_decomposer_get_trend_dataframe(
             [get_trend_dataframe_format_correct(x) for idx, x in enumerate(result_dfs)]
 
     elif transformer_fit_on_data != "in-sample":
-
         y_t_new = build_test_target(
             subset_y,
             period,
@@ -276,7 +273,6 @@ def test_stl_decomposer_get_trend_dataframe(
 def test_stl_decomposer_get_trend_dataframe_sets_time_index_internally(
     generate_seasonal_data,
 ):
-
     X, y = generate_seasonal_data(real_or_synthetic="synthetic")(
         period=7,
         set_time_index=False,

diff --git a/evalml/tests/component_tests/test_components.py b/evalml/tests/component_tests/test_components.py
@@ -1301,7 +1301,6 @@ def test_all_transformers_check_fit_input_type(
     make_data_type,
     ts_data,
 ):
-
     X, y = X_y_binary
     X = make_data_type(data_type, X)
     y = make_data_type(data_type, y)
@@ -1721,7 +1720,6 @@ def test_estimator_fit_respects_custom_indices(
     ts_data,
     helper_functions,
 ):
-
     supported_problem_types = estimator_class.supported_problem_types
 
     ts_problem = False

diff --git a/evalml/tests/component_tests/test_ft_transform_primitive_components.py b/evalml/tests/component_tests/test_ft_transform_primitive_components.py
@@ -232,7 +232,6 @@ def test_component_fit_transform(
     make_expected_ltypes,
     df_with_url_and_email,
 ):
-
     data = make_data(df_with_url_and_email)
     expected = make_expected(data)
     expected_logical_types = make_expected_ltypes()

diff --git a/evalml/tests/component_tests/test_imputer.py b/evalml/tests/component_tests/test_imputer.py
@@ -51,7 +51,6 @@ def test_imputer_init(
     numeric_impute_strategy,
     boolean_impute_strategy,
 ):
-
     imputer = Imputer(
         categorical_impute_strategy=categorical_impute_strategy,
         numeric_impute_strategy=numeric_impute_strategy,

diff --git a/evalml/tests/component_tests/test_lgbm_classifier.py b/evalml/tests/component_tests/test_lgbm_classifier.py
@@ -47,7 +47,6 @@ def test_lightgbm_classifier_random_seed_bounds_seed(X_y_binary):
 
 
 def test_fit_predict_binary(X_y_binary, lgbm):
-
     X, y = X_y_binary
 
     sk_clf = lgbm.sklearn.LGBMClassifier(random_state=0)
@@ -65,7 +64,6 @@ def test_fit_predict_binary(X_y_binary, lgbm):
 
 
 def test_fit_predict_multi(X_y_multi, lgbm):
-
     X, y = X_y_multi
 
     clf = lgbm.sklearn.LGBMClassifier(random_state=0)
@@ -83,7 +81,6 @@ def test_fit_predict_multi(X_y_multi, lgbm):
 
 
 def test_feature_importance(X_y_binary, lgbm):
-
     X, y = X_y_binary
 
     clf = LightGBMClassifier(n_jobs=1)
@@ -98,7 +95,6 @@ def test_feature_importance(X_y_binary, lgbm):
 
 
 def test_fit_string_features(X_y_binary, lgbm):
-
     X, y = X_y_binary
     X = pd.DataFrame(X)
     X["string_col"] = "abc"
@@ -269,7 +265,6 @@ def test_binary_label_encoding(mock_predict, X_y_binary):
 
 
 def test_binary_rf(X_y_binary, lgbm):
-
     X, y = X_y_binary
 
     with pytest.raises(lgbm.basic.LightGBMError, match="bagging_fraction"):