Fix 'RF' error for LightGBM Classifier #1302

Merged (12 commits) on Oct 20, 2020
Changes from 11 commits
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -13,6 +13,7 @@ Release Notes
     * Added `PCA Transformer` component for dimensionality reduction :pr:`1270`
 * Fixes
     * Fixed ML performance issue with ordered datasets: always shuffle data in automl's default CV splits :pr:`1265`
+    * Fixed ``boosting_type='rf'`` for LightGBM Classifier, as well as ``num_leaves`` error :pr:`1302`
 * Changes
     * Allow ``add_to_rankings`` to be called before AutoMLSearch is called :pr:`1250`
 * Documentation Changes
evalml/pipelines/components/estimators/classifiers/lightgbm_classifier.py

@@ -21,7 +21,7 @@ class LightGBMClassifier(Estimator):
"boosting_type": ["gbdt", "dart", "goss", "rf"],
"n_estimators": Integer(10, 100),
"max_depth": Integer(0, 10),
"num_leaves": Integer(1, 100),
"num_leaves": Integer(2, 100),
bchen1116 marked this conversation as resolved.
Show resolved Hide resolved
"min_child_samples": Integer(1, 100)
}
model_family = ModelFamily.LIGHTGBM
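
For context on this range change (an illustrative sketch, not part of the PR diff): lightgbm requires num_leaves to be at least 2, so a tuning range that starts at 1 can generate invalid configurations.

import lightgbm as lgb
import numpy as np

X = np.random.rand(50, 4)
y = np.random.randint(0, 2, 50)

# num_leaves=1 is rejected by lightgbm with an error at fit time,
# which is why the tuner's lower bound moves from 1 to 2:
# lgb.LGBMClassifier(num_leaves=1).fit(X, y)  # raises
lgb.LGBMClassifier(num_leaves=2).fit(X, y)    # smallest valid value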
@@ -30,7 +30,7 @@ class LightGBMClassifier(Estimator):
     SEED_MIN = 0
     SEED_MAX = SEED_BOUNDS.max_bound

-    def __init__(self, boosting_type="gbdt", learning_rate=0.1, n_estimators=100, max_depth=0, num_leaves=31, min_child_samples=20, n_jobs=-1, random_state=0, **kwargs):
+    def __init__(self, boosting_type="gbdt", learning_rate=0.1, n_estimators=100, max_depth=0, num_leaves=31, min_child_samples=20, n_jobs=-1, random_state=0, bagging_fraction=0.9, bagging_freq=0, **kwargs):
dsherry (Contributor):
Why default bagging_freq to 0? Won't that cause the bug when boosting_type="rf"? What default does lightgbm choose for this parameter?

bchen1116 (Contributor, Author):
LightGBM defaults to 0 for bagging_freq. Users can set it to 1 and change bagging_fraction if they want to speed up computation and randomly select data for other boosting types, but it's required to be 1 for boosting_type=rf (along with 0 < bagging_fraction < 1.0).
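
To make those requirements concrete, here is a minimal sketch against lightgbm's native API (illustrative only, not from this PR; the data and parameter values are made up):

import lightgbm as lgb
import numpy as np

X = np.random.rand(100, 5)
y = np.random.randint(0, 2, 100)
train_set = lgb.Dataset(X, label=y)

# lightgbm's own defaults are bagging_freq=0 and bagging_fraction=1.0;
# rf mode additionally requires bagging_freq >= 1 and 0 < bagging_fraction < 1.0
params = {"objective": "binary", "boosting_type": "rf",
          "bagging_freq": 1, "bagging_fraction": 0.9}
booster = lgb.train(params, train_set, num_boost_round=10)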

dsherry (Contributor):
Got it. This looks good. Is 0.9 the default bagging_fraction in lightgbm?

bchen1116 (Contributor, Author):
@dsherry it defaults to 1.0

         # lightGBM's current release doesn't currently support numpy.random.RandomState as the random_state value so we convert to int instead
         random_seed = get_random_seed(random_state, self.SEED_MIN, self.SEED_MAX)

@@ -40,9 +40,15 @@ def __init__(self, boosting_type="gbdt", learning_rate=0.1, n_estimators=100, ma
             "max_depth": max_depth,
             "num_leaves": num_leaves,
             "min_child_samples": min_child_samples,
-            "n_jobs": n_jobs}
+            "n_jobs": n_jobs,
+            "bagging_freq": bagging_freq,
+            "bagging_fraction": bagging_fraction}
dsherry (Contributor):
@bchen1116 could you please explain why adding these two parameters fixed the bug?

bchen1116 (Contributor, Author):
As some background, LightGBM has 4 boosting types: "gbdt", "dart", "goss", and "rf". bagging_freq controls the frequency of bagging: with bagging_freq = k, it bags every k iterations (0 means no bagging). bagging_fraction is the fraction of the data randomly selected without resampling (1 means select all, 0 means none). Bagging can help speed up the training process.

The default bagging_freq that LightGBM sets is 0, which works with gbdt, dart, and goss. However, since rf is a random forest, LightGBM requires that it use bagging, which means bagging_freq must be set to 1 and bagging_fraction must be below 1.0. By adding those two parameters and changing bagging_freq when boosting_type="rf", we apply a simple fix that avoids this bug.
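
To see the failure this guards against, a hedged reproduction using lightgbm's scikit-learn wrapper directly (the exact error text may vary between lightgbm versions):

import lightgbm as lgb
import numpy as np

X = np.random.rand(100, 5)
y = np.random.randint(0, 2, 100)

try:
    # lightgbm's defaults leave bagging disabled, which rf mode rejects
    lgb.LGBMClassifier(boosting_type="rf").fit(X, y)
except lgb.basic.LightGBMError as err:
    print(err)  # message points at the bagging settings rf requires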

dsherry (Contributor):
Thanks for the clear explanation! That makes sense.

Can we tweak the comment you left on line 48:

if the boosting type is random forest, bagging is required by lightgbm, so we set bagging_freq to 1 in order to avoid errors

         parameters.update(kwargs)
 
+        # if the boosting type is random forest, we want to change the bagging_freq to 1 so that we avoid errors
+        if boosting_type == "rf" and not bagging_freq:
+            parameters.update({'bagging_freq': 1})
dsherry marked this conversation as resolved.

         lgbm_error_msg = "LightGBM is not installed. Please install using `pip install lightgbm`."
         lgbm = import_or_raise("lightgbm", error_msg=lgbm_error_msg)
         self._ordinal_encoder = None
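
Putting the pieces together, a usage sketch of the fixed component (the import path is assumed from evalml's layout at the time; behavior matches the new tests at the bottom of this diff):

from evalml.pipelines.components import LightGBMClassifier

# the guard above bumps bagging_freq from 0 to 1 when boosting_type="rf";
# bagging_fraction keeps its new default of 0.9, satisfying lightgbm's rf checks
clf = LightGBMClassifier(boosting_type="rf")
assert clf.parameters["bagging_freq"] == 1
assert clf.parameters["bagging_fraction"] == 0.9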
3 changes: 2 additions & 1 deletion evalml/tests/component_tests/test_components.py
@@ -203,7 +203,8 @@ def test_describe_component():
         pass
     try:
         lg_classifier = LightGBMClassifier()
-        assert lg_classifier.describe(return_dict=True) == {'name': 'LightGBM Classifier', 'parameters': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 100, 'max_depth': 0, 'num_leaves': 31, 'min_child_samples': 20, 'n_jobs': -1}}
+        assert lg_classifier.describe(return_dict=True) == {'name': 'LightGBM Classifier', 'parameters': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 100, 'max_depth': 0, 'num_leaves': 31,
+                                                                                                           'min_child_samples': 20, 'n_jobs': -1, 'bagging_fraction': 0.9, 'bagging_freq': 0}}
     except ImportError:
         pass

38 changes: 38 additions & 0 deletions evalml/tests/component_tests/test_lgbm_classifier.py
@@ -2,6 +2,7 @@

 import numpy as np
 import pandas as pd
+import pytest
 from pandas.testing import assert_frame_equal, assert_series_equal
 from pytest import importorskip

@@ -281,3 +282,40 @@ def test_binary_label_encoding(mock_fit, mock_predict, X_y_binary):
     assert_series_equal(y_arg, y_numeric)
 
     clf.predict(X)
+
+
+def test_binary_rf_not_defaults(X_y_binary):
+    X, y = X_y_binary
+
+    with pytest.raises(lgbm.basic.LightGBMError, match="bagging_fraction"):
+        clf = LightGBMClassifier(boosting_type="rf", bagging_freq=1, bagging_fraction=1.01)
+        clf.fit(X, y)
+
+    clf = LightGBMClassifier(boosting_type="rf", bagging_freq=0)
+    clf.fit(X, y)
+    assert clf.parameters['bagging_freq'] == 1
+    assert clf.parameters['bagging_fraction'] == 0.9
+
+
+def test_binary_rf(X_y_binary):
+    X, y = X_y_binary
+
+    clf = LightGBMClassifier()
+    clf.fit(X, y)
+    assert clf.parameters['bagging_freq'] == 0
+    assert clf.parameters['bagging_fraction'] == 0.9
+
+    clf = LightGBMClassifier(boosting_type="rf")
+    clf.fit(X, y)
+    assert clf.parameters['bagging_freq'] == 1
+    assert clf.parameters['bagging_fraction'] == 0.9
+
+    clf = LightGBMClassifier(boosting_type="rf", bagging_freq=1, bagging_fraction=0.5)
+    clf.fit(X, y)
+    assert clf.parameters['bagging_freq'] == 1
+    assert clf.parameters['bagging_fraction'] == 0.5
+
+    clf = LightGBMClassifier(bagging_freq=1, bagging_fraction=0.5)
+    clf.fit(X, y)
+    assert clf.parameters['bagging_freq'] == 1
+    assert clf.parameters['bagging_fraction'] == 0.5

(bchen1116 marked a conversation on ``test_binary_rf_not_defaults`` as resolved.)