From b67616fd571107fd03e30b10a92b5959d521f980 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Mon, 11 Nov 2024 15:41:11 +0000
Subject: [PATCH 1/4] docs: create boosted tree model

---
 .../classification_boosted_tree_model_test.py | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 samples/snippets/classification_boosted_tree_model_test.py

diff --git a/samples/snippets/classification_boosted_tree_model_test.py b/samples/snippets/classification_boosted_tree_model_test.py
new file mode 100644
index 0000000000..464199ad94
--- /dev/null
+++ b/samples/snippets/classification_boosted_tree_model_test.py
@@ -0,0 +1,64 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def test_boosted_tree_model(random_model_id: str) -> None:
+    your_model_id = random_model_id
+    # [START bigquery_dataframes_bqml_boosted_tree_prepare]
+    import bigframes.pandas as bpd
+
+    input_data = bpd.read_gbq(
+        "bigquery-public-data.ml_datasets.census_adult_income",
+        columns=(
+            "age",
+            "workclass",
+            "marital_status",
+            "education_num",
+            "occupation",
+            "hours_per_week",
+            "income_bracket",
+            "functional_weight",
+        ),
+    )
+    input_data["dataframe"] = bpd.Series("training", index=input_data.index,).case_when(
+        [
+            (((input_data["functional_weight"] % 10) == 8), "evaluation"),
+            (((input_data["functional_weight"] % 10) == 9), "prediction"),
+        ]
+    )
+    del input_data["functional_weight"]
+    # [END bigquery_dataframes_bqml_boosted_tree_prepare]
+    # [START bigquery_dataframes_bqml_boosted_tree_create]
+    import bigframes.ml.linear_model
+
+    # input_data is defined in an earlier step.
+    training_data = input_data[input_data["dataframe"] == "training"]
+    X = training_data.drop(columns=["income_bracket", "dataframe"])
+    y = training_data["income_bracket"]
+
+    # create and train the model
+    census_model = bigframes.ml.linear_model.LogisticRegression(
+        # model_type="BOOSTED_TREE_CLASSIFIER",
+        # booster_type="gbtree",
+        max_iterations=50,
+    )
+    census_model.fit(X, y)
+
+    census_model.to_gbq(
+        your_model_id,  # For example: "your-project.census.census_model"
+        replace=True,
+    )
+    # [END bigquery_dataframes_bqml_boosted_tree_create]
+    assert input_data is not None
+    assert census_model is not None

From 669bc745acb0eeb2cb7956e2e347b0a268f98632 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Thu, 14 Nov 2024 16:08:07 +0000
Subject: [PATCH 2/4] docs: add commented-out sklearn GradientBoostingClassifier import to boosted tree sample

---
 samples/snippets/classification_boosted_tree_model_test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/samples/snippets/classification_boosted_tree_model_test.py b/samples/snippets/classification_boosted_tree_model_test.py
index 464199ad94..3ea5f85eba 100644
--- a/samples/snippets/classification_boosted_tree_model_test.py
+++ b/samples/snippets/classification_boosted_tree_model_test.py
@@ -40,6 +40,7 @@ def test_boosted_tree_model(random_model_id: str) -> None:
     del input_data["functional_weight"]
     # [END bigquery_dataframes_bqml_boosted_tree_prepare]
     # [START bigquery_dataframes_bqml_boosted_tree_create]
+    # from sklearn.ensemble import GradientBoostingClassifier
     import bigframes.ml.linear_model
 
     # input_data is defined in an earlier step.

From b113a574891c72f5952bd3e0aa67c1a01ee54cb7 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Tue, 19 Nov 2024 20:30:28 +0000
Subject: [PATCH 3/4] docs: use ensemble.XGBClassifier in boosted tree sample

---
 .../classification_boosted_tree_model_test.py       | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/samples/snippets/classification_boosted_tree_model_test.py b/samples/snippets/classification_boosted_tree_model_test.py
index 3ea5f85eba..e9dd3f444d 100644
--- a/samples/snippets/classification_boosted_tree_model_test.py
+++ b/samples/snippets/classification_boosted_tree_model_test.py
@@ -40,8 +40,7 @@ def test_boosted_tree_model(random_model_id: str) -> None:
     del input_data["functional_weight"]
     # [END bigquery_dataframes_bqml_boosted_tree_prepare]
     # [START bigquery_dataframes_bqml_boosted_tree_create]
-    # from sklearn.ensemble import GradientBoostingClassifier
-    import bigframes.ml.linear_model
+    from bigframes.ml import ensemble
 
     # input_data is defined in an earlier step.
     training_data = input_data[input_data["dataframe"] == "training"]
@@ -49,10 +48,12 @@ def test_boosted_tree_model(random_model_id: str) -> None:
     y = training_data["income_bracket"]
 
     # create and train the model
-    census_model = bigframes.ml.linear_model.LogisticRegression(
-        # model_type="BOOSTED_TREE_CLASSIFIER",
-        # booster_type="gbtree",
-        max_iterations=50,
+    census_model = ensemble.XGBClassifier(
+        n_estimators=1,
+        booster="gbtree",
+        tree_method="hist",
+        max_iterations=5,  # For a more accurate model, try 50 iterations.
+        subsample=0.85,
     )
     census_model.fit(X, y)
 

From a8ac72d903ddc26ef854693dc8d9fda5e2390435 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Tue, 19 Nov 2024 20:40:13 +0000
Subject: [PATCH 4/4] docs: reduce max_iterations from 5 to 1 in boosted tree sample

---
 samples/snippets/classification_boosted_tree_model_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/snippets/classification_boosted_tree_model_test.py b/samples/snippets/classification_boosted_tree_model_test.py
index e9dd3f444d..707ce16279 100644
--- a/samples/snippets/classification_boosted_tree_model_test.py
+++ b/samples/snippets/classification_boosted_tree_model_test.py
@@ -52,7 +52,7 @@ def test_boosted_tree_model(random_model_id: str) -> None:
         n_estimators=1,
         booster="gbtree",
         tree_method="hist",
-        max_iterations=5,  # For a more accurate model, try 50 iterations.
+        max_iterations=1,  # For a more accurate model, try 50 iterations.
         subsample=0.85,
     )
     census_model.fit(X, y)