From 7cca82495b38d9e3c52a086958f07719981eb1cd Mon Sep 17 00:00:00 2001
From: Tahir Fayyaz
Date: Tue, 15 Feb 2022 21:32:32 +0000
Subject: [PATCH] Updated Databricks docs for correct jobs 2.1 API and links (#21494)

---
 .../providers/databricks/hooks/databricks.py  | 11 +++--
 .../databricks/operators/databricks.py        | 42 +++++++++----------
 .../operators.rst                             | 10 ++---
 3 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/airflow/providers/databricks/hooks/databricks.py b/airflow/providers/databricks/hooks/databricks.py
index fd3dd99acfbbb..ee2523cf4e652 100644
--- a/airflow/providers/databricks/hooks/databricks.py
+++ b/airflow/providers/databricks/hooks/databricks.py
@@ -19,8 +19,11 @@
 Databricks hook.
 
 This hook enable the submitting and running of jobs to the Databricks platform. Internally the
-operators talk to the ``api/2.0/jobs/runs/submit``
-`endpoint <https://docs.databricks.com/api/latest/jobs.html#runs-submit>`_.
+operators talk to the
+``api/2.1/jobs/run-now``
+`endpoint <https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunNow>`_
+or the ``api/2.1/jobs/runs/submit``
+`endpoint <https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunsSubmit>`_.
 """
 import sys
 import time
@@ -380,7 +383,7 @@ def _log_request_error(self, attempt_num: int, error: str) -> None:
 
     def run_now(self, json: dict) -> int:
         """
-        Utility function to call the ``api/2.0/jobs/run-now`` endpoint.
+        Utility function to call the ``api/2.1/jobs/run-now`` endpoint.
 
         :param json: The data used in the body of the request to the ``run-now`` endpoint.
         :return: the run_id as an int
@@ -391,7 +394,7 @@
 
     def submit_run(self, json: dict) -> int:
         """
-        Utility function to call the ``api/2.0/jobs/runs/submit`` endpoint.
+        Utility function to call the ``api/2.1/jobs/runs/submit`` endpoint.
 
         :param json: The data used in the body of the request to the ``submit`` endpoint.
         :return: the run_id as an int
diff --git a/airflow/providers/databricks/operators/databricks.py b/airflow/providers/databricks/operators/databricks.py
index 26c330815daa5..e53ff9a0849f8 100644
--- a/airflow/providers/databricks/operators/databricks.py
+++ b/airflow/providers/databricks/operators/databricks.py
@@ -101,14 +101,14 @@ def _handle_databricks_operator_execution(operator, hook, log, context) -> None:
 
 class DatabricksSubmitRunOperator(BaseOperator):
     """
     Submits a Spark job run to Databricks using the
-    `api/2.0/jobs/runs/submit
-    <https://docs.databricks.com/api/latest/jobs.html#runs-submit>`_
+    `api/2.1/jobs/runs/submit
+    <https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunsSubmit>`_
     API endpoint.
 
     There are two ways to instantiate this operator.
 
     In the first way, you can take the JSON payload that you typically use
-    to call the ``api/2.0/jobs/runs/submit`` endpoint and pass it directly
+    to call the ``api/2.1/jobs/runs/submit`` endpoint and pass it directly
     to our ``DatabricksSubmitRunOperator`` through the ``json`` parameter.
     For example ::
@@ -129,7 +129,7 @@ class DatabricksSubmitRunOperator(BaseOperator):
     endpoint. In this method, your code would look like this: ::
 
         new_cluster = {
-            'spark_version': '2.1.0-db3-scala2.11',
+            'spark_version': '10.1.x-scala2.12',
             'num_workers': 2
         }
         notebook_task = {
@@ -162,7 +162,7 @@ class DatabricksSubmitRunOperator(BaseOperator):
         :ref:`howto/operator:DatabricksSubmitRunOperator`
 
     :param json: A JSON object containing API parameters which will be passed
-        directly to the ``api/2.0/jobs/runs/submit`` endpoint. The other named parameters
+        directly to the ``api/2.1/jobs/runs/submit`` endpoint. The other named parameters
         (i.e. ``spark_jar_task``, ``notebook_task``..) to this operator will
         be merged with this json dictionary if they are provided.
        If there are conflicts during the merge, the named parameters will
@@ -170,7 +170,7 @@ class DatabricksSubmitRunOperator(BaseOperator):
 
         .. seealso::
             For more information about templating see :ref:`concepts:jinja-templating`.
-            https://docs.databricks.com/api/latest/jobs.html#runs-submit
+            https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunsSubmit
     :param spark_jar_task: The main class and parameters for the JAR task. Note that
         the actual JAR is specified in the ``libraries``.
         *EITHER* ``spark_jar_task`` *OR* ``notebook_task`` *OR* ``spark_python_task``
@@ -178,28 +178,28 @@ class DatabricksSubmitRunOperator(BaseOperator):
         This field will be templated.
 
         .. seealso::
-            https://docs.databricks.com/api/latest/jobs.html#jobssparkjartask
+            https://docs.databricks.com/dev-tools/api/2.0/jobs.html#jobssparkjartask
     :param notebook_task: The notebook path and parameters for the notebook task.
         *EITHER* ``spark_jar_task`` *OR* ``notebook_task`` *OR* ``spark_python_task``
         *OR* ``spark_submit_task`` *OR* ``pipeline_task`` should be specified.
         This field will be templated.
 
         .. seealso::
-            https://docs.databricks.com/api/latest/jobs.html#jobsnotebooktask
+            https://docs.databricks.com/dev-tools/api/2.0/jobs.html#jobsnotebooktask
     :param spark_python_task: The python file path and parameters to run the python file with.
         *EITHER* ``spark_jar_task`` *OR* ``notebook_task`` *OR* ``spark_python_task``
         *OR* ``spark_submit_task`` *OR* ``pipeline_task`` should be specified.
         This field will be templated.
 
         .. seealso::
-            https://docs.databricks.com/api/latest/jobs.html#jobssparkpythontask
+            https://docs.databricks.com/dev-tools/api/2.0/jobs.html#jobssparkpythontask
     :param spark_submit_task: Parameters needed to run a spark-submit command.
         *EITHER* ``spark_jar_task`` *OR* ``notebook_task`` *OR* ``spark_python_task``
         *OR* ``spark_submit_task`` *OR* ``pipeline_task`` should be specified.
         This field will be templated.
 
         .. seealso::
-            https://docs.databricks.com/api/latest/jobs.html#jobssparksubmittask
+            https://docs.databricks.com/dev-tools/api/2.0/jobs.html#jobssparksubmittask
     :param pipeline_task: Parameters needed to execute a Delta Live Tables pipeline task.
         The provided dictionary must contain at least ``pipeline_id`` field!
         *EITHER* ``spark_jar_task`` *OR* ``notebook_task`` *OR* ``spark_python_task``
@@ -214,7 +214,7 @@ class DatabricksSubmitRunOperator(BaseOperator):
         This field will be templated.
 
         .. seealso::
-            https://docs.databricks.com/api/latest/jobs.html#jobsclusterspecnewcluster
+            https://docs.databricks.com/dev-tools/api/2.0/jobs.html#jobsclusterspecnewcluster
     :param existing_cluster_id: ID for existing cluster on which to run this task.
         *EITHER* ``new_cluster`` *OR* ``existing_cluster_id`` should be specified
         (except when ``pipeline_task`` is used).
@@ -223,7 +223,7 @@ class DatabricksSubmitRunOperator(BaseOperator):
         This field will be templated.
 
         .. seealso::
-            https://docs.databricks.com/api/latest/libraries.html#managedlibrarieslibrary
+            https://docs.databricks.com/dev-tools/api/2.0/jobs.html#managedlibrarieslibrary
     :param run_name: The run name used for this task.
         By default this will be set to the Airflow ``task_id``. This ``task_id`` is a
         required parameter of the superclass ``BaseOperator``.
@@ -344,14 +344,14 @@ def on_kill(self):
 
 class DatabricksRunNowOperator(BaseOperator):
     """
     Runs an existing Spark job run to Databricks using the
-    `api/2.0/jobs/run-now
-    <https://docs.databricks.com/api/latest/jobs.html#run-now>`_
+    `api/2.1/jobs/run-now
+    <https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunNow>`_
     API endpoint.
 
     There are two ways to instantiate this operator.
 
     In the first way, you can take the JSON payload that you typically use
-    to call the ``api/2.0/jobs/run-now`` endpoint and pass it directly
+    to call the ``api/2.1/jobs/run-now`` endpoint and pass it directly
     to our ``DatabricksRunNowOperator`` through the ``json`` parameter.
     For example ::
@@ -408,9 +408,9 @@ class DatabricksRunNowOperator(BaseOperator):
         This field will be templated.
 
         .. seealso::
-            https://docs.databricks.com/api/latest/jobs.html#run-now
+            https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunNow
     :param json: A JSON object containing API parameters which will be passed
-        directly to the ``api/2.0/jobs/run-now`` endpoint. The other named parameters
+        directly to the ``api/2.1/jobs/run-now`` endpoint. The other named parameters
         (i.e. ``notebook_params``, ``spark_submit_params``..) to this operator will
         be merged with this json dictionary if they are provided.
         If there are conflicts during the merge, the named parameters will
@@ -418,7 +418,7 @@ class DatabricksRunNowOperator(BaseOperator):
 
         .. seealso::
            For more information about templating see :ref:`concepts:jinja-templating`.
-            https://docs.databricks.com/api/latest/jobs.html#run-now
+            https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunNow
     :param notebook_params: A dict from keys to values for jobs with notebook task,
         e.g. "notebook_params": {"name": "john doe", "age": "35"}.
         The map is passed to the notebook and will be accessible through the
@@ -442,7 +442,7 @@ class DatabricksRunNowOperator(BaseOperator):
         This field will be templated.
 
         .. seealso::
-            https://docs.databricks.com/api/latest/jobs.html#run-now
+            https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunNow
     :param jar_params: A list of parameters for jobs with JAR tasks,
         e.g. "jar_params": ["john doe", "35"].
         The parameters will be passed to JAR file as command line parameters.
@@ -453,7 +453,7 @@ class DatabricksRunNowOperator(BaseOperator):
         This field will be templated.
 
         .. seealso::
-            https://docs.databricks.com/api/latest/jobs.html#run-now
+            https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunNow
     :param spark_submit_params: A list of parameters for jobs with spark submit task,
         e.g. "spark_submit_params": ["--class", "org.apache.spark.examples.SparkPi"].
         The parameters will be passed to spark-submit script as command line parameters.
@@ -463,7 +463,7 @@ class DatabricksRunNowOperator(BaseOperator):
         This field will be templated.
 
         .. seealso::
-            https://docs.databricks.com/api/latest/jobs.html#run-now
+            https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunNow
     :param timeout_seconds: The timeout for this run. By default a value of 0 is used
         which means to have no timeout.
         This field will be templated.
diff --git a/docs/apache-airflow-providers-databricks/operators.rst b/docs/apache-airflow-providers-databricks/operators.rst
index 0b8f8ac26f0ea..01c19a7987c56 100644
--- a/docs/apache-airflow-providers-databricks/operators.rst
+++ b/docs/apache-airflow-providers-databricks/operators.rst
@@ -24,14 +24,14 @@ DatabricksSubmitRunOperator
 ===========================
 
 Use the :class:`~airflow.providers.databricks.operators.DatabricksSubmitRunOperator` to submit
-a new Databricks job via Databricks `api/2.0/jobs/runs/submit <https://docs.databricks.com/api/latest/jobs.html#runs-submit>`_ API endpoint.
+a new Databricks job via Databricks `api/2.1/jobs/runs/submit <https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunsSubmit>`_ API endpoint.
 
 Using the Operator
 ^^^^^^^^^^^^^^^^^^
 
 There are two ways to instantiate this operator.
 
 In the first way, you can take the JSON payload that you typically use
-to call the ``api/2.0/jobs/runs/submit`` endpoint and pass it directly to our ``DatabricksSubmitRunOperator`` through the ``json`` parameter.
+to call the ``api/2.1/jobs/runs/submit`` endpoint and pass it directly to our ``DatabricksSubmitRunOperator`` through the ``json`` parameter.
 
 Another way to accomplish the same thing is to use the named parameters of the ``DatabricksSubmitRunOperator`` directly. Note that there is exactly one named parameter
 for each top level parameter in the ``runs/submit`` endpoint.
@@ -91,15 +91,15 @@ You can also use named parameters to initialize the operator and run the job.
 
 DatabricksRunNowOperator
 ===========================
-Use the :class:`~airflow.providers.databricks.operators.DatabricksRunNowOperator` to trigger run of existing Databricks job
-via `api/2.0/jobs/runs/run-now <https://docs.databricks.com/api/latest/jobs.html#run-now>`_ API endpoint.
+Use the :class:`~airflow.providers.databricks.operators.DatabricksRunNowOperator` to trigger a run of an existing Databricks job
+via `api/2.1/jobs/run-now <https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunNow>`_ API endpoint.
 
 Using the Operator
 ^^^^^^^^^^^^^^^^^^
 
 There are two ways to instantiate this operator.
 
 In the first way, you can take the JSON payload that you typically use
-to call the ``api/2.0/jobs/run-now`` endpoint and pass it directly to our ``DatabricksRunNowOperator`` through the ``json`` parameter.
+to call the ``api/2.1/jobs/run-now`` endpoint and pass it directly to our ``DatabricksRunNowOperator`` through the ``json`` parameter.
 
 Another way to accomplish the same thing is to use the named parameters of the ``DatabricksRunNowOperator`` directly. Note that there is exactly one named parameter
 for each top level parameter in the ``jobs/run-now`` endpoint.
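
As a usage reference, here is a minimal DAG sketch of how the two operators documented above might be wired together against the Jobs 2.1 API. The connection id, ``job_id``, cluster spec, and notebook path below are illustrative placeholders rather than values taken from this change ::

    from datetime import datetime

    from airflow import DAG
    from airflow.providers.databricks.operators.databricks import (
        DatabricksRunNowOperator,
        DatabricksSubmitRunOperator,
    )

    # Illustrative cluster spec and notebook path (placeholders, not part of this change).
    new_cluster = {'spark_version': '10.1.x-scala2.12', 'num_workers': 2}
    notebook_task = {'notebook_path': '/Users/airflow@example.com/PrepareData'}

    with DAG(
        dag_id='example_databricks_jobs_api_21',
        start_date=datetime(2022, 1, 1),
        schedule_interval=None,
        catchup=False,
    ) as dag:
        # One-off run created through the api/2.1/jobs/runs/submit endpoint.
        submit_run = DatabricksSubmitRunOperator(
            task_id='submit_run',
            databricks_conn_id='databricks_default',  # assumed connection id
            new_cluster=new_cluster,
            notebook_task=notebook_task,
        )

        # Triggers an existing job through api/2.1/jobs/run-now; job_id 42 is a placeholder.
        run_now = DatabricksRunNowOperator(
            task_id='run_now',
            databricks_conn_id='databricks_default',
            job_id=42,
            notebook_params={'name': 'john doe', 'age': '35'},
        )

        submit_run >> run_now

Here ``new_cluster`` and ``notebook_task`` map one-to-one onto top-level fields of the ``runs/submit`` payload, while ``notebook_params`` is passed through to the ``run-now`` call for the existing job.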