Added automated release notes generation for backport operators (#8807)
We now have a mechanism to keep release notes for the backport operators
updated in an automated way.

It really nicely generates all the necessary information:

* summary of requirements for each backport package
* list of dependencies (including extras to install them) when a package
  depends on other provider packages
* table of new hooks/operators/sensors/protocols/secrets
* table of moved hooks/operators/sensors/protocols/secrets with
  information where they were moved from
* changelog of all the changes to the provider package (this will be
  automatically updated with an incremental changelog whenever we decide to
  release separate packages)

The system is fully automated - we will be able to produce release notes
automatically (per-package) whenever we decide to release a new version of
a package in the future.
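
As an illustration only (a hypothetical sketch, not the actual implementation added in this
commit), the split between "new" and "moved" classes could be derived from a mapping of old
airflow.contrib import paths to their new airflow.providers paths; the OLD_TO_NEW mapping and
the function name below are made up for the example:

.. code-block:: python

    from typing import Dict, List, Tuple

    # Hypothetical mapping from old airflow.contrib paths to new providers paths.
    OLD_TO_NEW: Dict[str, str] = {
        "airflow.contrib.operators.bigquery_operator.BigQueryOperator":
            "airflow.providers.google.cloud.operators.bigquery.BigQueryExecuteQueryOperator",
    }


    def classify_classes(current_classes: List[str]) -> Tuple[List[str], List[Tuple[str, str]]]:
        """Split provider classes into brand-new ones and ones moved from airflow.contrib."""
        new_to_old = {new: old for old, new in OLD_TO_NEW.items()}
        new_classes = [cls for cls in current_classes if cls not in new_to_old]
        moved_classes = [(cls, new_to_old[cls]) for cls in current_classes if cls in new_to_old]
        return new_classes, moved_classes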
potiuk committed May 15, 2020
1 parent f82ad45 commit 92585ca
Showing 155 changed files with 9,938 additions and 923 deletions.
16 changes: 6 additions & 10 deletions .pre-commit-config.yaml
@@ -69,7 +69,8 @@ repos:
- --fuzzy-match-generates-todo
- id: insert-license
name: Add license for all JINJA template files
files: ^airflow/www/templates/.*\.html$|^docs/templates/.*\.html$|^airflow/contrib/plugins/metastore_browser/templates/.*\.html$ # yamllint disable-line rule:line-length
files: "^airflow/www/templates/.*\\.html$|^docs/templates/.*\\.html$|^airflow/contrib/plugins/\
metastore_browser/templates/.*\\.html$|.*\\.jinja2"
exclude: ^\.github/.*$|^airflow/_vendor/.*$
args:
- --comment-style
@@ -120,7 +121,7 @@ repos:
- id: insert-license
name: Add license for all md files
files: \.md$
exclude: ^\.github/.*$|^airflow/_vendor/.*$
exclude: ^\.github/.*$|^airflow/_vendor/.*|PROVIDERS_CHANGES.*\.md
args:
- --comment-style
- "<!--|| -->"
@@ -132,16 +133,10 @@
hooks:
- id: doctoc
name: Add TOC for md files
files: ^README\.md$|^CONTRIBUTING\.md$|^UPDATING.md$|^dev/README\.md$
files: ^README\.md$|^CONTRIBUTING\.md$|^UPDATING.md$|^dev/README\.md$|^dev/BACKPORT_PACKAGES.md$
args:
- "--maxlevel"
- "2"
- repo: https://github.com/thlorenz/doctoc.git
rev: v1.4.0
hooks:
- id: doctoc
name: Add TOC for backport readme files
files: BACKPORT_README\.md$
- repo: meta
hooks:
- id: check-hooks-apply
@@ -277,7 +272,8 @@ repos:
^airflow/contrib/.*\.py$
- id: provide-create-sessions
language: pygrep
name: To avoid import cycles make sure provide_session and create_session are imported from airflow.utils.session # yamllint disable-line rule:line-length
name: To avoid import cycles make sure provide_session and create_session are imported from
airflow.utils.session
entry: "from airflow\\.utils\\.db import.* (provide_session|create_session)"
files: \.py$
pass_filenames: true
1 change: 1 addition & 0 deletions .rat-excludes
@@ -75,6 +75,7 @@ rat-results.txt
apache-airflow-.*\+source.tar.gz.*
apache-airflow-.*\+bin.tar.gz.*
PULL_REQUEST_TEMPLATE.md
PROVIDERS_CHANGES*.md

# vendored modules
_vendor/*
87 changes: 87 additions & 0 deletions BREEZE.rst
@@ -655,6 +655,8 @@ This is the current syntax for `./breeze <./breeze>`_:
cleanup-image Cleans up the container image created
exec Execs into running breeze container in new terminal
generate-requirements Generates pinned requirements for pip dependencies
generate-backport-readme Generates backport packages readme files
prepare-backport-packages Prepares backport packages
initialize-local-virtualenv Initializes local virtualenv
setup-autocomplete Sets up autocomplete for breeze
stop Stops the docker-compose environment
@@ -870,6 +872,84 @@ This is the current syntax for `./breeze <./breeze>`_:
####################################################################################################
Detailed usage for command: generate-backport-readme
breeze [FLAGS] generate-backport-readme -- <EXTRA_ARGS>
Prepares README.md files for backport packages. You can provide (after --) an optional version
in the form of YYYY.MM.DD, optionally followed by the list of packages to generate readmes for.
If the first parameter is not formatted as a date, then today is used as the version.
If no packages are specified, readmes for all packages are generated.
If no date is specified, the current date + 3 days is used (allowing time for the PMC vote to pass).
Examples:
'breeze generate-backport-readme' or
'breeze generate-backport-readme -- 2020.05.10' or
'breeze generate-backport-readme -- 2020.05.10 https google amazon'
General form:
'breeze generate-backport-readme -- YYYY.MM.DD <PACKAGE_ID> ...'
* YYYY.MM.DD - the CALVER version of the package to prepare. Note that this date
cannot be earlier than the already released version (the script will fail if it
is). It can be set in the future, anticipating the future release date (see the sketch after this list).
* <PACKAGE_ID> - usually a directory in the airflow/providers folder (for example
'google'), but in several cases it might be one level deeper, separated with
'.', for example 'apache.hive'.
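
As a hypothetical illustration of how the optional version argument could be resolved (the
helper below is a sketch based on the description above, not the actual breeze code):

.. code-block:: python

    from datetime import date, timedelta
    from typing import Optional


    def resolve_readme_version(first_arg: Optional[str] = None) -> str:
        """Use the argument if it parses as YYYY.MM.DD, otherwise default to
        today + 3 days, leaving time for the PMC vote to pass."""
        if first_arg:
            try:
                year, month, day = (int(part) for part in first_arg.split("."))
                return f"{date(year, month, day):%Y.%m.%d}"
            except ValueError:
                pass  # not a date - fall back to the default version
        return f"{date.today() + timedelta(days=3):%Y.%m.%d}"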
Flags:
-v, --verbose
Show verbose information about executed commands (enabled by default for running test).
Note that you can further increase verbosity and see all the commands executed by breeze
by running 'export VERBOSE_COMMANDS="true"' before running breeze.
####################################################################################################
Detailed usage for command: prepare-backport-packages
breeze [FLAGS] prepare-backport-packages -- <EXTRA_ARGS>
Builds backport packages. You can provide (after --) an optional list of packages to prepare.
If no packages are specified, packages for all providers are prepared. You can specify the optional
--version-suffix flag to generate rc candidates for the packages.
Make sure to set the right version in './backport_packages/setup_backport_packages.py'
Examples:
'breeze prepare-backport-packages' or
'breeze prepare-backport-packages -- google' or
'breeze prepare-backport-packages --version-suffix rc1 -- http google amazon'
General form:
'breeze prepare-backport-packages -- <PACKAGE_ID> ...'
* <PACKAGE_ID> - usually a directory in the airflow/providers folder (for example
'google'), but in several cases it might be one level deeper, separated with '.',
for example 'apache.hive'.
Flags:
-S, --version-suffix
Adds optional suffix to the generated backport package version. It can be used to generate
rc1/rc2 ... versions of the packages.
-v, --verbose
Show verbose information about executed commands (enabled by default for running test).
Note that you can further increase verbosity and see all the commands executed by breeze
by running 'export VERBOSE_COMMANDS="true"' before running breeze.
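
As a tiny, hypothetical illustration of what the --version-suffix flag does to the generated
version string (the function name is made up, not taken from the backport packaging scripts):

.. code-block:: python

    def apply_version_suffix(version: str, suffix: str = "") -> str:
        """Append an optional rc suffix, e.g. '2020.05.10' + 'rc1' -> '2020.05.10rc1'."""
        return f"{version}{suffix}"


    assert apply_version_suffix("2020.05.10", "rc1") == "2020.05.10rc1"
    assert apply_version_suffix("2020.05.10") == "2020.05.10"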
####################################################################################################
Detailed usage for command: initialize-local-virtualenv
breeze [FLAGS] initialize-local-virtualenv -- <EXTRA_ARGS>
@@ -1340,6 +1420,13 @@ This is the current syntax for `./breeze <./breeze>`_:
-H, --dockerhub-repo
DockerHub repository used to pull, push, build images. Default: airflow.
****************************************************************************************************
Flags for generation of the backport packages
-S, --version-suffix
Adds optional suffix to the generated backport package version. It can be used to generate
rc1/rc2 ... versions of the packages.
****************************************************************************************************
Increase verbosity of the scripts
58 changes: 5 additions & 53 deletions CONTRIBUTING.rst
@@ -313,14 +313,15 @@ This is the full list of those extras:

.. START EXTRAS HERE
all, all_dbs, amazon, apache.atlas, apache.cassandra, apache.druid, apache.hdfs, apache.hive,
apache.pinot, apache.webhdfs, async, atlas, aws, azure, cassandra, celery, cgroups, cloudant,
cncf.kubernetes, dask, databricks, datadog, devel, devel_ci, devel_hadoop, doc, docker, druid,
all_dbs, amazon, apache.atlas, apache_beam, apache.cassandra, apache.druid, apache.hdfs,
apache.hive, apache.pinot, apache.webhdfs, async, atlas, aws, azure, cassandra, celery, cgroups,
cloudant, cncf.kubernetes, dask, databricks, datadog, devel, devel_hadoop, doc, docker, druid,
elasticsearch, exasol, facebook, gcp, gcp_api, github_enterprise, google, google_auth, grpc,
hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap, microsoft.azure, microsoft.mssql,
microsoft.winrm, mongo, mssql, mysql, odbc, oracle, pagerduty, papermill, password, pinot, postgres,
presto, qds, rabbitmq, redis, salesforce, samba, segment, sendgrid, sentry, singularity, slack,
snowflake, ssh, statsd, tableau, vertica, virtualenv, webhdfs, winrm, yandexcloud
snowflake, spark, ssh, statsd, tableau, vertica, virtualenv, webhdfs, winrm, yandexcloud, all,
devel_ci

.. END EXTRAS HERE
@@ -981,52 +982,3 @@ Resources & Links
- `Airflow’s official documentation <http://airflow.apache.org/>`__

- `More resources and links to Airflow related content on the Wiki <https://cwiki.apache.org/confluence/display/AIRFLOW/Airflow+Links>`__

Preparing backport packages
===========================

As part of the preparation for Airflow 2.0 we decided to prepare backports of the provider packages
that can be installed in an Airflow 1.10.*, Python 3.6+ environment.
Some of those packages will soon (after testing) be officially released via PyPI, but you can easily
build and prepare such packages on your own.

* The setup.py script only works with Python 3.6+. This is also the minimum supported Python
  version for using the packages.

* Make sure you have ``setuptools`` and ``wheel`` installed in your python environment. The easiest way
to do it is to run ``pip install setuptools wheel``

* Run the following command:

.. code-block:: bash
./scripts/ci/ci_prepare_packages.sh
* Usually you only build some of the provider packages. The ``providers`` directory is split into
  separate providers. You can see the list of all available providers by running
  ``./scripts/ci/ci_prepare_packages.sh --help``. You can build a backport package
  by running ``./scripts/ci/ci_prepare_packages.sh <PROVIDER_NAME>``. Note that there
  might be (and are) dependencies between some packages that prevent a subset of the packages
  from being used without installing the packages they depend on. This will be solved soon by
  adding cross-dependencies between packages.

* This creates a wheel package in your ``dist`` folder with a name similar to:
``apache_airflow_backport_providers-0.0.1-py2.py3-none-any.whl``

* You can install this package with ``pip install <PACKAGE_FILE>``


* You can also build an sdist (source distribution package) by running
  ``python setup.py <PROVIDER_NAME> sdist``, but this is only needed if you are distributing the packages.

Each package has a description generated from the general ``backport_packages/README.md`` file with the
following replacements (illustrated in the sketch after the list):

* ``{{ PACKAGE_NAME }}`` is replaced with the name of the package
(``apache-airflow-backport-providers-<NAME>``)
* ``{{ PACKAGE_DEPENDENCIES }}`` is replaced with list of optional dependencies for the package
* ``{{ PACKAGE_BACKPORT_README }}`` is replaced with the content of ``BACKPORT_README.md`` file in the
package folder if it exists.
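
A sketch of the substitution described above, using plain string replacement (the actual
generation code may use a different templating mechanism; the function name is illustrative):

.. code-block:: python

    def render_package_readme(template: str, package_name: str,
                              dependencies: str, backport_readme: str) -> str:
        """Fill the placeholders of the general backport_packages/README.md template."""
        return (
            template
            .replace("{{ PACKAGE_NAME }}", f"apache-airflow-backport-providers-{package_name}")
            .replace("{{ PACKAGE_DEPENDENCIES }}", dependencies)
            .replace("{{ PACKAGE_BACKPORT_README }}", backport_readme)
        )


    # Hypothetical usage with an inline template:
    template = "# {{ PACKAGE_NAME }}\n\n{{ PACKAGE_DEPENDENCIES }}\n\n{{ PACKAGE_BACKPORT_README }}\n"
    print(render_package_readme(template, "google", "- apache-airflow-backport-providers-amazon", ""))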

Note that those are still unofficial packages - they are not yet released on PyPI, but you can use them to
test the master versions of operators/hooks/sensors in an Airflow 1.10.* environment with Python 3.6+.
9 changes: 5 additions & 4 deletions INSTALL
@@ -44,14 +44,15 @@ pip install . --constraint requirements/requirements-python3.7.txt
# You can also install Airflow with extras specified. The list of available extras:
# START EXTRAS HERE

all, all_dbs, amazon, apache.atlas, apache.cassandra, apache.druid, apache.hdfs, apache.hive,
apache.pinot, apache.webhdfs, async, atlas, aws, azure, cassandra, celery, cgroups, cloudant,
cncf.kubernetes, dask, databricks, datadog, devel, devel_ci, devel_hadoop, doc, docker, druid,
all_dbs, amazon, apache.atlas, apache_beam, apache.cassandra, apache.druid, apache.hdfs,
apache.hive, apache.pinot, apache.webhdfs, async, atlas, aws, azure, cassandra, celery, cgroups,
cloudant, cncf.kubernetes, dask, databricks, datadog, devel, devel_hadoop, doc, docker, druid,
elasticsearch, exasol, facebook, gcp, gcp_api, github_enterprise, google, google_auth, grpc,
hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap, microsoft.azure, microsoft.mssql,
microsoft.winrm, mongo, mssql, mysql, odbc, oracle, pagerduty, papermill, password, pinot, postgres,
presto, qds, rabbitmq, redis, salesforce, samba, segment, sendgrid, sentry, singularity, slack,
snowflake, ssh, statsd, tableau, vertica, virtualenv, webhdfs, winrm, yandexcloud
snowflake, spark, ssh, statsd, tableau, vertica, virtualenv, webhdfs, winrm, yandexcloud, all,
devel_ci

# END EXTRAS HERE

10 changes: 5 additions & 5 deletions TESTING.rst
@@ -547,12 +547,12 @@ Preparing backport packages for System Tests for Airflow 1.10.* series
----------------------------------------------------------------------

To run system tests with old Airflow version you need to prepare backport packages. This
can be done by running ``./scripts/ci/ci_prepare_packages.sh <PACKAGES TO BUILD>``. For
can be done by running ``./breeze prepare-backport-packages -- <PACKAGES TO BUILD>``. For
example, the command below will build the google, postgres and mysql packages:

.. code-block:: bash
./scripts/ci/ci_prepare_packages.sh google postgres mysql
./breeze prepare-backport-packages -- google postgres mysql
Those packages will be prepared in ./dist folder. This folder is mapped to /dist folder
when you enter Breeze, so it is easy to automate installing those packages for testing.
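
A hypothetical automation sketch for that step: install every backport wheel found in /dist
inside the test environment (the path and the wheel name pattern are illustrative only):

.. code-block:: python

    import subprocess
    from pathlib import Path

    # Install all backport provider wheels that were built into /dist.
    for wheel in sorted(Path("/dist").glob("apache_airflow_backport_providers*.whl")):
        subprocess.run(["pip", "install", str(wheel)], check=True)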
@@ -614,7 +614,7 @@ Here is the typical session that you need to do to run system tests:

.. code-block:: bash
./scripts/ci/ci_prepare_packages.sh google postgres mysql
./breeze prepare-backport-packages -- google postgres mysql
2. Enter breeze with installing Airflow 1.10.*, forwarding credentials and installing
backported packages (you need an appropriate line in ``./files/airflow-breeze-config/variables.env``)
@@ -686,7 +686,7 @@ The typical session then looks as follows:

.. code-block:: bash
./scripts/ci/ci_prepare_packages.sh google postgres mysql
./breeze prepare-backport-packages -- google postgres mysql
2. Enter breeze with installing Airflow 1.10.*, forwarding credentials and installing
backported packages (you need an appropriate line in ``./files/airflow-breeze-config/variables.env``)
@@ -716,7 +716,7 @@ In the host:

.. code-block:: bash
./scripts/ci/ci_prepare_packages.sh google
./breeze prepare-backport-packages -- google
In the container:

8 changes: 7 additions & 1 deletion airflow/contrib/operators/bigquery_operator.py
@@ -19,7 +19,13 @@

import warnings

from airflow.providers.google.cloud.operators.bigquery import BigQueryExecuteQueryOperator
# pylint: disable=unused-import
from airflow.providers.google.cloud.operators.bigquery import ( # noqa; noqa; noqa; noqa; noqa
BigQueryCreateEmptyDatasetOperator, BigQueryCreateEmptyTableOperator, BigQueryCreateExternalTableOperator,
BigQueryDeleteDatasetOperator, BigQueryExecuteQueryOperator, BigQueryGetDatasetOperator,
BigQueryGetDatasetTablesOperator, BigQueryPatchDatasetOperator, BigQueryUpdateDatasetOperator,
BigQueryUpsertTableOperator,
)

warnings.warn(
"This module is deprecated. Please use `airflow.providers.google.cloud.operators.bigquery`.",
1 change: 1 addition & 0 deletions airflow/jobs/base_job.py
@@ -271,6 +271,7 @@ def reset_state_for_orphaned_tasks(self, filter_by_dag_run=None, session=None):
TI.dag_id == DR.dag_id,
TI.execution_date == DR.execution_date))
.filter(
# pylint: disable=comparison-with-callable
DR.state == State.RUNNING,
DR.run_id.notlike(f"{DagRunType.BACKFILL_JOB.value}__%"),
TI.state.in_(resettable_states))).all()
1 change: 1 addition & 0 deletions airflow/jobs/scheduler_job.py
@@ -1091,6 +1091,7 @@ def _change_state_for_tis_without_dagrun(self,
.filter(models.TaskInstance.dag_id.in_(simple_dag_bag.dag_ids)) \
.filter(models.TaskInstance.state.in_(old_states)) \
.filter(or_(
# pylint: disable=comparison-with-callable
models.DagRun.state != State.RUNNING,
models.DagRun.state.is_(None))) # pylint: disable=no-member
# We need to do this for mysql as well because it can cause deadlocks
2 changes: 2 additions & 0 deletions airflow/models/serialized_dag.py
@@ -140,6 +140,7 @@ def remove_dag(cls, dag_id: str, session=None):
:param dag_id: dag_id to be deleted
:param session: ORM Session
"""
# pylint: disable=no-member
session.execute(cls.__table__.delete().where(cls.dag_id == dag_id))

@classmethod
@@ -158,6 +159,7 @@ def remove_stale_dags(cls, expiration_date, session=None):
"scheduler since %s from %s table ", expiration_date, cls.__tablename__)

session.execute(
# pylint: disable=no-member
cls.__table__.delete().where(cls.last_updated < expiration_date)
)
