Skip to content

Commit

Permalink
Turn Pydantic into an optional dependency
Browse files Browse the repository at this point in the history
We've been using Pydantic for the internal API and it has caused
some compatibility issues, because Pydantic is so popular and its
users are currently still somewhat split between Pydantic 1 and
Pydantic 2.  The popularity of Pydantic works against us, and since we
are not yet using it in "production" (and in the future we will only
actually use it for the Internal API), it seems that turning Pydantic into
an optional dependency is the best way we can proceed.

It's as simple as converting all the direct imports into imports from a
common util module that has a fallback mechanism when the import is not found.

This should result in fewer conflicts when installing 3rd-party libraries
with Airflow.

Added a test where pydantic is removed. Also made sure that the special
cases we have tests for run the full suite of tests - non-db and db.
  • Loading branch information
potiuk committed Feb 11, 2024
1 parent 70fd6ad commit 88c435a
Show file tree
Hide file tree
Showing 38 changed files with 403 additions and 174 deletions.
76 changes: 63 additions & 13 deletions .github/workflows/ci.yml
Expand Up @@ -1177,10 +1177,60 @@ jobs:
uses: ./.github/actions/post_tests_failure
if: failure()
tests-postgres-min-sqlalchemy:
  # Runs the DB test suite on the default Postgres/Python combination with
  # DOWNGRADE_SQLALCHEMY enabled (presumably consumed by the CI image
  # entrypoint to install the minimum supported SQLAlchemy - verify there).
  timeout-minutes: 130
  name: >
    DB:MinSQLAlchemy${{needs.build-info.outputs.default-postgres-version}},
    Py${{needs.build-info.outputs.default-python-version}}:
    ${{needs.build-info.outputs.parallel-test-types-list-as-string}}
  runs-on: ${{fromJSON(needs.build-info.outputs.runs-on)}}
  needs: [build-info, wait-for-ci-images]
  env:
    RUNS_ON: "${{needs.build-info.outputs.runs-on}}"
    PARALLEL_TEST_TYPES: "${{needs.build-info.outputs.parallel-test-types-list-as-string}}"
    PR_LABELS: "${{needs.build-info.outputs.pull-request-labels}}"
    FULL_TESTS_NEEDED: "${{needs.build-info.outputs.full-tests-needed}}"
    DEBUG_RESOURCES: "${{needs.build-info.outputs.debug-resources}}"
    BACKEND: "postgres"
    ENABLE_COVERAGE: "${{needs.build-info.outputs.run-coverage}}"
    PYTHON_MAJOR_MINOR_VERSION: "${{needs.build-info.outputs.default-python-version}}"
    # Must use the `${{ ... }}` expression form; a single opening brace is not
    # expanded by GitHub Actions and leaves the literal text in the variable.
    PYTHON_VERSION: "${{needs.build-info.outputs.default-python-version}}"
    POSTGRES_VERSION: "${{needs.build-info.outputs.default-postgres-version}}"
    BACKEND_VERSION: "${{needs.build-info.outputs.default-postgres-version}}"
    DOWNGRADE_SQLALCHEMY: "true"
    JOB_ID: >
      postgres-min-sqlalchemy-${{needs.build-info.outputs.default-python-version}}-
      ${{needs.build-info.outputs.default-postgres-version}}
  if: needs.build-info.outputs.run-tests == 'true'
  steps:
    - name: Cleanup repo
      shell: bash
      run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*"
    - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
      uses: actions/checkout@v4
      with:
        persist-credentials: false
    - name: >
        Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}}
      uses: ./.github/actions/prepare_breeze_and_image
    # This job has no matrix, so the step name uses the default Python version
    # output rather than `matrix.python-version` (which would render empty).
    - name: >
        Tests: ${{needs.build-info.outputs.default-python-version}}:${{needs.build-info.outputs.parallel-test-types-list-as-string}}
      run: >
        breeze testing db-tests
        --parallel-test-types "${{needs.build-info.outputs.parallel-test-types-list-as-string}}"
    - name: >
        Post Tests success: ${{needs.build-info.outputs.default-python-version}}:MinSQLAlchemy
      uses: ./.github/actions/post_tests_success
      if: success()
    - name: >
        Post Tests failure: ${{needs.build-info.outputs.default-python-version}}:MinSQLAlchemy
      uses: ./.github/actions/post_tests_failure
      if: failure()
tests-postgres-boto:
timeout-minutes: 130
name: >
DB:LatestBoto${{needs.build-info.outputs.default-postgres-version}},
LatestBoto${{needs.build-info.outputs.default-postgres-version}},
Py${{needs.build-info.outputs.default-python-version}}:
${{needs.build-info.outputs.parallel-test-types-list-as-string}}
runs-on: ${{fromJSON(needs.build-info.outputs.runs-on)}}
Expand Down Expand Up @@ -1216,7 +1266,7 @@ jobs:
- name: >
Tests: ${{matrix.python-version}}:${{needs.build-info.outputs.parallel-test-types-list-as-string}}
run: >
breeze testing db-tests
breeze testing tests
--parallel-test-types "${{needs.build-info.outputs.parallel-test-types-list-as-string}}"
- name: >
Post Tests success: ${{needs.build-info.outputs.default-python-version}}:Boto"
Expand All @@ -1227,10 +1277,10 @@ jobs:
uses: ./.github/actions/post_tests_failure
if: failure()
tests-postgres-min-sqlalchemy:
tests-postgres-no-pydantic:
timeout-minutes: 130
name: >
DB:MinSQLAlchemy${{needs.build-info.outputs.default-postgres-version}},
NoPydantic${{needs.build-info.outputs.default-postgres-version}},
Py${{needs.build-info.outputs.default-python-version}}:
${{needs.build-info.outputs.parallel-test-types-list-as-string}}
runs-on: ${{fromJSON(needs.build-info.outputs.runs-on)}}
Expand All @@ -1247,9 +1297,9 @@ jobs:
PYTHON_VERSION: "${needs.build-info.outputs.default-python-version}}"
POSTGRES_VERSION: "${{needs.build-info.outputs.default-postgres-version}}"
BACKEND_VERSION: "${{needs.build-info.outputs.default-postgres-version}}"
DOWNGRADE_SQLALCHEMY: "true"
NO_PYDANTIC: "true"
JOB_ID: >
postgres-min-sqlalchemy-${{needs.build-info.outputs.default-python-version}}-
postgres-nopydantic-${{needs.build-info.outputs.default-python-version}}-
${{needs.build-info.outputs.default-postgres-version}}
if: needs.build-info.outputs.run-tests == 'true'
steps:
Expand All @@ -1266,21 +1316,21 @@ jobs:
- name: >
Tests: ${{matrix.python-version}}:${{needs.build-info.outputs.parallel-test-types-list-as-string}}
run: >
breeze testing db-tests
breeze testing tests
--parallel-test-types "${{needs.build-info.outputs.parallel-test-types-list-as-string}}"
- name: >
Post Tests success: ${{needs.build-info.outputs.default-python-version}}:MinSQLAlchemy"
Post Tests success: ${{needs.build-info.outputs.default-python-version}}:NoPydantic"
uses: ./.github/actions/post_tests_success
if: success()
- name: >
Post Tests failure: ${{needs.build-info.outputs.default-python-version}}:MinSQLAlchemy"
Post Tests failure: ${{needs.build-info.outputs.default-python-version}}:NoPydantic"
uses: ./.github/actions/post_tests_failure
if: failure()
tests-postgres-pendulum-2:
timeout-minutes: 130
name: >
DB:Postgres${{needs.build-info.outputs.default-postgres-version}},
Postgres${{needs.build-info.outputs.default-postgres-version}},
Pendulum2,Py${{needs.build-info.outputs.default-python-version}}:
${{needs.build-info.outputs.parallel-test-types-list-as-string}}
runs-on: ${{fromJSON(needs.build-info.outputs.runs-on)}}
Expand Down Expand Up @@ -1316,7 +1366,7 @@ jobs:
- name: >
Tests: ${{matrix.python-version}}:${{needs.build-info.outputs.parallel-test-types-list-as-string}}
run: >
breeze testing db-tests
breeze testing tests
--parallel-test-types "${{needs.build-info.outputs.parallel-test-types-list-as-string}}"
- name: >
Post Tests success: ${{needs.build-info.outputs.default-python-version}}:Pendulum2"
Expand All @@ -1330,7 +1380,7 @@ jobs:
tests-postgres-in-progress-features-disabled:
timeout-minutes: 130
name: >
DB:InProgressDisabledPostgres${{needs.build-info.outputs.default-postgres-version}},
InProgressDisabledPostgres${{needs.build-info.outputs.default-postgres-version}},
Py${{needs.build-info.outputs.default-python-version}}:
${{needs.build-info.outputs.parallel-test-types-list-as-string}}
runs-on: ${{fromJSON(needs.build-info.outputs.runs-on)}}
Expand Down Expand Up @@ -1366,7 +1416,7 @@ jobs:
- name: >
Tests: ${{matrix.python-version}}:${{needs.build-info.outputs.parallel-test-types-list-as-string}}
run: >
breeze testing db-tests
breeze testing tests
--parallel-test-types "${{needs.build-info.outputs.parallel-test-types-list-as-string}}"
- name: >
Post Tests success: ${{needs.build-info.outputs.default-python-version}}:FeaturesDisabled"
Expand Down
13 changes: 13 additions & 0 deletions Dockerfile.ci
Expand Up @@ -897,6 +897,18 @@ function check_boto_upgrade() {
pip check
}

# Uninstalls pydantic when NO_PYDANTIC is set to "true", then runs `pip check`
# on the resulting environment. For any other value (including unset) the
# function does nothing.
function check_no_pydantic() {
    if [[ ${NO_PYDANTIC=} == "true" ]]; then
        # Banner surrounded by blank lines, same output as three echo calls.
        printf '\n%s\n\n' "${COLOR_BLUE}Remove pydantic${COLOR_RESET}"
        # Best-effort removal: a failing uninstall is deliberately tolerated.
        pip uninstall --root-user-action ignore pydantic -y || true
        pip check
    fi
}


function check_download_sqlalchemy() {
if [[ ${DOWNGRADE_SQLALCHEMY=} != "true" ]]; then
return
Expand Down Expand Up @@ -949,6 +961,7 @@ function check_run_tests() {
determine_airflow_to_use
environment_initialization
check_boto_upgrade
check_no_pydantic
check_download_sqlalchemy
check_download_pendulum
check_run_tests "${@}"
Expand Down
8 changes: 4 additions & 4 deletions INSTALL
Expand Up @@ -253,10 +253,10 @@ gcp_api, github, github-enterprise, google, google-auth, graphviz, grpc, hashico
http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, microsoft-azure,
microsoft-mssql, microsoft-psrp, microsoft-winrm, mongo, mssql, mysql, neo4j, odbc, openai,
openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, pandas, papermill, password,
pgvector, pinecone, pinot, postgres, presto, qdrant, rabbitmq, redis, s3, s3fs, salesforce, samba,
saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh,
statsd, tableau, tabular, telegram, trino, vertica, virtualenv, weaviate, webhdfs, winrm, yandex,
zendesk
pgvector, pinecone, pinot, postgres, presto, pydantic, qdrant, rabbitmq, redis, s3, s3fs,
salesforce, samba, saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake,
spark, sqlite, ssh, statsd, tableau, tabular, telegram, trino, vertica, virtualenv, weaviate,
webhdfs, winrm, yandex, zendesk

# END REGULAR EXTRAS HERE

Expand Down
2 changes: 2 additions & 0 deletions airflow/providers/apache/hdfs/sensors/hdfs.py
Expand Up @@ -18,6 +18,8 @@

from airflow.sensors.base import BaseSensorOperator

# Ignore missing docstring

_EXCEPTION_MESSAGE = """The old HDFS Sensors have been removed in 4.0.0 version of the apache.hdfs provider.
Please convert your DAGs to use the WebHdfsSensor or downgrade the provider to below 4.*
if you want to continue using it.
Expand Down
5 changes: 1 addition & 4 deletions airflow/providers/papermill/hooks/kernel.py
Expand Up @@ -16,17 +16,14 @@
# under the License.
from __future__ import annotations

from typing import TYPE_CHECKING
import typing

from jupyter_client import AsyncKernelManager
from papermill.clientwrap import PapermillNotebookClient
from papermill.engines import NBClientEngine
from papermill.utils import merge_kwargs, remove_args
from traitlets import Unicode

if TYPE_CHECKING:
from pydantic import typing

from airflow.hooks.base import BaseHook

JUPYTER_KERNEL_SHELL_PORT = 60316
Expand Down
10 changes: 5 additions & 5 deletions airflow/serialization/pydantic/dag.py
Expand Up @@ -21,17 +21,17 @@
from typing import Any, List, Optional

from dateutil import relativedelta
from pydantic import (
from typing_extensions import Annotated

from airflow import DAG, settings
from airflow.configuration import conf as airflow_conf
from airflow.utils.pydantic import (
BaseModel as BaseModelPydantic,
ConfigDict,
PlainSerializer,
PlainValidator,
ValidationInfo,
)
from typing_extensions import Annotated

from airflow import DAG, settings
from airflow.configuration import conf as airflow_conf
from airflow.utils.sqlalchemy import Interval


Expand Down
11 changes: 8 additions & 3 deletions airflow/serialization/pydantic/dag_run.py
Expand Up @@ -19,10 +19,9 @@
from datetime import datetime
from typing import TYPE_CHECKING, Iterable, List, Optional

from pydantic import BaseModel as BaseModelPydantic, ConfigDict

from airflow.serialization.pydantic.dag import PydanticDag
from airflow.serialization.pydantic.dataset import DatasetEventPydantic
from airflow.utils.pydantic import BaseModel as BaseModelPydantic, ConfigDict
from airflow.utils.session import NEW_SESSION, provide_session

if TYPE_CHECKING:
Expand Down Expand Up @@ -101,4 +100,10 @@ def get_task_instance(
)


DagRunPydantic.model_rebuild()
try:
import pydantic # noqa: F401

# Only run this if pydantic is installed
DagRunPydantic.model_rebuild()
except ImportError:
pass
2 changes: 1 addition & 1 deletion airflow/serialization/pydantic/dataset.py
Expand Up @@ -17,7 +17,7 @@
from datetime import datetime
from typing import List, Optional

from pydantic import BaseModel as BaseModelPydantic, ConfigDict
from airflow.utils.pydantic import BaseModel as BaseModelPydantic, ConfigDict


class DagScheduleDatasetReferencePydantic(BaseModelPydantic):
Expand Down
3 changes: 1 addition & 2 deletions airflow/serialization/pydantic/job.py
Expand Up @@ -18,10 +18,9 @@
from functools import cached_property
from typing import Optional

from pydantic import BaseModel as BaseModelPydantic, ConfigDict

from airflow.executors.executor_loader import ExecutorLoader
from airflow.jobs.base_job_runner import BaseJobRunner
from airflow.utils.pydantic import BaseModel as BaseModelPydantic, ConfigDict


def check_runner_initialized(job_runner: Optional[BaseJobRunner], job_type: str) -> BaseJobRunner:
Expand Down
12 changes: 9 additions & 3 deletions airflow/serialization/pydantic/taskinstance.py
Expand Up @@ -19,7 +19,6 @@
from datetime import datetime
from typing import TYPE_CHECKING, Any, Iterable, Optional

from pydantic import BaseModel as BaseModelPydantic, ConfigDict, PlainSerializer, PlainValidator
from typing_extensions import Annotated

from airflow.models import Operator
Expand All @@ -29,16 +28,17 @@
from airflow.serialization.pydantic.dag_run import DagRunPydantic
from airflow.utils.log.logging_mixin import LoggingMixin
from airflow.utils.net import get_hostname
from airflow.utils.pydantic import BaseModel as BaseModelPydantic, ConfigDict, PlainSerializer, PlainValidator
from airflow.utils.session import NEW_SESSION, provide_session
from airflow.utils.xcom import XCOM_RETURN_KEY

if TYPE_CHECKING:
import pendulum
from pydantic_core.core_schema import ValidationInfo
from sqlalchemy.orm import Session

from airflow.models.dagrun import DagRun
from airflow.utils.context import Context
from airflow.utils.pydantic import ValidationInfo
from airflow.utils.state import DagRunState


Expand Down Expand Up @@ -430,4 +430,10 @@ def command_as_list(
)


TaskInstancePydantic.model_rebuild()
try:
import pydantic # noqa: F401

# Only run this if pydantic is installed
TaskInstancePydantic.model_rebuild()
except ImportError:
pass
2 changes: 1 addition & 1 deletion airflow/serialization/pydantic/tasklog.py
Expand Up @@ -16,7 +16,7 @@
# under the License.
from datetime import datetime

from pydantic import BaseModel as BaseModelPydantic, ConfigDict
from airflow.utils.pydantic import BaseModel as BaseModelPydantic, ConfigDict


class LogTemplatePydantic(BaseModelPydantic):
Expand Down
3 changes: 1 addition & 2 deletions airflow/serialization/serialized_objects.py
Expand Up @@ -70,15 +70,14 @@
if TYPE_CHECKING:
from inspect import Parameter

from pydantic import BaseModel

from airflow.models.baseoperatorlink import BaseOperatorLink
from airflow.models.expandinput import ExpandInput
from airflow.models.operator import Operator
from airflow.models.taskmixin import DAGNode
from airflow.serialization.json_schema import Validator
from airflow.ti_deps.deps.base_ti_dep import BaseTIDep
from airflow.timetables.base import Timetable
from airflow.utils.pydantic import BaseModel

HAS_KUBERNETES: bool
try:
Expand Down

0 comments on commit 88c435a

Please sign in to comment.