Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
987f454
fix: update jsonschema constraint to allow 4.20.0+ for fastmcp compat…
devin-ai-integration[bot] Aug 29, 2025
8c84b6d
fix: migrate deprecated RefResolver to referencing library for jsonsc…
devin-ai-integration[bot] Aug 29, 2025
42e2d7f
style: apply formatting fixes from format-fix command
devin-ai-integration[bot] Aug 29, 2025
fc37c17
Apply suggestion from @aaronsteers
aaronsteers Aug 29, 2025
3de3785
fix: add referencing dependency to resolve deptry CI failure
devin-ai-integration[bot] Aug 29, 2025
cffad9f
feat: simplify jsonschema migration to require 4.18.0+ minimum
devin-ai-integration[bot] Aug 29, 2025
5ccaca9
Apply suggestion from @aaronsteers
aaronsteers Aug 29, 2025
5e485bd
fix: regenerate poetry.lock after jsonschema constraint update
devin-ai-integration[bot] Aug 29, 2025
def67f7
feat: broaden jsonschema constraint to >=4.17.3,<5.0 based on referen…
devin-ai-integration[bot] Aug 29, 2025
1f55214
fix: implement hybrid RefResolver approach for backward compatibility
devin-ai-integration[bot] Aug 29, 2025
f49761b
style: apply formatting fixes to transform.py after hybrid RefResolve…
devin-ai-integration[bot] Aug 29, 2025
b8fa937
resolve: merge conflict in pyproject.toml referencing dependency
devin-ai-integration[bot] Aug 29, 2025
edc20d6
fix: improve hybrid RefResolver robustness for CI compatibility
devin-ai-integration[bot] Aug 29, 2025
164afb6
cherry-pick-me: unrelated fix of breaking change in create_source()
aaronsteers Aug 29, 2025
d14cd84
format cleanup
aaronsteers Aug 29, 2025
5d75886
style: apply formatting fixes to hybrid RefResolver implementation
devin-ai-integration[bot] Aug 29, 2025
061de78
Merge branch 'devin/1756425696-jsonschema-version-pin' of https://git…
devin-ai-integration[bot] Aug 29, 2025
8f047b9
Apply suggestion from @aaronsteers
aaronsteers Aug 29, 2025
a84f50a
Apply suggestion from @aaronsteers
aaronsteers Aug 29, 2025
3f19659
revert formatting changes
aaronsteers Aug 29, 2025
96eabbf
revert changes moved to #738
aaronsteers Aug 29, 2025
aa5f6a4
clean up validator implementation
aaronsteers Aug 29, 2025
bb13c89
fix imports
aaronsteers Aug 29, 2025
43ee692
try making DRY
aaronsteers Aug 29, 2025
d2c7a51
revert and retry
aaronsteers Aug 29, 2025
35dda44
Update pyproject.toml
aaronsteers Aug 29, 2025
c137302
chore: relock dependencies with referencing >=0.36.2 constraint
devin-ai-integration[bot] Aug 29, 2025
cb69302
fix: update Poetry to 2.0.1 in Dockerfile and fix referencing usage i…
devin-ai-integration[bot] Aug 29, 2025
93aeb9f
Merge remote-tracking branch 'origin/main' into devin/1756425696-json…
aaronsteers Aug 29, 2025
f325a11
poe lock
aaronsteers Aug 29, 2025
98d3296
fixes, simplification
aaronsteers Aug 29, 2025
c742f80
clean up
aaronsteers Aug 29, 2025
5a6aea4
fix unused stray ref
aaronsteers Aug 29, 2025
7b284d5
remove commented-out code (confirmed working)
aaronsteers Aug 29, 2025
749106b
fix format
aaronsteers Aug 29, 2025
51a5ddf
resolve mypy
aaronsteers Aug 29, 2025
09916a9
remove comment
aaronsteers Aug 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions airbyte_cdk/manifest_server/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ FROM python:3.12-slim-bookworm
RUN apt-get update && \
apt-get install -y git && \
rm -rf /var/lib/apt/lists/* && \
pip install poetry==1.8.3
pip install poetry==2.0.1

# Configure poetry to not create virtual environments and disable interactive mode
ENV POETRY_NO_INTERACTION=1 \
Expand Down Expand Up @@ -42,4 +42,4 @@ USER airbyte:airbyte

EXPOSE 8080

CMD ["uvicorn", "airbyte_cdk.manifest_server.app:app", "--host", "0.0.0.0", "--port", "8080"]
CMD ["uvicorn", "airbyte_cdk.manifest_server.app:app", "--host", "0.0.0.0", "--port", "8080"]
38 changes: 29 additions & 9 deletions airbyte_cdk/sources/utils/schema_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@
import json
import os
import pkgutil
from typing import Any, ClassVar, Dict, List, Mapping, MutableMapping, Optional, Tuple
from copy import deepcopy
from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Mapping, MutableMapping, Tuple, cast

import jsonref
from jsonschema import RefResolver, validate
from jsonschema import validate
from jsonschema.exceptions import ValidationError
from pydantic.v1 import BaseModel, Field
from referencing import Registry, Resource
from referencing._core import Resolver # used for type hints
from referencing.jsonschema import DRAFT7

from airbyte_cdk.models import ConnectorSpecification, FailureType
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
Expand Down Expand Up @@ -63,18 +67,30 @@ def resolve_ref_links(obj: Any) -> Any:
return obj


def _expand_refs(schema: Any, ref_resolver: Optional[RefResolver] = None) -> None:
def get_ref_resolver_registry(schema: dict[str, Any]) -> Registry:
    """Build a ``referencing`` registry whose only resource is ``schema``.

    The schema is wrapped in a ``Resource`` with JSON Schema Draft 7 as the
    default specification, then registered under the empty URI in a fresh
    ``Registry``.

    :param schema: the JSON schema to register
    :return: a registry containing the schema under the URI ``""``
    """
    root_resource: Resource = Resource.from_contents(schema, default_specification=DRAFT7)
    registry = Registry().with_resource("", root_resource)
    # Mypy has a hard time detecting this return type, hence the explicit cast.
    return cast("Registry", registry)


def _expand_refs(schema: Any, ref_resolver: Resolver) -> None:
"""Internal function to iterate over schema and replace all occurrences of $ref with their definitions. Recursive.

:param schema: schema that will be patched
:param ref_resolver: resolver used to look up the definition that each $ref points to
"""
ref_resolver = ref_resolver or RefResolver.from_schema(schema)

if isinstance(schema, MutableMapping):
if "$ref" in schema:
ref_url = schema.pop("$ref")
_, definition = ref_resolver.resolve(ref_url)
definition = ref_resolver.lookup(ref_url).contents
_expand_refs(
definition, ref_resolver=ref_resolver
) # expand refs in definitions as well
Expand All @@ -90,10 +106,14 @@ def _expand_refs(schema: Any, ref_resolver: Optional[RefResolver] = None) -> Non
def expand_refs(schema: Any) -> None:
"""Iterate over schema and replace all occurrences of $ref with their definitions.

If a "definitions" section is present at the root of the schema, it will be removed
after $ref resolution is complete.

:param schema: schema that will be patched
"""
_expand_refs(schema)
schema.pop("definitions", None) # remove definitions created by $ref
ref_resolver = get_ref_resolver_registry(schema).resolver()
_expand_refs(schema, ref_resolver)
schema.pop("definitions", None)


def rename_key(schema: Any, old_key: str, new_key: str) -> None:
Expand Down
38 changes: 25 additions & 13 deletions airbyte_cdk/sources/utils/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,25 @@
#

import logging
from copy import deepcopy
from enum import Flag, auto
from typing import Any, Callable, Dict, Generator, Mapping, Optional, cast
from typing import TYPE_CHECKING, Any, Callable, Dict, Generator, Mapping, Optional, cast

from jsonschema import Draft7Validator, ValidationError, validators
from referencing import Registry, Resource
from referencing._core import Resolver
from referencing.exceptions import Unresolvable
from referencing.jsonschema import DRAFT7

from airbyte_cdk.sources.utils.schema_helpers import expand_refs

from .schema_helpers import get_ref_resolver_registry

# Prefer importing Validator from jsonschema.validators; fall back to the
# package root when that import is unavailable. Only ImportError is caught so
# that unrelated failures (and KeyboardInterrupt/SystemExit) are not swallowed.
try:
    from jsonschema.validators import Validator
except ImportError:
    from jsonschema import Validator

from jsonschema import Draft7Validator, RefResolver, ValidationError, Validator, validators

MAX_NESTING_DEPTH = 3
json_to_python_simple = {
Expand Down Expand Up @@ -191,30 +206,27 @@ def normalizator(
validators parameter for detailed description.
:
"""
# Very first step is to expand $refs in the schema itself:
expand_refs(schema)

# Now we can expand $refs in the property value:
if isinstance(property_value, dict):
expand_refs(property_value)

def resolve(subschema: dict[str, Any]) -> dict[str, Any]:
if "$ref" in subschema:
_, resolved = cast(
RefResolver,
validator_instance.resolver,
).resolve(subschema["$ref"])
return cast(dict[str, Any], resolved)
return subschema
# Now we can validate and normalize the values:

# Transform object and array values before running json schema type checking for each element.
# Recursively normalize every value of the "instance" sub-object,
# if "instance" is an incorrect type - skip recursive normalization of "instance"
if schema_key == "properties" and isinstance(instance, dict):
for k, subschema in property_value.items():
if k in instance:
subschema = resolve(subschema)
instance[k] = self.__normalize(instance[k], subschema)
# Recursively normalize every item of the "instance" sub-array,
# if "instance" is an incorrect type - skip recursive normalization of "instance"
elif schema_key == "items" and isinstance(instance, list):
subschema = resolve(property_value)
for index, item in enumerate(instance):
instance[index] = self.__normalize(item, subschema)
instance[index] = self.__normalize(item, property_value)

# Running native jsonschema traverse algorithm after field normalization is done.
yield from original_validator(
Expand Down
12 changes: 7 additions & 5 deletions airbyte_cdk/utils/spec_schema_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,23 @@
import re
from typing import Any

from jsonschema import RefResolver
from referencing import Registry, Resource
from referencing.jsonschema import DRAFT7


def resolve_refs(schema: dict[str, Any]) -> dict[str, Any]:
"""
For spec schemas generated using Pydantic models, the resulting JSON schema can contain refs between object
relationships.
"""
json_schema_ref_resolver = RefResolver.from_schema(schema)
resource = Resource.from_contents(schema, default_specification=DRAFT7)
registry = Registry().with_resource("", resource)
resolver = registry.resolver()
str_schema = json.dumps(schema)
for ref_block in re.findall(r'{"\$ref": "#\/definitions\/.+?(?="})"}', str_schema):
ref = json.loads(ref_block)["$ref"]
str_schema = str_schema.replace(
ref_block, json.dumps(json_schema_ref_resolver.resolve(ref)[1])
)
resolved = resolver.lookup(ref).contents
str_schema = str_schema.replace(ref_block, json.dumps(resolved))
pyschema: dict[str, Any] = json.loads(str_schema)
del pyschema["definitions"]
return pyschema
Loading
Loading