Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix annotation processing and rebuilding, mark dataclass as complex #980

Merged
merged 24 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7ee04f7
Add more test cases for extract_inner_type
sultaniman Feb 19, 2024
571b380
mark dataclasses as complex type
sultaniman Feb 19, 2024
4bb7ce7
Fix annotation reconstruction and better union type checks
sultaniman Feb 19, 2024
9c2e88c
Add more checks to typing tests and more complex pydantic tests
sultaniman Feb 19, 2024
9160055
Ignore mypy warning
sultaniman Feb 19, 2024
02aaba1
Adjust mypy warning ignore reason
sultaniman Feb 19, 2024
0a4c29f
Move complex tests for extract_inner_type to libs
sultaniman Feb 19, 2024
9ab1fdd
Upgrade typing-extensions to 4.9.0 and mypy to 1.8.0
sultaniman Feb 19, 2024
677245e
Ignore mypy warning for syntax error
sultaniman Feb 19, 2024
a7000ee
Pass tuple to pass annotation metadata to Annotated
sultaniman Feb 20, 2024
8acf0f6
Remove generic base for test model
sultaniman Feb 20, 2024
50b70e0
Remove unused mypy error code
sultaniman Feb 20, 2024
9d427be
Add more test cases
sultaniman Feb 20, 2024
101fdd0
Fix linting errors
sultaniman Feb 20, 2024
b448a48
Fix linting errors
sultaniman Feb 20, 2024
a4a8fac
Format code
sultaniman Feb 20, 2024
b85a68c
Format code
sultaniman Feb 20, 2024
b4bee64
Adjust tests for python<3.10
sultaniman Feb 20, 2024
eaf8a4e
Use assert_type instead of simple assert
sultaniman Feb 20, 2024
d2e0f07
Revert
sultaniman Feb 20, 2024
1f37738
Remove unused import
sultaniman Feb 20, 2024
2b566de
Define UUID4 and remove created file
sultaniman Feb 21, 2024
d3eba19
Revert mypy and typing-extensions update
sultaniman Feb 21, 2024
6b73db9
Fix linting issues
sultaniman Feb 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion dlt/common/data_types/type_helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import binascii
import base64
import dataclasses
import datetime # noqa: I251
from collections.abc import Mapping as C_Mapping, Sequence as C_Sequence
from typing import Any, Type, Literal, Union, cast
Expand Down Expand Up @@ -55,7 +56,7 @@ def py_type_to_sc_type(t: Type[Any]) -> TDataType:
return "bigint"
if issubclass(t, bytes):
return "binary"
if issubclass(t, (C_Mapping, C_Sequence)):
if dataclasses.is_dataclass(t) or issubclass(t, (C_Mapping, C_Sequence)):
return "complex"
# Enum is coerced to str or int respectively
if issubclass(t, Enum):
Expand Down
2 changes: 1 addition & 1 deletion dlt/common/destination/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def should_truncate_table_before_load_on_staging_destination(self, table: TTable
return True


TDestinationReferenceArg = Union[str, "Destination", Callable[..., "Destination"], None]
TDestinationReferenceArg = Union[str, "Destination", Callable[..., "Destination"], None] # type: ignore[type-arg]


class Destination(ABC, Generic[TDestinationConfig, TDestinationClient]):
Expand Down
13 changes: 7 additions & 6 deletions dlt/common/libs/pydantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,15 +100,16 @@ def pydantic_to_table_schema_columns(
# This applies to pydantic.Json fields, the inner type is the type after json parsing
# (In pydantic 2 the outer annotation is the final type)
annotation = inner_annotation

nullable = is_optional_type(annotation)

if is_union_type(annotation):
inner_type = get_args(annotation)[0]
else:
inner_type = extract_inner_type(annotation)
inner_type = extract_inner_type(annotation)
if is_union_type(inner_type):
first_argument_type = get_args(inner_type)[0]
inner_type = extract_inner_type(first_argument_type)

if inner_type is Json: # Same as `field: Json[Any]`
inner_type = Any
inner_type = Any # type: ignore[assignment]

if inner_type is Any: # Any fields will be inferred from data
continue
Expand Down Expand Up @@ -229,7 +230,7 @@ def _process_annotation(t_: Type[Any]) -> Type[Any]:
"""Recursively recreates models with applied schema contract"""
if is_annotated(t_):
a_t, *a_m = get_args(t_)
return Annotated[_process_annotation(a_t), a_m] # type: ignore
return Annotated[_process_annotation(a_t), tuple(a_m)] # type: ignore[return-value]
elif is_list_generic_type(t_):
l_t: Type[Any] = get_args(t_)[0]
try:
Expand Down
2 changes: 1 addition & 1 deletion dlt/common/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
TVariantBase = TypeVar("TVariantBase", covariant=True)
TVariantRV = Tuple[str, Any]
VARIANT_FIELD_FORMAT = "v_%s"
TFileOrPath = Union[str, os.PathLike, IO[Any]]
TFileOrPath = Union[str, os.PathLike, IO[Any]] # type: ignore[type-arg]


@runtime_checkable
Expand Down
8,082 changes: 3,933 additions & 4,149 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tests/common/test_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)
from typing_extensions import Annotated, get_args


from dlt.common.configuration.specs.base_configuration import (
BaseConfiguration,
get_config_if_union_hint,
Expand Down
31 changes: 25 additions & 6 deletions tests/libs/test_pydantic.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
import sys
from copy import copy
from dataclasses import dataclass, field
import uuid
import pytest
from typing import (
ClassVar,
Final,
Generic,
Sequence,
Mapping,
Dict,
MutableMapping,
MutableSequence,
TypeVar,
Union,
Optional,
List,
Expand All @@ -29,7 +33,7 @@
validate_items,
create_list_model,
)
from pydantic import BaseModel, Json, AnyHttpUrl, ConfigDict, ValidationError
from pydantic import UUID4, BaseModel, Json, AnyHttpUrl, ConfigDict, ValidationError

from dlt.common.schema.exceptions import DataValidationError

Expand Down Expand Up @@ -118,6 +122,18 @@ class ModelWithConfig(Model):
)


# String-valued enum of book genres (mixes in `str`, so each member is also a plain string).
# It is one of the alternatives in the `favorite_book` union declared on the `User` model.
class BookGenre(str, Enum):
    scifi = "scifi"
    action = "action"
    thriller = "thriller"


# Standard-library dataclass (not a pydantic model) used as a field type on the `User` model,
# so the tests cover a dataclass nested inside an `Annotated[Union[...]]` annotation.
@dataclass
class BookInfo:
    # Both fields are optional strings with fixed placeholder defaults.
    isbn: Optional[str] = field(default="ISBN")
    author: Optional[str] = field(default="Charles Bukowski")


class UserLabel(BaseModel):
label: str

Expand All @@ -134,12 +150,16 @@ class UserAddress(BaseModel):

class User(BaseModel):
user_id: int
account_id: UUID4
optional_uuid: Optional[UUID4]
name: Annotated[str, "PII", "name"]
favorite_book: Annotated[Union[Annotated[BookInfo, "meta"], BookGenre, None], "union metadata"]
created_at: Optional[datetime]
labels: List[str]
user_label: UserLabel
user_labels: List[UserLabel]
address: Annotated[UserAddress, "PII", "address"]
uuid_or_str: Union[str, UUID4, None]
unity: Union[UserAddress, UserLabel, Dict[str, UserAddress]]
location: Annotated[Optional[Union[str, List[str]]], None]
something_required: Annotated[Union[str, int], type(None)]
Expand All @@ -151,6 +171,9 @@ class User(BaseModel):

USER_INSTANCE_DATA = dict(
user_id=1,
account_id=uuid.uuid4(),
optional_uuid=None,
favorite_book=BookInfo(isbn="isbn-xyz", author="author"),
name="random name",
created_at=datetime.now(),
labels=["str"],
Expand Down Expand Up @@ -180,6 +203,7 @@ class User(BaseModel):
],
),
unity=dict(label="123"),
uuid_or_str=uuid.uuid4(),
location="Florida keys",
final_location="Ginnie Springs",
something_required=123,
Expand Down Expand Up @@ -387,14 +411,9 @@ class UserPipe(BaseModel):
# and can generate the same schema from the class and from the class instance.

user = UserPipe(**USER_INSTANCE_DATA) # type: ignore
user_using_optional = User(**USER_INSTANCE_DATA) # type: ignore
schema_from_user_class = pydantic_to_table_schema_columns(UserPipe)
schema_from_user_instance = pydantic_to_table_schema_columns(user)
schema_from_user_class_using_optional = pydantic_to_table_schema_columns(user_using_optional)
schema_from_user_instance_using_optional = pydantic_to_table_schema_columns(user_using_optional)
assert schema_from_user_class == schema_from_user_instance
assert schema_from_user_class_using_optional == schema_from_user_instance_using_optional
assert schema_from_user_class == schema_from_user_class_using_optional
assert schema_from_user_class["location"]["nullable"] is True
assert schema_from_user_class["final_location"]["nullable"] is False
assert schema_from_user_class["something_required"]["nullable"] is False
Expand Down
46 changes: 46 additions & 0 deletions tests/libs/test_typing_extended.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from dataclasses import dataclass
from typing import (
sultaniman marked this conversation as resolved.
Show resolved Hide resolved
Final,
Literal,
TypedDict,
Optional,
Union,
)
from typing_extensions import Annotated
from uuid import UUID

from pydantic import UUID4

from dlt.common.typing import (
extract_inner_type,
)


# Minimal TypedDict with a single `str` key; used to build the optional/union aliases below.
class TTestTyDi(TypedDict):
    field: str


# Minimal dataclass fixture: the tests below expect `extract_inner_type` to hand this class back
# unchanged once the `Annotated` / `Optional` wrappers around it are removed.
@dataclass
class MyDataclass:
    booba_tooba: str


# Literal of three string values.
TTestLi = Literal["a", "b", "c"]
# Optional wrappers around the literal and the TypedDict defined above.
TOptionalLi = Optional[TTestLi]
TOptionalTyDi = Optional[TTestTyDi]

# Optional union that combines the TypedDict and the literal.
TOptionalUnionLiTyDi = Optional[Union[TTestTyDi, TTestLi]]


def test_extract_annotated_inner_type() -> None:
    """Check what `extract_inner_type` returns for `Annotated` / `Optional` / `Final` wrapped hints.

    Expectations pinned by the assertions below:
    * an optional literal of strings resolves to `str`,
    * `Optional[...]`, `Final[...]` and `Annotated[..., metadata]` are stripped in any nesting order,
    * a non-optional `Union[str, int]` is returned as the union itself,
    * pydantic's `UUID4` resolves to `uuid.UUID`,
    * a dataclass is returned as the dataclass type itself.
    """
    # Literal["a", "b", "c"] wrapped in Optional and Annotated -> base type of the literal values
    assert extract_inner_type(Annotated[TOptionalLi, Optional]) is str  # type: ignore[arg-type]
    assert extract_inner_type(Annotated[TOptionalLi, "random metadata string"]) is str  # type: ignore[arg-type]

    # Wrappers nested in different orders around a plain `str`
    assert extract_inner_type(Optional[Annotated[str, "random metadata string"]]) is str  # type: ignore[arg-type]
    assert extract_inner_type(Final[Annotated[Optional[str], "annotated metadata"]]) is str  # type: ignore[arg-type]
    assert extract_inner_type(Final[Annotated[Optional[str], None]]) is str  # type: ignore[arg-type]

    # Union hints are compared with `==`, not `is`: two separately written `Union[str, int]`
    # expressions are only the same object when the typing module caches them, which is an
    # implementation detail. Equality is the documented way to compare unions.
    assert extract_inner_type(Final[Annotated[Union[str, int], None]]) == Union[str, int]  # type: ignore[arg-type]
    assert extract_inner_type(Annotated[Union[str, int], type(None)]) == Union[str, int]  # type: ignore[arg-type]

    # Classes are singletons, so identity checks are safe here
    assert extract_inner_type(Annotated[Optional[UUID4], "meta"]) is UUID  # type: ignore[arg-type]
    assert extract_inner_type(Annotated[Optional[MyDataclass], "meta"]) is MyDataclass  # type: ignore[arg-type]
    assert extract_inner_type(Annotated[MyDataclass, Optional]) is MyDataclass  # type: ignore[arg-type]
    assert extract_inner_type(Annotated[MyDataclass, "random metadata string"]) is MyDataclass  # type: ignore[arg-type]
Loading