From 3f998c0b6c14b06222521eb65b50e77558dcec0e Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Mon, 14 Aug 2023 08:48:19 +0100 Subject: [PATCH 1/3] Add missing metadata fields --- cloudquery/sdk/schema/arrow.py | 2 ++ cloudquery/sdk/schema/table.py | 19 +++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/cloudquery/sdk/schema/arrow.py b/cloudquery/sdk/schema/arrow.py index 2f21bc1..a791847 100644 --- a/cloudquery/sdk/schema/arrow.py +++ b/cloudquery/sdk/schema/arrow.py @@ -7,3 +7,5 @@ METADATA_FALSE = b"false" METADATA_TABLE_NAME = b"cq:table_name" METADATA_TABLE_DESCRIPTION = b"cq:table_description" +METADATA_TABLE_TITLE = "cq:table_title" +METADATA_TABLE_DEPENDS_ON = "cq:table_depends_on" diff --git a/cloudquery/sdk/schema/table.py b/cloudquery/sdk/schema/table.py index d0b56ef..841aef4 100644 --- a/cloudquery/sdk/schema/table.py +++ b/cloudquery/sdk/schema/table.py @@ -1,7 +1,8 @@ from __future__ import annotations -from typing import List, Generator, Any import fnmatch +from typing import List + import pyarrow as pa from cloudquery.sdk.schema import arrow @@ -55,12 +56,22 @@ def from_arrow_schema(cls, schema: pa.Schema) -> Table: columns = [] for field in schema: columns.append(Column.from_arrow_field(field)) + parent = None + if schema.metadata[arrow.METADATA_TABLE_DEPENDS_ON]: + parent = Table( + name=schema.metadata[arrow.METADATA_TABLE_DEPENDS_ON].decode("utf-8"), + columns=[], + ) return cls( name=schema.metadata[arrow.METADATA_TABLE_NAME].decode("utf-8"), + title=schema.metadata[arrow.METADATA_TABLE_TITLE].decode("utf-8"), columns=columns, description=schema.metadata.get(arrow.METADATA_TABLE_DESCRIPTION).decode( "utf-8" ), + is_incremental=schema.metadata.get(arrow.METADATA_INCREMENTAL) + == arrow.METADATA_TRUE, + parent=parent, ) def to_arrow_schema(self): @@ -68,7 +79,11 @@ def to_arrow_schema(self): md = { arrow.METADATA_TABLE_NAME: self.name, arrow.METADATA_TABLE_DESCRIPTION: self.description, - # arrow.METADATA_CONSTRAINT_NAME: + arrow.METADATA_TABLE_TITLE: self.title, + arrow.METADATA_TABLE_DEPENDS_ON: self.parent.name if self.parent else "", + arrow.METADATA_INCREMENTAL: arrow.METADATA_TRUE + if self.is_incremental + else arrow.METADATA_FALSE, } for column in self.columns: fields.append(column.to_arrow_field()) From edf4ae2154e24e954e455d6322a4014029d1d6c7 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Mon, 14 Aug 2023 09:19:25 +0100 Subject: [PATCH 2/3] Add test --- cloudquery/sdk/schema/arrow.py | 4 ++-- cloudquery/sdk/schema/table.py | 31 ++++++++++++++----------------- tests/schema/test_table.py | 17 +++++++++++++++-- 3 files changed, 31 insertions(+), 21 deletions(-) diff --git a/cloudquery/sdk/schema/arrow.py b/cloudquery/sdk/schema/arrow.py index a791847..23deed2 100644 --- a/cloudquery/sdk/schema/arrow.py +++ b/cloudquery/sdk/schema/arrow.py @@ -7,5 +7,5 @@ METADATA_FALSE = b"false" METADATA_TABLE_NAME = b"cq:table_name" METADATA_TABLE_DESCRIPTION = b"cq:table_description" -METADATA_TABLE_TITLE = "cq:table_title" -METADATA_TABLE_DEPENDS_ON = "cq:table_depends_on" +METADATA_TABLE_TITLE = b"cq:table_title" +METADATA_TABLE_DEPENDS_ON = b"cq:table_depends_on" diff --git a/cloudquery/sdk/schema/table.py b/cloudquery/sdk/schema/table.py index 841aef4..6d76e1d 100644 --- a/cloudquery/sdk/schema/table.py +++ b/cloudquery/sdk/schema/table.py @@ -15,14 +15,14 @@ class Client: class Table: def __init__( - self, - name: str, - columns: List[Column], - title: str = "", - description: str = "", - parent: Table = None, - relations: List[Table] = None, - is_incremental: bool = False, + self, + name: str, + columns: List[Column], + title: str = "", + description: str = "", + parent: Table = None, + relations: List[Table] = None, + is_incremental: bool = False, ) -> None: self.name = name self.columns = columns @@ -57,20 +57,19 @@ def from_arrow_schema(cls, schema: pa.Schema) -> Table: for field in schema: columns.append(Column.from_arrow_field(field)) parent = None - if schema.metadata[arrow.METADATA_TABLE_DEPENDS_ON]: + if arrow.METADATA_TABLE_DEPENDS_ON in schema.metadata: parent = Table( name=schema.metadata[arrow.METADATA_TABLE_DEPENDS_ON].decode("utf-8"), columns=[], ) return cls( - name=schema.metadata[arrow.METADATA_TABLE_NAME].decode("utf-8"), - title=schema.metadata[arrow.METADATA_TABLE_TITLE].decode("utf-8"), + name=schema.metadata.get(arrow.METADATA_TABLE_NAME, b"").decode("utf-8"), + title=schema.metadata.get(arrow.METADATA_TABLE_TITLE, b"").decode("utf-8"), columns=columns, description=schema.metadata.get(arrow.METADATA_TABLE_DESCRIPTION).decode( "utf-8" ), - is_incremental=schema.metadata.get(arrow.METADATA_INCREMENTAL) - == arrow.METADATA_TRUE, + is_incremental=schema.metadata.get(arrow.METADATA_INCREMENTAL, arrow.METADATA_FALSE) == arrow.METADATA_TRUE, parent=parent, ) @@ -81,9 +80,7 @@ def to_arrow_schema(self): arrow.METADATA_TABLE_DESCRIPTION: self.description, arrow.METADATA_TABLE_TITLE: self.title, arrow.METADATA_TABLE_DEPENDS_ON: self.parent.name if self.parent else "", - arrow.METADATA_INCREMENTAL: arrow.METADATA_TRUE - if self.is_incremental - else arrow.METADATA_FALSE, + arrow.METADATA_INCREMENTAL: arrow.METADATA_TRUE if self.is_incremental else arrow.METADATA_FALSE, } for column in self.columns: fields.append(column.to_arrow_field()) @@ -102,7 +99,7 @@ def tables_to_arrow_schemas(tables: List[Table]): def filter_dfs( - tables: List[Table], include_tables: List[str], skip_tables: List[str] + tables: List[Table], include_tables: List[str], skip_tables: List[str] ) -> List[Table]: filtered: List[Table] = [] for table in tables: diff --git a/tests/schema/test_table.py b/tests/schema/test_table.py index 9958ad7..b1f8f27 100644 --- a/tests/schema/test_table.py +++ b/tests/schema/test_table.py @@ -4,5 +4,18 @@ def test_table(): - table = Table("test_table", [Column("test_column", pa.int32())]) - table.to_arrow_schema() + table = Table(name="test_table", + columns=[Column("test_column", pa.int32())], + title="Test Table", + description="Test description", + parent=Table(name="parent_table", columns=[]), + relations=[], + is_incremental=True, + ) + sch = table.to_arrow_schema() + got = Table.from_arrow_schema(sch) + assert got.name == table.name + assert got.title == table.title + assert got.description == table.description + assert got.is_incremental == table.is_incremental + assert got.parent.name == table.parent.name From b7c69b0a93f13e25d5b0f7d291606a6d8818d0e9 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Mon, 14 Aug 2023 09:19:41 +0100 Subject: [PATCH 3/3] Fmt --- cloudquery/sdk/schema/table.py | 27 ++++++++++++++++----------- tests/schema/test_table.py | 17 +++++++++-------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/cloudquery/sdk/schema/table.py b/cloudquery/sdk/schema/table.py index 6d76e1d..36089d3 100644 --- a/cloudquery/sdk/schema/table.py +++ b/cloudquery/sdk/schema/table.py @@ -15,14 +15,14 @@ class Client: class Table: def __init__( - self, - name: str, - columns: List[Column], - title: str = "", - description: str = "", - parent: Table = None, - relations: List[Table] = None, - is_incremental: bool = False, + self, + name: str, + columns: List[Column], + title: str = "", + description: str = "", + parent: Table = None, + relations: List[Table] = None, + is_incremental: bool = False, ) -> None: self.name = name self.columns = columns @@ -69,7 +69,10 @@ def from_arrow_schema(cls, schema: pa.Schema) -> Table: description=schema.metadata.get(arrow.METADATA_TABLE_DESCRIPTION).decode( "utf-8" ), - is_incremental=schema.metadata.get(arrow.METADATA_INCREMENTAL, arrow.METADATA_FALSE) == arrow.METADATA_TRUE, + is_incremental=schema.metadata.get( + arrow.METADATA_INCREMENTAL, arrow.METADATA_FALSE + ) + == arrow.METADATA_TRUE, parent=parent, ) @@ -80,7 +83,9 @@ def to_arrow_schema(self): arrow.METADATA_TABLE_DESCRIPTION: self.description, arrow.METADATA_TABLE_TITLE: self.title, arrow.METADATA_TABLE_DEPENDS_ON: self.parent.name if self.parent else "", - arrow.METADATA_INCREMENTAL: arrow.METADATA_TRUE if self.is_incremental else arrow.METADATA_FALSE, + arrow.METADATA_INCREMENTAL: arrow.METADATA_TRUE + if self.is_incremental + else arrow.METADATA_FALSE, } for column in self.columns: fields.append(column.to_arrow_field()) @@ -99,7 +104,7 @@ def tables_to_arrow_schemas(tables: List[Table]): def filter_dfs( - tables: List[Table], include_tables: List[str], skip_tables: List[str] + tables: List[Table], include_tables: List[str], skip_tables: List[str] ) -> List[Table]: filtered: List[Table] = [] for table in tables: diff --git a/tests/schema/test_table.py b/tests/schema/test_table.py index b1f8f27..9aad2bc 100644 --- a/tests/schema/test_table.py +++ b/tests/schema/test_table.py @@ -4,14 +4,15 @@ def test_table(): - table = Table(name="test_table", - columns=[Column("test_column", pa.int32())], - title="Test Table", - description="Test description", - parent=Table(name="parent_table", columns=[]), - relations=[], - is_incremental=True, - ) + table = Table( + name="test_table", + columns=[Column("test_column", pa.int32())], + title="Test Table", + description="Test description", + parent=Table(name="parent_table", columns=[]), + relations=[], + is_incremental=True, + ) sch = table.to_arrow_schema() got = Table.from_arrow_schema(sch) assert got.name == table.name