From 54d341002f25b37f8f9381b0da4d1b7f3628d018 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 3 Mar 2021 17:09:16 -0800 Subject: [PATCH 1/3] Fetch table and column descriptions --- pybigquery/sqlalchemy_bigquery.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pybigquery/sqlalchemy_bigquery.py b/pybigquery/sqlalchemy_bigquery.py index 79ffae16..352a9406 100644 --- a/pybigquery/sqlalchemy_bigquery.py +++ b/pybigquery/sqlalchemy_bigquery.py @@ -500,11 +500,18 @@ def get_columns(self, connection, table_name, schema=None, **kw): 'name': col.name, 'type': types.ARRAY(coltype) if col.mode == 'REPEATED' else coltype, 'nullable': col.mode == 'NULLABLE' or col.mode == 'REPEATED', + 'comment': col.description, 'default': None, }) return result + def get_table_comment(self, connection, table_name, schema=None, **kw): + table = self._get_table(connection, table_name, schema) + return { + 'text': table.description, + } + def get_foreign_keys(self, connection, table_name, schema=None, **kw): # BigQuery has no support for foreign keys. return [] From 65e502fdf6c79312820cdd92bdd325d1b6dd0747 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 8 Mar 2021 21:28:55 -0500 Subject: [PATCH 2/3] Reset coltype for unrecognized columns --- pybigquery/sqlalchemy_bigquery.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pybigquery/sqlalchemy_bigquery.py b/pybigquery/sqlalchemy_bigquery.py index 352a9406..01c91eb6 100644 --- a/pybigquery/sqlalchemy_bigquery.py +++ b/pybigquery/sqlalchemy_bigquery.py @@ -495,6 +495,7 @@ def get_columns(self, connection, table_name, schema=None, **kw): coltype = _type_map[col.field_type] except KeyError: util.warn("Did not recognize type '%s' of column '%s'" % (col.field_type, col.name)) + coltype = types.NullType result.append({ 'name': col.name, From 661a9edf46e47c3f77a605547cf906712f614961 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 12 Mar 2021 15:18:04 -0600 Subject: [PATCH 3/3] add tests --- dev_requirements.txt | 8 ++++---- scripts/load_test_data.sh | 2 +- scripts/schema.json | 1 + test/test_sqlalchemy_bigquery.py | 17 +++++++++++++---- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/dev_requirements.txt b/dev_requirements.txt index 09e9719d..4e9597d3 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,7 +1,7 @@ sqlalchemy>=1.1.9 google-cloud-bigquery>=1.6.0 -future==0.16.0 +future==0.18.2 -pytest==3.2.2 -pytest-flake8==1.0.6 -pytz==2017.2 \ No newline at end of file +pytest==6.2.2 +pytest-flake8==1.0.7 +pytz==2021.1 \ No newline at end of file diff --git a/scripts/load_test_data.sh b/scripts/load_test_data.sh index 6050993a..9f4e7594 100755 --- a/scripts/load_test_data.sh +++ b/scripts/load_test_data.sh @@ -9,7 +9,7 @@ bq rm -f -t test_pybigquery.sample_dml bq rm -f -t test_pybigquery.sample_view bq rm -f -t test_pybigquery_location.sample_one_row -bq mk --table --schema=$(dirname $0)/schema.json --time_partitioning_field timestamp --clustering_fields integer,string test_pybigquery.sample +bq mk --table --schema=$(dirname $0)/schema.json --time_partitioning_field timestamp --clustering_fields integer,string --description 'A sample table containing most data types.' test_pybigquery.sample bq mk --table --schema=$(dirname $0)/schema.json --time_partitioning_field timestamp --clustering_fields integer,string test_pybigquery_alt.sample_alt bq load --source_format=NEWLINE_DELIMITED_JSON --schema=$(dirname $0)/schema.json test_pybigquery.sample $(dirname $0)/sample.json diff --git a/scripts/schema.json b/scripts/schema.json index ffcf6fa9..8dc96ce0 100644 --- a/scripts/schema.json +++ b/scripts/schema.json @@ -52,6 +52,7 @@ "mode": "NULLABLE", "name": "record", "type": "RECORD", + "description": "In Standard SQL this data type is a STRUCT.", "fields": [ { "mode": "NULLABLE", diff --git a/test/test_sqlalchemy_bigquery.py b/test/test_sqlalchemy_bigquery.py index f45265f2..8c77f227 100644 --- a/test/test_sqlalchemy_bigquery.py +++ b/test/test_sqlalchemy_bigquery.py @@ -86,7 +86,13 @@ {'name': 'datetime', 'type': types.DATETIME(), 'nullable': True, 'default': None}, {'name': 'time', 'type': types.TIME(), 'nullable': True, 'default': None}, {'name': 'bytes', 'type': types.BINARY(), 'nullable': True, 'default': None}, - {'name': 'record', 'type': types.JSON(), 'nullable': True, 'default': None}, + { + 'name': 'record', + 'type': types.JSON(), + 'nullable': True, + 'default': None, + 'comment': 'In Standard SQL this data type is a STRUCT.', + }, {'name': 'record.name', 'type': types.String(), 'nullable': True, 'default': None}, {'name': 'record.age', 'type': types.Integer(), 'nullable': True, 'default': None}, {'name': 'nested_record', 'type': types.JSON(), 'nullable': True, 'default': None}, @@ -225,8 +231,9 @@ def test_dataset_location(engine_with_location): def test_reflect_select(table, table_using_test_dataset): for table in [table, table_using_test_dataset]: - assert len(table.c) == 18 + assert table.comment == "A sample table containing most data types." + assert len(table.c) == 18 assert isinstance(table.c.integer, Column) assert isinstance(table.c.integer.type, types.Integer) assert isinstance(table.c.timestamp.type, types.TIMESTAMP) @@ -526,9 +533,10 @@ def test_get_columns(inspector, inspector_using_test_dataset): for columns in columns_queries: for i, col in enumerate(columns): sample_col = SAMPLE_COLUMNS[i] + assert col['comment'] == sample_col.get('comment') + assert col['default'] == sample_col['default'] assert col['name'] == sample_col['name'] assert col['nullable'] == sample_col['nullable'] - assert col['default'] == sample_col['default'] assert col['type'].__class__.__name__ == sample_col['type'].__class__.__name__ columns_without_schema = inspector_using_test_dataset.get_columns('sample') @@ -537,9 +545,10 @@ def test_get_columns(inspector, inspector_using_test_dataset): for columns in columns_queries: for i, col in enumerate(columns): sample_col = SAMPLE_COLUMNS[i] + assert col['comment'] == sample_col.get('comment') + assert col['default'] == sample_col['default'] assert col['name'] == sample_col['name'] assert col['nullable'] == sample_col['nullable'] - assert col['default'] == sample_col['default'] assert col['type'].__class__.__name__ == sample_col['type'].__class__.__name__