From e0aafc0a380f0d48c906a4f977d2baa6ea855893 Mon Sep 17 00:00:00 2001 From: Sergey Vasilyev Date: Fri, 5 May 2023 10:47:39 +0200 Subject: [PATCH 1/4] Skip JSON/JSONB cross-db tests when the db does not have this type --- tests/test_database_types.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/test_database_types.py b/tests/test_database_types.py index 9729b00d..e0cb9449 100644 --- a/tests/test_database_types.py +++ b/tests/test_database_types.py @@ -526,16 +526,17 @@ def get_test_db_pairs(): for source_db, source_type_categories, target_db, target_type_categories in get_test_db_pairs(): for type_category, source_types in source_type_categories.items(): # int, datetime, .. for source_type in source_types: - for target_type in target_type_categories[type_category]: - type_pairs.append( - ( - source_db, - target_db, - source_type, - target_type, - type_category, + if type_category in target_type_categories: # only cross-compatible types + for target_type in target_type_categories[type_category]: + type_pairs.append( + ( + source_db, + target_db, + source_type, + target_type, + type_category, + ) ) - ) def sanitize(name): From 72d409134e9d4539f2945a55784e6e8b8c0e15c5 Mon Sep 17 00:00:00 2001 From: Sergey Vasilyev Date: Fri, 5 May 2023 10:48:18 +0200 Subject: [PATCH 2/4] Avoid indexing by columns that do not support indexing (JSON/JSONB) --- tests/test_database_types.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_database_types.py b/tests/test_database_types.py index e0cb9449..e4bc9b0d 100644 --- a/tests/test_database_types.py +++ b/tests/test_database_types.py @@ -628,9 +628,10 @@ def _create_table_with_indexes(conn, table_path, type_): else: conn.query(tbl.create()) - if conn.dialect.SUPPORTS_INDEXES: - (index_id,) = table_path + (index_id,) = table_path + if conn.dialect.SUPPORTS_INDEXES and type_ not in ('json', 'jsonb', 'array', 'struct'): conn.query(f"CREATE INDEX xa_{index_id} 
ON {table_name} ({quote('id')}, {quote('col')})") + if conn.dialect.SUPPORTS_INDEXES: conn.query(f"CREATE INDEX xb_{index_id} ON {table_name} ({quote('id')})") conn.query(commit) From 6cdd0a6c7effb090ebfc75321cb756ee37df0ba5 Mon Sep 17 00:00:00 2001 From: Sergey Vasilyev Date: Fri, 5 May 2023 10:48:51 +0200 Subject: [PATCH 3/4] Expect some downloaded rows for fuzzily diffed column types --- tests/test_database_types.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_database_types.py b/tests/test_database_types.py index e4bc9b0d..d3e3b22f 100644 --- a/tests/test_database_types.py +++ b/tests/test_database_types.py @@ -726,9 +726,11 @@ def test_types(self, source_db, target_db, source_type, target_type, type_catego checksum_duration = time.monotonic() - start expected = [] self.assertEqual(expected, diff) - self.assertEqual( - 0, differ.stats.get("rows_downloaded", 0) - ) # This may fail if the hash is different, but downloaded values are equal +
+ # For fuzzily diffed types, some rows can be downloaded for local comparison. This happens + # when hashes are different but the essential payload is not; e.g. due to json serialization. + if not {source_type, target_type} & {'json', 'jsonb', 'array', 'struct'}: + self.assertEqual(0, differ.stats.get("rows_downloaded", 0)) # This section downloads all rows to ensure that Python agrees with the # database, in terms of comparison. 
From fbdb10136191a718e330dab289aa4116a3bf1ba7 Mon Sep 17 00:00:00 2001 From: Sergey Vasilyev Date: Fri, 5 May 2023 10:51:53 +0200 Subject: [PATCH 4/4] Escape and serialise the Postgres JSON/JSONB values in tests --- tests/test_database_types.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/test_database_types.py b/tests/test_database_types.py index d3e3b22f..fdf8784d 100644 --- a/tests/test_database_types.py +++ b/tests/test_database_types.py @@ -604,8 +604,15 @@ def _insert_to_table(conn, table_path, values, coltype): elif isinstance(conn, db.BigQuery) and coltype == "datetime": values = [(i, Code(f"cast(timestamp '{sample}' as datetime)")) for i, sample in values] - if isinstance(conn, db.Redshift) and coltype == "json": - values = [(i, Code(f"JSON_PARSE('{sample}')")) for i, sample in values] + elif isinstance(conn, db.Redshift) and coltype in ("json", "jsonb"): + values = [(i, Code(f"JSON_PARSE({sample})")) for i, sample in values] + elif isinstance(conn, db.PostgreSQL) and coltype in ("json", "jsonb"): + values = [(i, Code( + "'{}'".format( + (json.dumps(sample) if isinstance(sample, (dict, list)) else sample) + .replace('\'', '\'\'') + ) + )) for i, sample in values] insert_rows_in_batches(conn, tbl, values, columns=["id", "col"]) conn.query(commit)