From e0aafc0a380f0d48c906a4f977d2baa6ea855893 Mon Sep 17 00:00:00 2001 From: Sergey Vasilyev Date: Fri, 5 May 2023 10:47:39 +0200 Subject: [PATCH 1/4] Skip JSON/JSONB cross-db tests when the db does not have this type --- tests/test_database_types.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/test_database_types.py b/tests/test_database_types.py index 9729b00d..e0cb9449 100644 --- a/tests/test_database_types.py +++ b/tests/test_database_types.py @@ -526,16 +526,17 @@ def get_test_db_pairs(): for source_db, source_type_categories, target_db, target_type_categories in get_test_db_pairs(): for type_category, source_types in source_type_categories.items(): # int, datetime, .. for source_type in source_types: - for target_type in target_type_categories[type_category]: - type_pairs.append( - ( - source_db, - target_db, - source_type, - target_type, - type_category, + if type_category in target_type_categories: # only cross-compatible types + for target_type in target_type_categories[type_category]: + type_pairs.append( + ( + source_db, + target_db, + source_type, + target_type, + type_category, + ) ) - ) def sanitize(name): From 72d409134e9d4539f2945a55784e6e8b8c0e15c5 Mon Sep 17 00:00:00 2001 From: Sergey Vasilyev Date: Fri, 5 May 2023 10:48:18 +0200 Subject: [PATCH 2/4] Avoid indexing by columns that do not support indexing (JSON/JSONB) --- tests/test_database_types.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_database_types.py b/tests/test_database_types.py index e0cb9449..e4bc9b0d 100644 --- a/tests/test_database_types.py +++ b/tests/test_database_types.py @@ -628,9 +628,10 @@ def _create_table_with_indexes(conn, table_path, type_): else: conn.query(tbl.create()) - if conn.dialect.SUPPORTS_INDEXES: - (index_id,) = table_path + (index_id,) = table_path + if conn.dialect.SUPPORTS_INDEXES and type_ not in ('json', 'jsonb', 'array', 'struct'): conn.query(f"CREATE INDEX xa_{index_id} 
ON {table_name} ({quote('id')}, {quote('col')})") + if conn.dialect.SUPPORTS_INDEXES: conn.query(f"CREATE INDEX xb_{index_id} ON {table_name} ({quote('id')})") conn.query(commit) From 6cdd0a6c7effb090ebfc75321cb756ee37df0ba5 Mon Sep 17 00:00:00 2001 From: Sergey Vasilyev Date: Fri, 5 May 2023 10:48:51 +0200 Subject: [PATCH 3/4] Expect some downloaded rows for fuzzily diffed column types --- tests/test_database_types.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_database_types.py b/tests/test_database_types.py index e4bc9b0d..d3e3b22f 100644 --- a/tests/test_database_types.py +++ b/tests/test_database_types.py @@ -726,9 +726,11 @@ def test_types(self, source_db, target_db, source_type, target_type, type_catego checksum_duration = time.monotonic() - start expected = [] self.assertEqual(expected, diff) - self.assertEqual( - 0, differ.stats.get("rows_downloaded", 0) - ) # This may fail if the hash is different, but downloaded values are equal +
+ # For fuzzily diffed types, some rows can be downloaded for local comparison. This happens + # when hashes are different but the essential payload is not; e.g. due to json serialization. + if not {source_type, target_type} & {'json', 'jsonb', 'array', 'struct'}: + self.assertEqual(0, differ.stats.get("rows_downloaded", 0)) # This section downloads all rows to ensure that Python agrees with the # database, in terms of comparison. 
From fbdb10136191a718e330dab289aa4116a3bf1ba7 Mon Sep 17 00:00:00 2001 From: Sergey Vasilyev Date: Fri, 5 May 2023 10:51:53 +0200 Subject: [PATCH 4/4] Escape and serialise the Postgres JSON/JSONB values in tests --- tests/test_database_types.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/test_database_types.py b/tests/test_database_types.py index d3e3b22f..fdf8784d 100644 --- a/tests/test_database_types.py +++ b/tests/test_database_types.py @@ -604,8 +604,15 @@ def _insert_to_table(conn, table_path, values, coltype): elif isinstance(conn, db.BigQuery) and coltype == "datetime": values = [(i, Code(f"cast(timestamp '{sample}' as datetime)")) for i, sample in values] - if isinstance(conn, db.Redshift) and coltype == "json": - values = [(i, Code(f"JSON_PARSE('{sample}')")) for i, sample in values] + elif isinstance(conn, db.Redshift) and coltype in ("json", "jsonb"): + values = [(i, Code(f"JSON_PARSE({sample})")) for i, sample in values] + elif isinstance(conn, db.PostgreSQL) and coltype in ("json", "jsonb"): + values = [(i, Code( + "'{}'".format( + (json.dumps(sample) if isinstance(sample, (dict, list)) else sample) + .replace('\'', '\'\'') + ) + )) for i, sample in values] insert_rows_in_batches(conn, tbl, values, columns=["id", "col"]) conn.query(commit)