diff --git a/data_diff/hashdiff_tables.py b/data_diff/hashdiff_tables.py index 58228868..45049ae6 100644 --- a/data_diff/hashdiff_tables.py +++ b/data_diff/hashdiff_tables.py @@ -7,7 +7,7 @@ from runtype import dataclass -from data_diff.sqeleton.abcs import ColType_UUID, NumericType, PrecisionType, StringType, Boolean, JSONType +from data_diff.sqeleton.abcs import ColType_UUID, NumericType, PrecisionType, StringType, Boolean, JSON from .info_tree import InfoTree from .utils import safezip, diffs_are_equiv_jsons @@ -205,7 +205,7 @@ def _bisect_and_diff_segments( if max_rows < self.bisection_threshold or max_space_size < self.bisection_factor * 2: rows1, rows2 = self._threaded_call("get_values", [table1, table2]) json_cols = {i: colname for i, colname in enumerate(table1.extra_columns) - if isinstance(table1._schema[colname], JSONType)} + if isinstance(table1._schema[colname], JSON)} diff = list(diff_sets(rows1, rows2, json_cols)) info_tree.info.set_diff(diff) diff --git a/data_diff/sqeleton/abcs/__init__.py b/data_diff/sqeleton/abcs/__init__.py index 6359a7f5..3f5a8bf4 100644 --- a/data_diff/sqeleton/abcs/__init__.py +++ b/data_diff/sqeleton/abcs/__init__.py @@ -10,6 +10,6 @@ PrecisionType, StringType, Boolean, - JSONType, + JSON, ) from .compiler import AbstractCompiler, Compilable diff --git a/data_diff/sqeleton/abcs/database_types.py b/data_diff/sqeleton/abcs/database_types.py index c182b0e3..145b8452 100644 --- a/data_diff/sqeleton/abcs/database_types.py +++ b/data_diff/sqeleton/abcs/database_types.py @@ -134,19 +134,9 @@ class Text(StringType): supported = False -class JSONType(ColType): - pass - - -class RedShiftSuper(JSONType): - pass - - -class PostgresqlJSON(JSONType): - pass - - -class PostgresqlJSONB(JSONType): +# In the majority of DBMSes, this type is called JSON/JSONB; only in Snowflake is it called OBJECT.
+@dataclass +class JSON(ColType): pass diff --git a/data_diff/sqeleton/abcs/mixins.py b/data_diff/sqeleton/abcs/mixins.py index f6aaa4d3..89462dd9 100644 --- a/data_diff/sqeleton/abcs/mixins.py +++ b/data_diff/sqeleton/abcs/mixins.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from .database_types import TemporalType, FractionalType, ColType_UUID, Boolean, ColType, String_UUID, JSONType +from .database_types import TemporalType, FractionalType, ColType_UUID, Boolean, ColType, String_UUID, JSON from .compiler import Compilable @@ -49,7 +49,7 @@ def normalize_uuid(self, value: str, coltype: ColType_UUID) -> str: return f"TRIM({value})" return self.to_string(value) - def normalize_json(self, value: str, _coltype: JSONType) -> str: + def normalize_json(self, value: str, _coltype: JSON) -> str: """Creates an SQL expression, that converts 'value' to its minified json string representation.""" raise NotImplementedError() @@ -77,7 +77,7 @@ def normalize_value_by_type(self, value: str, coltype: ColType) -> str: return self.normalize_uuid(value, coltype) elif isinstance(coltype, Boolean): return self.normalize_boolean(value, coltype) - elif isinstance(coltype, JSONType): + elif isinstance(coltype, JSON): return self.normalize_json(value, coltype) return self.to_string(value) diff --git a/data_diff/sqeleton/databases/base.py b/data_diff/sqeleton/databases/base.py index 2a782c79..e9e0884d 100644 --- a/data_diff/sqeleton/databases/base.py +++ b/data_diff/sqeleton/databases/base.py @@ -35,7 +35,7 @@ DbTime, DbPath, Boolean, - JSONType + JSON ) from ..abcs.mixins import Compilable from ..abcs.mixins import ( @@ -260,7 +260,7 @@ def parse_type( elif issubclass(cls, (Text, Native_UUID)): return cls() - elif issubclass(cls, JSONType): + elif issubclass(cls, JSON): return cls() raise TypeError(f"Parsing {type_repr} returned an unknown type '{cls}'.") diff --git a/data_diff/sqeleton/databases/postgresql.py b/data_diff/sqeleton/databases/postgresql.py index eef24854..4caa2f7f 
100644 --- a/data_diff/sqeleton/databases/postgresql.py +++ b/data_diff/sqeleton/databases/postgresql.py @@ -1,5 +1,6 @@ from ..abcs.database_types import ( DbPath, + JSON, Timestamp, TimestampTZ, Float, @@ -11,8 +12,6 @@ FractionalType, Boolean, Date, - PostgresqlJSON, - PostgresqlJSONB ) from ..abcs.mixins import AbstractMixin_MD5, AbstractMixin_NormalizeValue from .base import BaseDialect, ThreadedDatabase, import_helper, ConnectError, Mixin_Schema @@ -51,7 +50,7 @@ def normalize_number(self, value: str, coltype: FractionalType) -> str: def normalize_boolean(self, value: str, _coltype: Boolean) -> str: return self.to_string(f"{value}::int") - def normalize_json(self, value: str, _coltype: PostgresqlJSON) -> str: + def normalize_json(self, value: str, _coltype: JSON) -> str: return f"{value}::text" @@ -81,12 +80,10 @@ class PostgresqlDialect(BaseDialect, Mixin_Schema): "character varying": Text, "varchar": Text, "text": Text, - # JSON - "json": PostgresqlJSON, - "jsonb": PostgresqlJSONB, - # UUID + + "json": JSON, + "jsonb": JSON, "uuid": Native_UUID, - # Boolean "boolean": Boolean, } diff --git a/data_diff/sqeleton/databases/redshift.py b/data_diff/sqeleton/databases/redshift.py index f0d03be2..662ad55e 100644 --- a/data_diff/sqeleton/databases/redshift.py +++ b/data_diff/sqeleton/databases/redshift.py @@ -1,11 +1,11 @@ from typing import List, Dict from ..abcs.database_types import ( Float, + JSON, TemporalType, FractionalType, DbPath, TimestampTZ, - RedShiftSuper ) from ..abcs.mixins import AbstractMixin_MD5 from .postgresql import ( @@ -47,7 +47,7 @@ def normalize_timestamp(self, value: str, coltype: TemporalType) -> str: def normalize_number(self, value: str, coltype: FractionalType) -> str: return self.to_string(f"{value}::decimal(38,{coltype.precision})") - def normalize_json(self, value: str, _coltype: RedShiftSuper) -> str: + def normalize_json(self, value: str, _coltype: JSON) -> str: return f'nvl2({value}, json_serialize({value}), NULL)' @@ -57,8 +57,7 
@@ class Dialect(PostgresqlDialect): **PostgresqlDialect.TYPE_CLASSES, "double": Float, "real": Float, - # JSON - "super": RedShiftSuper + "super": JSON, } SUPPORTS_INDEXES = False diff --git a/data_diff/utils.py b/data_diff/utils.py index eb71cbb1..81ca5de5 100644 --- a/data_diff/utils.py +++ b/data_diff/utils.py @@ -160,7 +160,7 @@ def diffs_are_equiv_jsons(diff: list, json_cols: dict): return False, overriden_diff_cols match = True for i, (col_a, col_b) in enumerate(safezip(diff[0][1][1:], diff[1][1][1:])): # index 0 is extra_columns first elem - # we only attempt to parse columns of JSONType, but we still need to check if non-json columns don't match + # we only attempt to parse columns of JSON type, but we still need to check whether the non-JSON columns match match = col_a == col_b if not match and (i in json_cols): if _jsons_equiv(col_a, col_b):