Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Simplify JSON column types #545

Merged
merged 3 commits into from
May 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions data_diff/hashdiff_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from runtype import dataclass

from data_diff.sqeleton.abcs import ColType_UUID, NumericType, PrecisionType, StringType, Boolean, JSONType
from data_diff.sqeleton.abcs import ColType_UUID, NumericType, PrecisionType, StringType, Boolean, JSON

from .info_tree import InfoTree
from .utils import safezip, diffs_are_equiv_jsons
Expand Down Expand Up @@ -205,7 +205,7 @@ def _bisect_and_diff_segments(
if max_rows < self.bisection_threshold or max_space_size < self.bisection_factor * 2:
rows1, rows2 = self._threaded_call("get_values", [table1, table2])
json_cols = {i: colname for i, colname in enumerate(table1.extra_columns)
if isinstance(table1._schema[colname], JSONType)}
if isinstance(table1._schema[colname], JSON)}
diff = list(diff_sets(rows1, rows2, json_cols))

info_tree.info.set_diff(diff)
Expand Down
2 changes: 1 addition & 1 deletion data_diff/sqeleton/abcs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@
PrecisionType,
StringType,
Boolean,
JSONType,
JSON,
)
from .compiler import AbstractCompiler, Compilable
16 changes: 3 additions & 13 deletions data_diff/sqeleton/abcs/database_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,19 +134,9 @@ class Text(StringType):
supported = False


class JSONType(ColType):
pass


class RedShiftSuper(JSONType):
pass


class PostgresqlJSON(JSONType):
pass


class PostgresqlJSONB(JSONType):
# In the majority of DBMSes, this type is called JSON/JSONB; only in Snowflake is it called OBJECT.
@dataclass
class JSON(ColType):
pass


Expand Down
6 changes: 3 additions & 3 deletions data_diff/sqeleton/abcs/mixins.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from .database_types import TemporalType, FractionalType, ColType_UUID, Boolean, ColType, String_UUID, JSONType
from .database_types import TemporalType, FractionalType, ColType_UUID, Boolean, ColType, String_UUID, JSON
from .compiler import Compilable


Expand Down Expand Up @@ -49,7 +49,7 @@ def normalize_uuid(self, value: str, coltype: ColType_UUID) -> str:
return f"TRIM({value})"
return self.to_string(value)

def normalize_json(self, value: str, _coltype: JSONType) -> str:
def normalize_json(self, value: str, _coltype: JSON) -> str:
"""Creates an SQL expression that converts 'value' to its minified JSON string representation."""
raise NotImplementedError()

Expand Down Expand Up @@ -77,7 +77,7 @@ def normalize_value_by_type(self, value: str, coltype: ColType) -> str:
return self.normalize_uuid(value, coltype)
elif isinstance(coltype, Boolean):
return self.normalize_boolean(value, coltype)
elif isinstance(coltype, JSONType):
elif isinstance(coltype, JSON):
return self.normalize_json(value, coltype)
return self.to_string(value)

Expand Down
4 changes: 2 additions & 2 deletions data_diff/sqeleton/databases/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
DbTime,
DbPath,
Boolean,
JSONType
JSON
)
from ..abcs.mixins import Compilable
from ..abcs.mixins import (
Expand Down Expand Up @@ -260,7 +260,7 @@ def parse_type(
elif issubclass(cls, (Text, Native_UUID)):
return cls()

elif issubclass(cls, JSONType):
elif issubclass(cls, JSON):
return cls()

raise TypeError(f"Parsing {type_repr} returned an unknown type '{cls}'.")
Expand Down
13 changes: 5 additions & 8 deletions data_diff/sqeleton/databases/postgresql.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from ..abcs.database_types import (
DbPath,
JSON,
Timestamp,
TimestampTZ,
Float,
Expand All @@ -11,8 +12,6 @@
FractionalType,
Boolean,
Date,
PostgresqlJSON,
PostgresqlJSONB
)
from ..abcs.mixins import AbstractMixin_MD5, AbstractMixin_NormalizeValue
from .base import BaseDialect, ThreadedDatabase, import_helper, ConnectError, Mixin_Schema
Expand Down Expand Up @@ -51,7 +50,7 @@ def normalize_number(self, value: str, coltype: FractionalType) -> str:
def normalize_boolean(self, value: str, _coltype: Boolean) -> str:
return self.to_string(f"{value}::int")

def normalize_json(self, value: str, _coltype: PostgresqlJSON) -> str:
def normalize_json(self, value: str, _coltype: JSON) -> str:
return f"{value}::text"


Expand Down Expand Up @@ -81,12 +80,10 @@ class PostgresqlDialect(BaseDialect, Mixin_Schema):
"character varying": Text,
"varchar": Text,
"text": Text,
# JSON
"json": PostgresqlJSON,
"jsonb": PostgresqlJSONB,
# UUID

"json": JSON,
"jsonb": JSON,
"uuid": Native_UUID,
# Boolean
"boolean": Boolean,
}

Expand Down
7 changes: 3 additions & 4 deletions data_diff/sqeleton/databases/redshift.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from typing import List, Dict
from ..abcs.database_types import (
Float,
JSON,
TemporalType,
FractionalType,
DbPath,
TimestampTZ,
RedShiftSuper
)
from ..abcs.mixins import AbstractMixin_MD5
from .postgresql import (
Expand Down Expand Up @@ -47,7 +47,7 @@ def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
def normalize_number(self, value: str, coltype: FractionalType) -> str:
return self.to_string(f"{value}::decimal(38,{coltype.precision})")

def normalize_json(self, value: str, _coltype: RedShiftSuper) -> str:
def normalize_json(self, value: str, _coltype: JSON) -> str:
return f'nvl2({value}, json_serialize({value}), NULL)'


Expand All @@ -57,8 +57,7 @@ class Dialect(PostgresqlDialect):
**PostgresqlDialect.TYPE_CLASSES,
"double": Float,
"real": Float,
# JSON
"super": RedShiftSuper
"super": JSON,
}
SUPPORTS_INDEXES = False

Expand Down
2 changes: 1 addition & 1 deletion data_diff/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def diffs_are_equiv_jsons(diff: list, json_cols: dict):
return False, overriden_diff_cols
match = True
for i, (col_a, col_b) in enumerate(safezip(diff[0][1][1:], diff[1][1][1:])): # index 0 is extra_columns first elem
# we only attempt to parse columns of JSONType, but we still need to check if non-json columns don't match
# We only attempt to parse columns of JSON type, but we must still detect mismatches in the non-JSON columns.
match = col_a == col_b
if not match and (i in json_cols):
if _jsons_equiv(col_a, col_b):
Expand Down