Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit fedcef6

Browse files
author
Sergey Vasilyev
committed
Rename "type_repr" to "data_type" to align with conventional information schemas
1 parent 9f27117 commit fedcef6

File tree

12 files changed: +36 -36 lines changed

data_diff/databases/base.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -711,9 +711,9 @@ def type_repr(self, t) -> str:
711711
def parse_type(self, table_path: DbPath, info: RawColumnInfo) -> ColType:
712712
"Parse type info as returned by the database"
713713

714-
cls = self.TYPE_CLASSES.get(info.type_repr)
714+
cls = self.TYPE_CLASSES.get(info.data_type)
715715
if cls is None:
716-
return UnknownColType(info.type_repr)
716+
return UnknownColType(info.data_type)
717717

718718
if issubclass(cls, TemporalType):
719719
return cls(
@@ -745,7 +745,7 @@ def parse_type(self, table_path: DbPath, info: RawColumnInfo) -> ColType:
745745
elif issubclass(cls, (JSON, Array, Struct, Text, Native_UUID)):
746746
return cls()
747747

748-
raise TypeError(f"Parsing {info.type_repr} returned an unknown type {cls!r}.")
748+
raise TypeError(f"Parsing {info.data_type} returned an unknown type {cls!r}.")
749749

750750
def _convert_db_precision_to_digits(self, p: int) -> int:
751751
"""Convert from binary precision, used by floats, to decimal precision."""
@@ -1024,7 +1024,7 @@ def query_table_schema(self, path: DbPath) -> Dict[str, RawColumnInfo]:
10241024
d = {
10251025
r[0]: RawColumnInfo(
10261026
column_name=r[0],
1027-
type_repr=r[1],
1027+
data_type=r[1],
10281028
datetime_precision=r[2],
10291029
numeric_precision=r[3],
10301030
numeric_scale=r[4],

data_diff/databases/bigquery.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def type_repr(self, t) -> str:
9595
def parse_type(self, table_path: DbPath, info: RawColumnInfo) -> ColType:
9696
col_type = super().parse_type(table_path, info)
9797
if isinstance(col_type, UnknownColType):
98-
m = self.TYPE_ARRAY_RE.fullmatch(info.type_repr)
98+
m = self.TYPE_ARRAY_RE.fullmatch(info.data_type)
9999
if m:
100100
item_info = attrs.evolve(info, data_type=m.group(1))
101101
item_type = self.parse_type(table_path, item_info)
@@ -106,7 +106,7 @@ def parse_type(self, table_path: DbPath, info: RawColumnInfo) -> ColType:
106106
# - STRUCT<foo INT64, bar STRING(10)> (named)
107107
# - STRUCT<foo INT64, bar ARRAY<INT64>> (with complex fields)
108108
# - STRUCT<foo INT64, bar STRUCT<a INT64, b INT64>> (nested)
109-
m = self.TYPE_STRUCT_RE.fullmatch(info.type_repr)
109+
m = self.TYPE_STRUCT_RE.fullmatch(info.data_type)
110110
if m:
111111
col_type = Struct()
112112

data_diff/databases/clickhouse.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,14 +79,14 @@ def _convert_db_precision_to_digits(self, p: int) -> int:
7979

8080
def parse_type(self, table_path: DbPath, info: RawColumnInfo) -> ColType:
8181
nullable_prefix = "Nullable("
82-
if info.type_repr.startswith(nullable_prefix):
83-
info = attrs.evolve(info, data_type=info.type_repr[len(nullable_prefix) :].rstrip(")"))
82+
if info.data_type.startswith(nullable_prefix):
83+
info = attrs.evolve(info, data_type=info.data_type[len(nullable_prefix) :].rstrip(")"))
8484

85-
if info.type_repr.startswith("Decimal"):
85+
if info.data_type.startswith("Decimal"):
8686
info = attrs.evolve(info, data_type="Decimal")
87-
elif info.type_repr.startswith("FixedString"):
87+
elif info.data_type.startswith("FixedString"):
8888
info = attrs.evolve(info, data_type="FixedString")
89-
elif info.type_repr.startswith("DateTime64"):
89+
elif info.data_type.startswith("DateTime64"):
9090
info = attrs.evolve(info, data_type="DateTime64")
9191

9292
return super().parse_type(table_path, info)

data_diff/databases/databricks.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def query_table_schema(self, path: DbPath) -> Dict[str, RawColumnInfo]:
158158

159159
d = {
160160
r.COLUMN_NAME: RawColumnInfo(
161-
column_name=r.COLUMN_NAME, type_repr=r.TYPE_NAME, datetime_precision=r.DECIMAL_DIGITS
161+
column_name=r.COLUMN_NAME, data_type=r.TYPE_NAME, datetime_precision=r.DECIMAL_DIGITS
162162
)
163163
for r in rows
164164
}
@@ -186,8 +186,8 @@ def _process_table_schema(
186186

187187
resulted_rows = []
188188
for info in col_infos:
189-
row_type = "DECIMAL" if info.type_repr.startswith("DECIMAL") else info.type_repr
190-
info = attrs.evolve(info, type_repr=row_type)
189+
row_type = "DECIMAL" if info.data_type.startswith("DECIMAL") else info.data_type
190+
info = attrs.evolve(info, data_type=row_type)
191191
type_cls = self.dialect.TYPE_CLASSES.get(row_type, UnknownColType)
192192

193193
if issubclass(type_cls, Integer):
@@ -198,7 +198,7 @@ def _process_table_schema(
198198
info = attrs.evolve(info, numeric_precision=numeric_precision)
199199

200200
elif issubclass(type_cls, Decimal):
201-
items = info.type_repr[8:].rstrip(")").split(",")
201+
items = info.data_type[8:].rstrip(")").split(",")
202202
numeric_precision, numeric_scale = int(items[0]), int(items[1])
203203
info = attrs.evolve(
204204
info,

data_diff/databases/duckdb.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def parse_type(self, table_path: DbPath, info: RawColumnInfo) -> ColType:
8080
r"DECIMAL\((\d+),(\d+)\)": Decimal,
8181
}
8282

83-
for m, t_cls in match_regexps(regexps, info.type_repr):
83+
for m, t_cls in match_regexps(regexps, info.data_type):
8484
precision = int(m.group(2))
8585
return t_cls(precision=precision)
8686

data_diff/databases/oracle.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def parse_type(self, table_path: DbPath, info: RawColumnInfo) -> ColType:
113113
r"TIMESTAMP\((\d)\)": Timestamp,
114114
}
115115

116-
for m, t_cls in match_regexps(regexps, info.type_repr):
116+
for m, t_cls in match_regexps(regexps, info.data_type):
117117
precision = int(m.group(1))
118118
return t_cls(precision=precision, rounds=self.ROUNDS_ON_PREC_LOSS)
119119

data_diff/databases/presto.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,17 +97,17 @@ def parse_type(self, table_path: DbPath, info: RawColumnInfo) -> ColType:
9797
r"timestamp\((\d)\)": Timestamp,
9898
r"timestamp\((\d)\) with time zone": TimestampTZ,
9999
}
100-
for m, t_cls in match_regexps(timestamp_regexps, info.type_repr):
100+
for m, t_cls in match_regexps(timestamp_regexps, info.data_type):
101101
precision = int(m.group(1))
102102
return t_cls(precision=precision, rounds=self.ROUNDS_ON_PREC_LOSS)
103103

104104
number_regexps = {r"decimal\((\d+),(\d+)\)": Decimal}
105-
for m, n_cls in match_regexps(number_regexps, info.type_repr):
105+
for m, n_cls in match_regexps(number_regexps, info.data_type):
106106
_prec, scale = map(int, m.groups())
107107
return n_cls(scale)
108108

109109
string_regexps = {r"varchar\((\d+)\)": Text, r"char\((\d+)\)": Text}
110-
for m, n_cls in match_regexps(string_regexps, info.type_repr):
110+
for m, n_cls in match_regexps(string_regexps, info.data_type):
111111
return n_cls()
112112

113113
return super().parse_type(table_path, info)

data_diff/databases/redshift.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ def query_svv_columns(self, path: DbPath) -> Dict[str, RawColumnInfo]:
151151
d = {
152152
r[0]: RawColumnInfo(
153153
column_name=r[0],
154-
type_repr=r[1],
154+
data_type=r[1],
155155
datetime_precision=r[2],
156156
numeric_precision=r[3],
157157
numeric_scale=r[4],
@@ -181,7 +181,7 @@ def _normalize_schema_info(self, rows: Iterable[Tuple[Any]]) -> Dict[str, RawCol
181181

182182
schema_dict[col_name] = RawColumnInfo(
183183
column_name=col_name,
184-
type_repr=col_name,
184+
data_type=col_name,
185185
datetime_precision=None,
186186
numeric_precision=precision,
187187
numeric_scale=scale,

data_diff/databases/vertica.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,22 +74,22 @@ def parse_type(self, table_path: DbPath, info: RawColumnInfo) -> ColType:
7474
r"timestamp\(?(\d?)\)?": Timestamp,
7575
r"timestamptz\(?(\d?)\)?": TimestampTZ,
7676
}
77-
for m, t_cls in match_regexps(timestamp_regexps, info.type_repr):
77+
for m, t_cls in match_regexps(timestamp_regexps, info.data_type):
7878
precision = int(m.group(1)) if m.group(1) else 6
7979
return t_cls(precision=precision, rounds=self.ROUNDS_ON_PREC_LOSS)
8080

8181
number_regexps = {
8282
r"numeric\((\d+),(\d+)\)": Decimal,
8383
}
84-
for m, n_cls in match_regexps(number_regexps, info.type_repr):
84+
for m, n_cls in match_regexps(number_regexps, info.data_type):
8585
_prec, scale = map(int, m.groups())
8686
return n_cls(scale)
8787

8888
string_regexps = {
8989
r"varchar\((\d+)\)": Text,
9090
r"char\((\d+)\)": Text,
9191
}
92-
for m, n_cls in match_regexps(string_regexps, info.type_repr):
92+
for m, n_cls in match_regexps(string_regexps, info.data_type):
9393
return n_cls()
9494

9595
return super().parse_type(table_path, info)

data_diff/dbt.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ def _local_diff(
303303
columns_removed = table1_column_names.difference(table2_column_names)
304304
# col type is i = 1 in tuple
305305
columns_type_changed = {
306-
k for k, v in table2_columns.items() if k in table1_columns and v.type_repr != table1_columns[k].type_repr
306+
k for k, v in table2_columns.items() if k in table1_columns and v.data_type != table1_columns[k].data_type
307307
}
308308

309309
if columns_added:

0 commit comments

Comments
 (0)