From 2443caa2b6bc2bfb998c6cdfc87304f92152d90d Mon Sep 17 00:00:00 2001
From: Doug Beatty
Date: Thu, 23 Feb 2023 17:05:39 -0700
Subject: [PATCH] Unify output formats

---
 data_diff/__main__.py | 52 ++++++++++++++++++++++++-------------------
 data_diff/dbt.py      | 25 ++-------------------
 2 files changed, 31 insertions(+), 46 deletions(-)

diff --git a/data_diff/__main__.py b/data_diff/__main__.py
index 61cf1518..6f2be411 100644
--- a/data_diff/__main__.py
+++ b/data_diff/__main__.py
@@ -259,11 +259,13 @@ def main(conf, run, **kw):
 
     try:
         if kw["dbt"]:
-            dbt_diff(
+            diff = dbt_diff(
                 profiles_dir_override=kw["dbt_profiles_dir"],
                 project_dir_override=kw["dbt_project_dir"],
                 is_cloud=kw["cloud"],
             )
+            render_diff(diff, kw["limit"], kw["stats"], kw["json_output"])
+
         else:
             return _data_diff(**kw)
     except Exception as e:
@@ -272,6 +274,31 @@ def main(conf, run, **kw):
             raise
 
 
+def render_diff(diff_iter, limit, stats, json_output):
+    if limit:
+        assert not stats
+        diff_iter = islice(diff_iter, int(limit))
+
+    if stats:
+        if json_output:
+            rich.print(json.dumps(diff_iter.get_stats_dict()))
+        else:
+            rich.print(diff_iter.get_stats_string())
+
+    else:
+        for op, values in diff_iter:
+            color = COLOR_SCHEME[op]
+
+            if json_output:
+                jsonl = json.dumps([op, list(values)])
+                rich.print(f"[{color}]{jsonl}[/{color}]")
+            else:
+                text = f"{op} {', '.join(map(str, values))}"
+                rich.print(f"[{color}]{text}[/{color}]")
+
+    sys.stdout.flush()
+
+
 def _data_diff(
     database1,
     table1,
@@ -444,28 +471,7 @@ def _data_diff(
 
     diff_iter = differ.diff_tables(*segments)
 
-    if limit:
-        assert not stats
-        diff_iter = islice(diff_iter, int(limit))
-
-    if stats:
-        if json_output:
-            rich.print(json.dumps(diff_iter.get_stats_dict()))
-        else:
-            rich.print(diff_iter.get_stats_string())
-
-    else:
-        for op, values in diff_iter:
-            color = COLOR_SCHEME[op]
-
-            if json_output:
-                jsonl = json.dumps([op, list(values)])
-                rich.print(f"[{color}]{jsonl}[/{color}]")
-            else:
-                text = f"{op} {', '.join(map(str, values))}"
-                rich.print(f"[{color}]{text}[/{color}]")
-
-    sys.stdout.flush()
+    render_diff(diff_iter, limit, stats, json_output)
 
     end = time.monotonic()
 
diff --git a/data_diff/dbt.py b/data_diff/dbt.py
index 30a4e972..3ed9876b 100644
--- a/data_diff/dbt.py
+++ b/data_diff/dbt.py
@@ -73,7 +73,7 @@ def dbt_diff(
             )
 
         if not is_cloud and len(diff_vars.primary_keys) == 1:
-            _local_diff(diff_vars)
+            return _local_diff(diff_vars)
         elif not is_cloud:
             rich.print(
                 "[red]"
@@ -152,28 +152,7 @@ def _local_diff(diff_vars: DiffVars) -> None:
     extra_columns = tuple(mutual_set)
 
     diff = diff_tables(table1, table2, threaded=True, algorithm=Algorithm.JOINDIFF, extra_columns=extra_columns)
-
-    if list(diff):
-        rich.print(
-            "[red]"
-            + dev_qualified_string
-            + " <> "
-            + prod_qualified_string
-            + "[/] \n"
-            + column_diffs_str
-            + diff.get_stats_string()
-            + "\n"
-        )
-    else:
-        rich.print(
-            "[red]"
-            + dev_qualified_string
-            + " <> "
-            + prod_qualified_string
-            + "[/] \n"
-            + column_diffs_str
-            + "[green]No row differences[/] \n"
-        )
+    return diff
 
 
 def _cloud_diff(diff_vars: DiffVars) -> None: