From d17b7b8e1a12c1536f0d93e4caaa90585f4e9ed4 Mon Sep 17 00:00:00 2001 From: Roland Walker Date: Wed, 9 Jul 2025 16:51:04 -0400 Subject: [PATCH] Add two JSON-based output formats * jsonl: JSON Lines format, in which each row is represented on its own line as a JSON object, with the column names repeated on every line as the property names. * jsonl_escaped: like jsonl, except that non-ASCII characters are JSON-escaped. Most users will want jsonl. The implementation file has the generic name json_output_adapter.py in case other JSON-based formats are desired. These output formats can be combined with jq redirection in mycli: * https://github.com/dbcli/mycli/pull/1248 --- CHANGELOG | 1 + .../tabular_output/json_output_adapter.py | 27 +++++++++++++ .../test_json_output_adapter.py | 40 +++++++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 cli_helpers/tabular_output/json_output_adapter.py create mode 100644 tests/tabular_output/test_json_output_adapter.py diff --git a/CHANGELOG b/CHANGELOG index 3d80a69..54331eb 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,7 @@ # Changelog - Added noheader CSV and TSV output formats. +- Added `jsonl` and `jsonl_escaped` output formats. 
## Version 2.4.0 diff --git a/cli_helpers/tabular_output/json_output_adapter.py b/cli_helpers/tabular_output/json_output_adapter.py new file mode 100644 index 0000000..8176fce --- /dev/null +++ b/cli_helpers/tabular_output/json_output_adapter.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +"""A JSON data output adapter""" + +from itertools import chain +import json + +from .preprocessors import bytes_to_string, override_missing_value, convert_to_string + +supported_formats = ("jsonl", "jsonl_escaped") +preprocessors = (override_missing_value, bytes_to_string, convert_to_string) + + +def adapter(data, headers, table_format="jsonl", **_kwargs): + """Wrap the formatting inside a function for TabularOutputFormatter.""" + if table_format == "jsonl": + ensure_ascii = False + elif table_format == "jsonl_escaped": + ensure_ascii = True + else: + raise ValueError("Invalid table_format specified.") + + for row in chain(data): + yield json.dumps( + dict(zip(headers, row, strict=True)), + separators=(",", ":"), + ensure_ascii=ensure_ascii, + ) diff --git a/tests/tabular_output/test_json_output_adapter.py b/tests/tabular_output/test_json_output_adapter.py new file mode 100644 index 0000000..d0bd202 --- /dev/null +++ b/tests/tabular_output/test_json_output_adapter.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +"""Test the json output adapter.""" + +from __future__ import unicode_literals + +from cli_helpers.tabular_output import json_output_adapter + + +def test_jsonl_wrapper(): + """Test the jsonl output adapter.""" + # Test jsonl output. 
+ data = [["ab\r\nc", 1], ["d", 456]] + headers = ["letters", "number"] + output = json_output_adapter.adapter(iter(data), headers, table_format="jsonl") + assert ( + "\n".join(output) + == """{"letters":"ab\\r\\nc","number":1}\n{"letters":"d","number":456}""" + ) + + +def test_unicode_with_jsonl(): + """Test that the jsonl wrapper can pass through non-ascii characters.""" + data = [["观音", 1], ["Ποσειδῶν", 456]] + headers = ["letters", "number"] + output = json_output_adapter.adapter(data, headers, table_format="jsonl") + assert ( + "\n".join(output) + == """{"letters":"观音","number":1}\n{"letters":"Ποσειδῶν","number":456}""" + ) + + +def test_unicode_with_jsonl_esc(): + """Test that the jsonl_escaped wrapper JSON-escapes non-ascii characters.""" + data = [["观音", 1], ["Ποσειδῶν", 456]] + headers = ["letters", "number"] + output = json_output_adapter.adapter(data, headers, table_format="jsonl_escaped") + assert ( + "\n".join(output) + == """{"letters":"\\u89c2\\u97f3","number":1}\n{"letters":"\\u03a0\\u03bf\\u03c3\\u03b5\\u03b9\\u03b4\\u1ff6\\u03bd","number":456}""" + )