Commit

fix(duckdb): allow setting auto_detect to False by fixing translation of columns argument (#10065)
cpcloud committed Sep 9, 2024
1 parent a121ab3 commit 883d2d3
Showing 2 changed files with 35 additions and 1 deletion.
23 changes: 22 additions & 1 deletion ibis/backends/duckdb/__init__.py
@@ -600,6 +600,7 @@ def read_json(
self,
source_list: str | list[str] | tuple[str],
table_name: str | None = None,
columns: Mapping[str, str] | None = None,
**kwargs,
) -> ir.Table:
"""Read newline-delimited JSON into an ibis table.
@@ -614,8 +615,13 @@ def read_json(
File or list of files
table_name
Optional table name
columns
Optional mapping from string column name to duckdb type string.
**kwargs
Additional keyword arguments passed to DuckDB's `read_json_auto` function
Additional keyword arguments passed to DuckDB's `read_json_auto` function.
See https://duckdb.org/docs/data/json/overview.html#json-loading
for parameters and more information about reading JSON.
Returns
-------
@@ -630,6 +636,21 @@ def read_json(
sg.to_identifier(key).eq(sge.convert(val)) for key, val in kwargs.items()
]

if columns:
options.append(
sg.to_identifier("columns").eq(
sge.Struct.from_arg_list(
[
sge.PropertyEQ(
this=sg.to_identifier(key),
expression=sge.convert(value),
)
for key, value in columns.items()
]
)
)
)

self._create_temp_view(
table_name,
sg.select(STAR).from_(
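Usage note (not part of the diff): a minimal sketch of how the new `columns` argument combines with `auto_detect=False`, mirroring the test added below. The connection and file name are hypothetical, and the type strings use DuckDB's type syntax.

import ibis

con = ibis.duckdb.connect()  # hypothetical in-memory connection
t = con.read_json(
    "events.ndjson",              # hypothetical newline-delimited JSON file
    auto_detect=False,            # turn off DuckDB's schema inference
    columns={"year": "varchar"},  # explicit column name -> DuckDB type string
)
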
13 changes: 13 additions & 0 deletions ibis/backends/duckdb/tests/test_register.py
@@ -505,3 +505,16 @@ def test_memtable_null_column_parquet_dtype_roundtrip(con, tmp_path):
after = con.read_parquet(tmp_path / "tmp.parquet")

assert before.a.type() == after.a.type()


def test_read_json_no_auto_detection(con, tmp_path):
ndjson_data = """
{"year": 2007}
{"year": 2008}
{"year": 2009}
"""
path = tmp_path.joinpath("test.ndjson")
path.write_text(ndjson_data)

t = con.read_json(path, auto_detect=False, columns={"year": "varchar"})
assert t.year.type() == dt.string
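
For context (a sketch, not part of the commit): the view created by `read_json` is expected to wrap a DuckDB table function call roughly like the one below; the exact SQL emitted by the backend may differ, and the file name is hypothetical.

import duckdb

con = duckdb.connect()
rows = con.sql(
    "SELECT * FROM read_json('test.ndjson', auto_detect = false, columns = {year: 'varchar'})"
).fetchall()  # the year column comes back as VARCHAR rather than an inferred integer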
