Skip to content

Commit

Permalink
feat(api): add JSON __getitem__ operation
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored and kszucs committed Sep 20, 2022
1 parent e629633 commit 3e2efb4
Show file tree
Hide file tree
Showing 19 changed files with 179 additions and 12 deletions.
10 changes: 10 additions & 0 deletions ci/schema/duckdb.sql
Expand Up @@ -93,3 +93,13 @@ INSERT INTO struct VALUES
({'a': 2.0, 'b': NULL, 'c': 3}),
(NULL),
({'a': 3.0, 'b': 'orange', 'c': NULL});

-- Fixture table for the JSON operator tests (ibis/backends/tests/test_json.py).
CREATE OR REPLACE TABLE json_t (js JSON);

-- Six rows covering the shapes the tests exercise: objects with present,
-- null, and missing keys; a bare JSON null; a non-empty array; an empty array.
INSERT INTO json_t VALUES
('{"a": [1,2,3,4], "b": 1}'),
('{"a":null,"b":2}'),
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]');
12 changes: 12 additions & 0 deletions ci/schema/mysql.sql
Expand Up @@ -72,3 +72,15 @@ CREATE TABLE functional_alltypes (
) DEFAULT CHARACTER SET = utf8;

CREATE INDEX `ix_functional_alltypes_index` ON functional_alltypes (`index`);

-- Fixture table for the JSON operator tests (ibis/backends/tests/test_json.py).
-- NOTE(review): MySQL parses but ignores CASCADE on DROP TABLE; presumably kept
-- for symmetry with the PostgreSQL schema script.
DROP TABLE IF EXISTS json_t CASCADE;

CREATE TABLE IF NOT EXISTS json_t (js JSON);

-- Same six rows as the other backends' schema scripts: objects with present,
-- null, and missing keys; a bare JSON null; a non-empty array; an empty array.
INSERT INTO json_t VALUES
('{"a": [1,2,3,4], "b": 1}'),
('{"a":null,"b":2}'),
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]');
12 changes: 12 additions & 0 deletions ci/schema/postgresql.sql
Expand Up @@ -183,3 +183,15 @@ CREATE INDEX IF NOT EXISTS idx_geo_geo_linestring ON geo USING GIST (geo_linestr
CREATE INDEX IF NOT EXISTS idx_geo_geo_multipolygon ON geo USING GIST (geo_multipolygon);
CREATE INDEX IF NOT EXISTS idx_geo_geo_point ON geo USING GIST (geo_point);
CREATE INDEX IF NOT EXISTS idx_geo_geo_polygon ON geo USING GIST (geo_polygon);

-- Fixture table for the JSON operator tests (ibis/backends/tests/test_json.py).
DROP TABLE IF EXISTS json_t CASCADE;

CREATE TABLE IF NOT EXISTS json_t (js JSON);

-- Same six rows as the other backends' schema scripts: objects with present,
-- null, and missing keys; a bare JSON null; a non-empty array; an empty array.
INSERT INTO json_t VALUES
('{"a": [1,2,3,4], "b": 1}'),
('{"a":null,"b":2}'),
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]');
12 changes: 12 additions & 0 deletions ci/schema/sqlite.sql
Expand Up @@ -73,3 +73,15 @@ CREATE TABLE diamonds (
y FLOAT,
z FLOAT
);

-- Fixture table for the JSON operator tests (ibis/backends/tests/test_json.py).
-- NOTE(review): SQLite's declared column types are advisory under its flexible
-- typing; the JSON values are stored as the inserted text. The JSON getitem
-- tests are gated on SQLite >= 3.38.0 (see min_server_version in test_json.py).
DROP TABLE IF EXISTS json_t;

CREATE TABLE IF NOT EXISTS json_t (js JSON);

-- Same six rows as the other backends' schema scripts: objects with present,
-- null, and missing keys; a bare JSON null; a non-empty array; an empty array.
INSERT INTO json_t VALUES
('{"a": [1,2,3,4], "b": 1}'),
('{"a":null,"b":2}'),
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]');
3 changes: 3 additions & 0 deletions ibis/backends/clickhouse/tests/conftest.py
Expand Up @@ -27,6 +27,7 @@ class TestConf(UnorderedComparator, BackendTest, RoundHalfToEven):
supported_to_timestamp_units = {'s'}
supports_floating_modulus = False
bool_is_int = True
supports_json = False

@staticmethod
def _load_data(
Expand Down Expand Up @@ -60,6 +61,8 @@ def _load_data(
client.execute(f"DROP DATABASE IF EXISTS {database}")
client.execute(f"CREATE DATABASE {database} ENGINE = Atomic")
client.execute(f"USE {database}")
client.execute("SET allow_experimental_object_type = 1")
client.execute("SET output_format_json_named_tuples_as_objects = 1")

with open(script_dir / 'schema' / 'clickhouse.sql') as schema:
for stmt in filter(None, map(str.strip, schema.read().split(";"))):
Expand Down
5 changes: 5 additions & 0 deletions ibis/backends/conftest.py
Expand Up @@ -576,6 +576,11 @@ def alltypes(backend):
return backend.functional_alltypes


@pytest.fixture(scope="session")
def json_t(backend):
    """Session-scoped fixture returning the active backend's ``json_t`` table."""
    table = backend.json_t
    return table


@pytest.fixture(scope='session')
def struct(backend):
    """Session-scoped fixture returning the active backend's ``struct`` table."""
    table = backend.struct
    return table
Expand Down
12 changes: 12 additions & 0 deletions ibis/backends/dask/tests/conftest.py
Expand Up @@ -46,6 +46,18 @@ def connect(data_directory: Path):
pd.read_csv(str(data_directory / 'awards_players.csv')),
npartitions=NPARTITIONS,
),
'json_t': pd.DataFrame(
{
"js": [
'{"a": [1,2,3,4], "b": 1}',
'{"a":null,"b":2}',
'{"a":"foo", "c":null}',
"null",
"[42,47,55]",
"[]",
]
}
),
}
)

Expand Down
1 change: 1 addition & 0 deletions ibis/backends/datafusion/tests/conftest.py
Expand Up @@ -16,6 +16,7 @@ class TestConf(BackendTest, RoundAwayFromZero):
# returned_timestamp_unit = 'ns'
bool_is_int = True
supports_structs = False
supports_json = False

@staticmethod
def connect(data_directory: Path):
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/impala/tests/conftest.py
Expand Up @@ -33,6 +33,7 @@ class TestConf(UnorderedComparator, BackendTest, RoundAwayFromZero):
supports_divide_by_zero = True
returned_timestamp_unit = 's'
supports_structs = False
supports_json = False

@staticmethod
def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/mysql/tests/conftest.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import os
from pathlib import Path
from typing import Any
Expand Down
12 changes: 12 additions & 0 deletions ibis/backends/pandas/tests/conftest.py
Expand Up @@ -46,6 +46,18 @@ def connect(data_directory: Path):
]
}
),
'json_t': pd.DataFrame(
{
"js": [
'{"a": [1,2,3,4], "b": 1}',
'{"a":null,"b":2}',
'{"a":"foo", "c":null}',
"null",
"[42,47,55]",
"[]",
]
}
),
'array_types': pd.DataFrame(
[
(
Expand Down
5 changes: 2 additions & 3 deletions ibis/backends/postgres/tests/test_json.py
Expand Up @@ -9,11 +9,10 @@

@pytest.mark.parametrize('data', [param({'status': True}, id='status')])
def test_json(data, alltypes):
json_value = json.dumps(data)
lit = ibis.literal(json_value, type='json').name('tmp')
lit = ibis.literal(json.dumps(data), type='json').name('tmp')
expr = alltypes[[alltypes.id, lit]].head(1)
df = expr.execute()
assert df['tmp'].iloc[0] == json_value
assert df['tmp'].iloc[0] == data


@pytest.mark.parametrize('data', [param({'status': True}, id='status')])
Expand Down
18 changes: 18 additions & 0 deletions ibis/backends/pyspark/tests/conftest.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import os
from datetime import datetime, timezone

Expand Down Expand Up @@ -228,6 +230,22 @@ def get_common_spark_testing_client(data_directory, connect):
)
df_udf_random.createOrReplaceTempView('udf_random')

df_json_t = s.createDataFrame(
pd.DataFrame(
{
"js": [
'{"a": [1,2,3,4], "b": 1}',
'{"a":null,"b":2}',
'{"a":"foo", "c":null}',
"null",
"[42,47,55]",
"[]",
]
}
)
)
df_json_t.createOrReplaceTempView("json_t")

return _spark_testing_client


Expand Down
10 changes: 10 additions & 0 deletions ibis/backends/tests/base.py
Expand Up @@ -72,6 +72,7 @@ class BackendTest(abc.ABC):
supports_floating_modulus = True
bool_is_int = False
supports_structs = True
supports_json = True

def __init__(self, data_directory: Path) -> None:
self.connection = self.connect(data_directory)
Expand Down Expand Up @@ -181,6 +182,15 @@ def struct(self) -> Optional[ir.Table]:
f"{self.name()} backend does not support struct types"
)

@property
def json_t(self) -> Optional[ir.Table]:
    """The ``json_t`` test table with its ``js`` column cast to ``json``.

    Backends that declare ``supports_json = False`` xfail instead of
    returning a table.
    """
    from ibis import _

    # Guard clause: pytest.xfail raises, so control never falls through.
    if not self.supports_json:
        pytest.xfail(f"{self.name()} backend does not support json types")
    return self.connection.table("json_t").mutate(js=_.js.cast("json"))

@property
def api(self):
return self.connection
Expand Down
39 changes: 39 additions & 0 deletions ibis/backends/tests/test_json.py
@@ -0,0 +1,39 @@
"""Tests for JSON operations."""

import pandas as pd
import pytest
from pytest import param


# Both parametrized cases are gated on the same minimum SQLite server version
# (presumably for SQLite's native JSON operator support — confirm upstream).
_SQLITE_JSON_MIN = pytest.mark.min_server_version(sqlite="3.38.0")


@pytest.mark.notimpl(["datafusion", "pyspark"])
@pytest.mark.notyet(["clickhouse"], reason="upstream is broken")
@pytest.mark.never(["impala"], reason="doesn't support JSON and never will")
@pytest.mark.parametrize(
    ("expr_fn", "expected"),
    [
        param(
            lambda t: t.js["a"].name("res"),
            pd.Series(
                [[1, 2, 3, 4], None, "foo", None, None, None],
                name="res",
                dtype="object",
            ),
            id="getitem_object",
            marks=[_SQLITE_JSON_MIN],
        ),
        param(
            lambda t: t.js[1].name("res"),
            pd.Series(
                [None, None, None, None, 47, None],
                name="res",
                dtype="object",
            ),
            id="getitem_array",
            marks=[_SQLITE_JSON_MIN],
        ),
    ],
)
def test_json_getitem(backend, json_t, expr_fn, expected):
    """JSON ``__getitem__`` (object key or array index) matches *expected*."""
    result = expr_fn(json_t).execute()
    backend.assert_series_equal(result, expected)
1 change: 1 addition & 0 deletions ibis/expr/operations/__init__.py
Expand Up @@ -4,6 +4,7 @@
from ibis.expr.operations.generic import * # noqa: F401,F403
from ibis.expr.operations.geospatial import * # noqa: F401,F403
from ibis.expr.operations.histograms import * # noqa: F401,F403
from ibis.expr.operations.json import * # noqa: F401,F403
from ibis.expr.operations.logical import * # noqa: F401,F403
from ibis.expr.operations.maps import * # noqa: F401,F403
from ibis.expr.operations.numeric import * # noqa: F401,F403
Expand Down
15 changes: 15 additions & 0 deletions ibis/expr/operations/json.py
@@ -0,0 +1,15 @@
from public import public

import ibis.expr.datatypes as dt
import ibis.expr.rules as rlz
from ibis.expr.operations import Value


@public
class JSONGetItem(Value):
    # Operation backing ``JSONValue.__getitem__``: extracts a member of a JSON
    # value, by object key (string) or array position (integer).
    # NOTE(review): rule attributes presumably define the op's positional
    # argument order — keep ``arg`` declared before ``index``.
    arg = rlz.json

    # Key/index rule: accepts either a string or an integer value.
    index = rlz.one_of((rlz.string, rlz.integer))

    # Extracting from JSON yields JSON; result shape broadcasts over both args.
    output_dtype = dt.json
    output_shape = rlz.shape_like("args")
1 change: 1 addition & 0 deletions ibis/expr/rules.py
Expand Up @@ -292,6 +292,7 @@ def column(inner, arg, **kwargs):
timestamp = value(dt.Timestamp)
category = value(dt.category)
temporal = one_of([timestamp, date, time])
json = value(dt.json)

strict_numeric = one_of([integer, floating, decimal])
soft_numeric = one_of([integer, floating, decimal, boolean])
Expand Down
20 changes: 11 additions & 9 deletions ibis/expr/types/json.py
@@ -1,34 +1,36 @@
from public import public

from ibis.expr.types.binary import BinaryColumn, BinaryScalar, BinaryValue
from ibis.expr.types.strings import StringColumn, StringScalar, StringValue
from ibis.expr.types import Column, Scalar, Value


@public
class JSONValue(StringValue):
pass # noqa: E701,E302
class JSONValue(Value):
    """Value expression of JSON type."""

    def __getitem__(self, key):
        """Extract a member of this JSON value: an object field when *key*
        is a string, an array element when it is an integer.
        """
        # Imported here rather than at module top — presumably to avoid an
        # import cycle between the types and operations modules.
        import ibis.expr.operations as ops

        op = ops.JSONGetItem(self, key)
        return op.to_expr()


@public
class JSONScalar(StringScalar, JSONValue):
class JSONScalar(Scalar, JSONValue):
    """Scalar (single-valued) JSON expression."""

    pass # noqa: E701,E302


@public
class JSONColumn(StringColumn, JSONValue):
class JSONColumn(Column, JSONValue):
    """Column of JSON values."""

    pass # noqa: E701,E302


@public
class JSONBValue(BinaryValue):
class JSONBValue(Value):
    """Value expression of JSONB type (binary JSON, e.g. PostgreSQL's JSONB)."""

    pass # noqa: E701,E302


@public
class JSONBScalar(BinaryScalar, JSONBValue):
class JSONBScalar(Scalar, JSONBValue):
    """Scalar (single-valued) JSONB expression."""

    pass # noqa: E701,E302


@public
class JSONBColumn(BinaryColumn, JSONBValue):
class JSONBColumn(Column, JSONBValue):
    """Column of JSONB values."""

    pass # noqa: E701,E302

0 comments on commit 3e2efb4

Please sign in to comment.