[API-965] Implement SQL JSON support (#529)

* Implement SQL JSON support This PR adds implementation, tests, code samples, and documentation for the SQL JSON column type. * address review comments * update limitations section of the sql
hazelcast · Mar 23, 2022 · f8f6162 · f8f6162
1 parent bb5e86d
commit f8f6162
Show file tree

Hide file tree

Showing 7 changed files with 204 additions and 25 deletions.
diff --git a/docs/using_python_client_with_hazelcast.rst b/docs/using_python_client_with_hazelcast.rst
@@ -1572,22 +1572,114 @@ Query parameters have the following benefits:
 Querying JSON Objects
 ~~~~~~~~~~~~~~~~~~~~~
 
-To query JSON objects, you should create an explicit mapping using the
+In Hazelcast, the SQL service supports the following ways of working with
+JSON data:
+
+- ``json``: Maps JSON data to a single column of ``JSON`` type where you can
+  use `JsonPath
+  <https://docs.hazelcast.com/hazelcast/latest/sql/working-with-json#querying-json>`__
+  syntax to query and filter it, including nested levels.
+- ``json-flat``: Maps JSON top-level fields to columns with non-JSON types
+  where you can query only top-level keys.
+
+**json**
+
+To query ``json`` objects, you should create an explicit mapping using the
 `CREATE MAPPING
 <https://docs.hazelcast.com/hazelcast/latest/sql/create-mapping.html>`__
 statement, similar to the example above.
 
 For example, this code snippet creates a mapping to a new map called
-``json_employees``, which stores the JSON values ``name`` and ``salary`` and
-query it:
+``json_employees``, which stores the JSON values as ``HazelcastJsonValue``
+objects and queries it using nested fields, which is not possible with the
+``json-flat`` type:
 
 .. code:: python
 
-    client = hazelcast.HazelcastClient()
+    client.sql.execute(
+        """
+    CREATE OR REPLACE MAPPING json_employees
+    TYPE IMap
+    OPTIONS (
+        'keyFormat' = 'int',
+        'valueFormat' = 'json'
+    )
+        """
+    ).result()
+
+    json_employees = client.get_map("json_employees").blocking()
+
+    json_employees.set(
+        1,
+        HazelcastJsonValue(
+            {
+                "personal": {"name": "John Doe"},
+                "job": {"salary": 60000},
+            }
+        ),
+    )
+
+    json_employees.set(
+        2,
+        HazelcastJsonValue(
+            {
+                "personal": {"name": "Jane Doe"},
+                "job": {"salary": 80000},
+            }
+        ),
+    )
+
+    with client.sql.execute(
+        """
+    SELECT JSON_VALUE(this, '$.personal.name') AS name
+    FROM json_employees
+    WHERE JSON_VALUE(this, '$.job.salary' RETURNING INT) > ?
+        """,
+        75000,
+    ).result() as result:
+        for row in result:
+            print(f"Name: {row['name']}")
+
+The ``json`` data type comes with full support for querying JSON in maps and
+Kafka topics.
+
+
+**JSON Functions**
+
+Hazelcast supports the following functions, which can retrieve or construct
+JSON data.
+
+- `JSON_QUERY <https://docs.hazelcast.com/hazelcast/latest/sql/functions-and-operators#json_query>`__
+  : Extracts a JSON value from a JSON document or a JSON-formatted string that
+  matches a given JsonPath expression.
+
+- `JSON_VALUE <https://docs.hazelcast.com/hazelcast/latest/sql/functions-and-operators#json_value>`__
+  : Extracts a primitive value, such as a string, number, or boolean that
+  matches a given JsonPath expression. This function returns ``NULL`` if a
+  non-primitive value is matched, unless the ``ON ERROR`` behavior is changed.
+
+- `JSON_ARRAY <https://docs.hazelcast.com/hazelcast/latest/sql/functions-and-operators#json_array>`__
+  : Returns a JSON array from a list of input data.
+
+- `JSON_OBJECT <https://docs.hazelcast.com/hazelcast/latest/sql/functions-and-operators#json_object>`__
+  : Returns a JSON object from the given key/value pairs.
+
+**json-flat**
+
+To query ``json-flat`` objects, you should create an explicit mapping using the
+`CREATE MAPPING
+<https://docs.hazelcast.com/hazelcast/latest/sql/create-mapping.html>`__
+statement, similar to the example above.
+
+For example, this code snippet creates a mapping to a new map called
+``json_flat_employees``, which stores the JSON values with columns ``name``
+and ``salary`` as ``HazelcastJsonValue`` objects and queries it using
+top-level fields:
+
+.. code:: python
 
     client.sql.execute(
         """
-    CREATE MAPPING json_employees (
+    CREATE OR REPLACE MAPPING json_flat_employees (
         __key INT,
         name VARCHAR,
         salary INT
@@ -1600,9 +1692,9 @@ query it:
         """
     ).result()
 
-    json_employees = client.get_map("json_employees").blocking()
+    json_flat_employees = client.get_map("json_flat_employees").blocking()
 
-    json_employees.set(
+    json_flat_employees.set(
         1,
         HazelcastJsonValue(
             {
@@ -1612,7 +1704,7 @@ query it:
         ),
     )
 
-    json_employees.set(
+    json_flat_employees.set(
         2,
         HazelcastJsonValue(
             {
@@ -1623,17 +1715,27 @@ query it:
     )
 
     with client.sql.execute(
-        """
-    SELECT __key AS employee_id,
-           name,
-           salary
-    FROM   json_employees
-    WHERE  salary > ?
-        """,
-        75000,
+            """
+    SELECT name
+    FROM json_flat_employees
+    WHERE salary > ?
+            """,
+            75000,
     ).result() as result:
         for row in result:
-            print(row["employee_id"], row["name"], row["salary"])
+            print(f"Name: {row['name']}")
+
+Note that, with the ``json-flat`` type, top-level columns must be explicitly
+specified while creating the mapping.
+
+The ``json-flat`` format comes with partial support for querying JSON in maps,
+Kafka topics, and files.
+
+For more information about working with JSON using SQL, see
+`Working with JSON
+<https://docs.hazelcast.com/hazelcast/latest/sql/working-with-json>`__
+in the Hazelcast reference manual.
+
 
 SQL Statements
 ~~~~~~~~~~~~~~
@@ -1706,6 +1808,7 @@ TIME                     datetime.time
 TIMESTAMP                datetime.datetime
 TIMESTAMP_WITH_TIME_ZONE datetime.datetime (with non-None tzinfo)
 OBJECT                   Any Python type
+JSON                     HazelcastJsonValue
 ======================== ========================================
 
 Functions and Operators
@@ -1741,15 +1844,8 @@ future releases.
 - You cannot run SQL queries on lite members.
 - The only supported Hazelcast data structure is map. You cannot query other
   data structures such as replicated maps.
-- No support for the ``CREATE INDEX`` statement. To create indexes for maps in
-  Hazelcast, see the :func:`add_index() <hazelcast.proxy.map.Map.add_index>`
-  method.
-- No support for the ``JSON`` type. You can’t use functions such as
-  ``JSON_VALUE`` or ``JSON_QUERY``.
 - Limited support for joins. See `Join Tables
   <https://docs.hazelcast.com/hazelcast/latest/sql/select.html#join-tables>`__.
-- No support for window functions. You cannot group or aggregate results in
-  streaming queries.
 
 Distributed Query
 -----------------

diff --git a/examples/sql/sql_json_example.py b/examples/sql/sql_json_example.py
@@ -0,0 +1,35 @@
+import hazelcast
+
+from hazelcast.core import HazelcastJsonValue
+
+client = hazelcast.HazelcastClient()
+employees = client.get_map("employees").blocking()
+
+# Populate some data
+employees.put(0, HazelcastJsonValue('{"name": "Alice", "age": 32}'))
+employees.put(1, HazelcastJsonValue('{"name": "John", "age": 42}'))
+employees.put(2, HazelcastJsonValue('{"name": "Jake", "age": 18}'))
+
+# Create mapping for the employees map. This needs to be done only once per map.
+client.sql.execute(
+    """
+CREATE OR REPLACE MAPPING employees
+TYPE IMap
+OPTIONS (
+    'keyFormat' = 'int',
+    'valueFormat' = 'json'
+)
+    """
+).result()
+
+# Select the names of employees older than 25
+result = client.sql.execute(
+    """
+SELECT JSON_VALUE(this, '$.name') AS name
+FROM employees
+WHERE JSON_VALUE(this, '$.age' RETURNING INT) > 25
+    """
+).result()
+
+for row in result:
+    print(f"Name: {row['name']}")
diff --git a/hazelcast/protocol/builtin.py b/hazelcast/protocol/builtin.py
@@ -745,9 +745,16 @@ def decode(msg):
                 columns[i] = column
             elif column_type_id == SqlColumnType.OBJECT:
                 columns[i] = ListMultiFrameCodec.decode_contains_nullable(msg, DataCodec.decode)
+            elif column_type_id == SqlColumnType.JSON:
+                columns[i] = ListMultiFrameCodec.decode_contains_nullable(
+                    msg, HazelcastJsonValueCodec.decode
+                )
             else:
                 raise ValueError("Unknown type %s" % column_type_id)
 
         CodecUtil.fast_forward_to_end_frame(msg)
 
         return _SqlPage(column_type_ids, columns, is_last)
+
+
+from hazelcast.protocol.codec.custom.hazelcast_json_value_codec import HazelcastJsonValueCodec
diff --git a/hazelcast/protocol/codec/custom/hazelcast_json_value_codec.py b/hazelcast/protocol/codec/custom/hazelcast_json_value_codec.py
@@ -0,0 +1,22 @@
+from hazelcast.protocol.builtin import CodecUtil
+from hazelcast.protocol.client_message import END_FRAME_BUF, END_FINAL_FRAME_BUF, BEGIN_FRAME_BUF
+from hazelcast.protocol.builtin import StringCodec
+from hazelcast.core import HazelcastJsonValue
+
+
+class HazelcastJsonValueCodec:
+    @staticmethod
+    def encode(buf, hazelcast_json_value, is_final=False):
+        buf.extend(BEGIN_FRAME_BUF)
+        StringCodec.encode(buf, hazelcast_json_value.value)
+        if is_final:
+            buf.extend(END_FINAL_FRAME_BUF)
+        else:
+            buf.extend(END_FRAME_BUF)
+
+    @staticmethod
+    def decode(msg):
+        msg.next_frame()
+        value = StringCodec.decode(msg)
+        CodecUtil.fast_forward_to_end_frame(msg)
+        return HazelcastJsonValue(value)
diff --git a/hazelcast/sql.py b/hazelcast/sql.py
@@ -388,6 +388,11 @@ class SqlColumnType:
     The only valid value of ``NULL`` type is ``None``.
     """
 
+    JSON = 15
+    """
+    Represented by :class:`hazelcast.core.HazelcastJsonValue`.
+    """
+
 
 class _SqlErrorCode:
 

diff --git a/start_rc.py b/start_rc.py
@@ -3,7 +3,7 @@
 import sys
 from os.path import isfile
 
-SERVER_VERSION = "5.0"
+SERVER_VERSION = "5.1"
 RC_VERSION = "0.8-SNAPSHOT"
 
 RELEASE_REPO = "https://repo1.maven.apache.org/maven2"

diff --git a/tests/integration/backward_compatible/sql_test.py b/tests/integration/backward_compatible/sql_test.py
@@ -6,6 +6,7 @@
 import unittest
 
 from hazelcast import HazelcastClient
+from hazelcast.core import HazelcastJsonValue
 from hazelcast.future import ImmediateFuture
 from hazelcast.serialization.api import Portable
 from tests.base import SingleMemberTestCase, HazelcastTestCase
@@ -793,6 +794,19 @@ def test_null_only_column(self):
         )
         self._validate_result(result, SqlColumnType.INTEGER, lambda _: None)
 
+    def test_json(self):
+        skip_if_client_version_older_than(self, "5.1")
+        skip_if_server_version_older_than(self, self.client, "5.1")
+
+        def value_factory(key):
+            return HazelcastJsonValue({"key": key})
+
+        self._create_mapping("JSON")
+        self._populate_map(value_factory=value_factory)
+
+        result = self.execute(f"SELECT __key, this FROM {self.map_name}")
+        self._validate_result(result, SqlColumnType.JSON, value_factory)
+
     def _validate_rows(self, expected_type, value_factory=lambda key: key):
         result = self.execute('SELECT __key, this FROM "%s"' % self.map_name)
         self._validate_result(result, expected_type, value_factory)