From e007f0f3eb30852bb24b9d094c4491cc034454ab Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 1 Jun 2024 23:46:10 -0400 Subject: [PATCH 1/8] feat: support JSON schema Signed-off-by: Jinzhe Zeng --- README.md | 1 + dargs/json_schema.py | 149 ++++++++++++++++++++++++++++++++++++++ docs/json_schema.md | 32 ++++++++ pyproject.toml | 1 + tests/dpmdargs.py | 9 +++ tests/test_json_schema.py | 14 ++++ 6 files changed, 206 insertions(+) create mode 100644 dargs/json_schema.py create mode 100644 docs/json_schema.md create mode 100644 tests/test_json_schema.py diff --git a/README.md b/README.md index d25a288..ec0d053 100644 --- a/README.md +++ b/README.md @@ -30,3 +30,4 @@ Please refer to test files for detailed usage. - [PEP 484](https://peps.python.org/pep-0484/) type annotations - Native integration with [Sphinx](https://github.com/sphinx-doc/sphinx), [DP-GUI](https://github.com/deepmodeling/dpgui), and [Jupyter Notebook](https://jupyter.org/) - JSON encoder for `Argument` and `Variant` classes +- Generate [JSON schema](https://json-schema.org/) from an `Argument`, which can be further integrated with JSON editors such as [Visual Studio Code](https://code.visualstudio.com/) diff --git a/dargs/json_schema.py b/dargs/json_schema.py new file mode 100644 index 0000000..ce05ee1 --- /dev/null +++ b/dargs/json_schema.py @@ -0,0 +1,149 @@ +"""Generate JSON schema from a given dargs.Argument.""" + +from dargs.dargs import _Flags, Argument + +try: + from typing import get_origin +except ImportError: + from typing_extensions import get_origin + + +def generate_json_schema(argument: Argument, id: str = "") -> dict: + """Generate JSON schema from a given dargs.Argument. + + Parameters + ---------- + argument : Argument + The argument to generate JSON schema. + id : str, optional + The URL of the schema, by default "". + + Returns + ------- + dict + The JSON schema. Use :func:`json.dump` to save it to a file + or :func:`json.dumps` to get a string. 
+ + Examples + -------- + Dump the JSON schema of DeePMD-kit to a file: + + >>> from dargs.json_schema import generate_json_schema + >>> from deepmd.utils.argcheck import gen_args + >>> import json + >>> from dargs import Argument + >>> a = Argument("DeePMD-kit", dtype=dict, sub_fields=gen_args()) + >>> schema = generate_json_schema(a) + >>> with open("deepmd.json", "w") as f: + ... json.dump(schema, f, indent=2) + """ + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": id, + "title": argument.name, + **_convert_single_argument(argument), + } + return schema + + +def _convert_single_argument(argument: Argument) -> dict: + """Convert a single argument to JSON schema. + + Parameters + ---------- + argument : Argument + The argument to convert. + + Returns + ------- + dict + The JSON schema of the argument. + """ + data = { + "description": argument.doc, + "type": list({_convert_types(tt) for tt in argument.dtype}), + } + if argument.default is not _Flags.NONE: + data["default"] = argument.default + properties = { + **{ + nn: _convert_single_argument(aa) + for aa in argument.sub_fields.values() + for nn in (aa.name, *aa.alias) + }, + **{ + vv.flag_name: { + "type": "string", + "enum": list(vv.choice_dict.keys()), + "default": vv.default_tag, + "description": vv.doc, + } + for vv in argument.sub_variants.values() + }, + } + required = [ + aa.name + for aa in argument.sub_fields.values() + if not aa.optional and not aa.alias + ] + [vv.flag_name for vv in argument.sub_variants.values() if not vv.optional] + allof = [ + { + "if": { + "properties": {vv.flag_name: {"const": kk}}, + "required": [vv.flag_name] + if not (vv.optional and vv.default_tag == kk) + else [], + }, + "then": _convert_single_argument(aa), + } + for vv in argument.sub_variants.values() + for kk, aa in vv.choice_dict.items() + ] + allof += [ + {"oneOf": [{"required": [nn]} for nn in (aa.name, *aa.alias)]} + for aa in argument.sub_fields.values() + if not aa.optional and 
aa.alias + ] + if not argument.repeat: + data["properties"] = properties + data["required"] = required + if allof: + data["allOf"] = allof + else: + data["items"] = { + "type": "object", + "properties": properties, + "required": required, + } + if allof: + data["items"]["allOf"] = allof + return data + + +def _convert_types(T: type) -> str: + """Convert a type to JSON schema type. + + Parameters + ---------- + T : type + The type to convert. + + Returns + ------- + str + The JSON schema type. + """ + # string, number, integer, object, array, boolean, null + if T is None or T is type(None): + return "null" + elif T is str: + return "string" + elif T in (int, float): + return "number" + elif T is bool: + return "boolean" + elif T is list or get_origin(T) is list: + return "array" + elif T is dict or get_origin(T) is dict: + return "object" + raise ValueError(f"Unknown type: {T}") diff --git a/docs/json_schema.md b/docs/json_schema.md new file mode 100644 index 0000000..40aa1ee --- /dev/null +++ b/docs/json_schema.md @@ -0,0 +1,32 @@ +## Generate JSON schema from an argument + +One can use {func}`dargs.json_schema.generate_json_schema` to generate [JSON schema](https://json-schema.org/). + +```py +import json + +from dargs import Argument +from dargs.json_schema import generate_json_schema +from deepmd.utils.argcheck import gen_args + + +a = Argument("DeePMD-kit", dtype=dict, sub_fields=gen_args()) +schema = generate_json_schema(a) +with open("deepmd.json", "w") as f: + json.dump(schema, f, indent=2) +``` + +JSON schema can be used in several JSON editors. 
For example, in [Visual Studio Code](https://code.visualstudio.com/), you can [configure JSON schema](https://code.visualstudio.com/docs/languages/json#_json-schemas-and-settings) in the project `settings.json`: + +```json +{ + "json.schemas": [ + { + "fileMatch": [ + "/**/*.json" + ], + "url": "./deepmd.json" + } + ] +} +``` diff --git a/pyproject.toml b/pyproject.toml index 8e5b0e2..c4b0900 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ repository = "https://github.com/deepmodeling/dargs" [project.optional-dependencies] test = [ "ipython", + "jsonschema", ] typecheck = [ "basedpyright==1.12.2", diff --git a/tests/dpmdargs.py b/tests/dpmdargs.py index 8241083..0a53f62 100644 --- a/tests/dpmdargs.py +++ b/tests/dpmdargs.py @@ -764,6 +764,15 @@ def normalize(data): return data +def gen_args() -> Argument: + ma = model_args() + lra = learning_rate_args() + la = loss_args() + ta = training_args() + + base = Argument("base", dict, [ma, lra, la, ta]) + return base + example_json_str = """ { "_comment": " model parameters", diff --git a/tests/test_json_schema.py b/tests/test_json_schema.py new file mode 100644 index 0000000..5e04510 --- /dev/null +++ b/tests/test_json_schema.py @@ -0,0 +1,14 @@ +import json +import unittest +from .dpmdargs import gen_args, example_json_str + +from dargs.json_schema import generate_json_schema +from jsonschema import validate + + +class TestJsonSchema(unittest.TestCase): + def test_json_schema(self): + args = gen_args() + schema = generate_json_schema(args) + data = json.loads(example_json_str) + validate(data, schema) From 9e820e798ea697cb5960e23b7d93d2223a09314c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 2 Jun 2024 03:46:24 +0000 Subject: [PATCH 2/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dargs/json_schema.py | 4 +++- tests/dpmdargs.py | 1 + tests/test_json_schema.py | 8 
++++++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/dargs/json_schema.py b/dargs/json_schema.py index ce05ee1..9932ad6 100644 --- a/dargs/json_schema.py +++ b/dargs/json_schema.py @@ -1,6 +1,8 @@ """Generate JSON schema from a given dargs.Argument.""" -from dargs.dargs import _Flags, Argument +from __future__ import annotations + +from dargs.dargs import Argument, _Flags try: from typing import get_origin diff --git a/tests/dpmdargs.py b/tests/dpmdargs.py index 0a53f62..e0e5e3c 100644 --- a/tests/dpmdargs.py +++ b/tests/dpmdargs.py @@ -773,6 +773,7 @@ def gen_args() -> Argument: base = Argument("base", dict, [ma, lra, la, ta]) return base + example_json_str = """ { "_comment": " model parameters", diff --git a/tests/test_json_schema.py b/tests/test_json_schema.py index 5e04510..285b64d 100644 --- a/tests/test_json_schema.py +++ b/tests/test_json_schema.py @@ -1,10 +1,14 @@ +from __future__ import annotations + import json import unittest -from .dpmdargs import gen_args, example_json_str -from dargs.json_schema import generate_json_schema from jsonschema import validate +from dargs.json_schema import generate_json_schema + +from .dpmdargs import example_json_str, gen_args + class TestJsonSchema(unittest.TestCase): def test_json_schema(self): From 103da4076d3f612311cb71d9cca42fe2903ef7c4 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 1 Jun 2024 23:57:22 -0400 Subject: [PATCH 3/8] fix typing Signed-off-by: Jinzhe Zeng --- dargs/json_schema.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dargs/json_schema.py b/dargs/json_schema.py index 9932ad6..b565b5a 100644 --- a/dargs/json_schema.py +++ b/dargs/json_schema.py @@ -1,6 +1,7 @@ """Generate JSON schema from a given dargs.Argument.""" from __future__ import annotations +from typing import Any from dargs.dargs import Argument, _Flags @@ -122,12 +123,12 @@ def _convert_single_argument(argument: Argument) -> dict: return data -def _convert_types(T: type) -> str: +def 
_convert_types(T: type | Any | None) -> str: """Convert a type to JSON schema type. Parameters ---------- - T : type + T : type | Any | None The type to convert. Returns From 2c6a79cabf2fc50eb80084734db96eae4d802fe4 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 1 Jun 2024 23:59:56 -0400 Subject: [PATCH 4/8] add tests for _convert_types Signed-off-by: Jinzhe Zeng --- tests/test_json_schema.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/test_json_schema.py b/tests/test_json_schema.py index 285b64d..62569d6 100644 --- a/tests/test_json_schema.py +++ b/tests/test_json_schema.py @@ -5,7 +5,7 @@ from jsonschema import validate -from dargs.json_schema import generate_json_schema +from dargs.json_schema import generate_json_schema, _convert_types from .dpmdargs import example_json_str, gen_args @@ -16,3 +16,15 @@ def test_json_schema(self): schema = generate_json_schema(args) data = json.loads(example_json_str) validate(data, schema) + + def test_convert_types(self): + self.assertEqual(_convert_types(int), "number") + self.assertEqual(_convert_types(str), "string") + self.assertEqual(_convert_types(float), "number") + self.assertEqual(_convert_types(bool), "boolean") + self.assertEqual(_convert_types(None), "null") + self.assertEqual(_convert_types(type(None)), "null") + self.assertEqual(_convert_types(list), "array") + self.assertEqual(_convert_types(dict), "object") + with self.assertRaises(ValueError): + _convert_types(set) From 84a16e1ae4d53bbb027c6d032c135c025d1459f4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 2 Jun 2024 04:00:09 +0000 Subject: [PATCH 5/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dargs/json_schema.py | 1 + tests/test_json_schema.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dargs/json_schema.py b/dargs/json_schema.py index b565b5a..a49cd4f 
100644 --- a/dargs/json_schema.py +++ b/dargs/json_schema.py @@ -1,6 +1,7 @@ """Generate JSON schema from a given dargs.Argument.""" from __future__ import annotations + from typing import Any from dargs.dargs import Argument, _Flags diff --git a/tests/test_json_schema.py b/tests/test_json_schema.py index 62569d6..48dba41 100644 --- a/tests/test_json_schema.py +++ b/tests/test_json_schema.py @@ -5,7 +5,7 @@ from jsonschema import validate -from dargs.json_schema import generate_json_schema, _convert_types +from dargs.json_schema import _convert_types, generate_json_schema from .dpmdargs import example_json_str, gen_args From 88a2a3c8d3e162322a5130a57ebbde263e42e8b6 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 3 Jun 2024 02:01:14 -0400 Subject: [PATCH 6/8] allow $schema Signed-off-by: Jinzhe Zeng --- dargs/dargs.py | 6 +++++- docs/json_schema.md | 10 ++++++++++ tests/dpmdargs.py | 1 + 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/dargs/dargs.py b/dargs/dargs.py index 290f9fd..2529560 100644 --- a/dargs/dargs.py +++ b/dargs/dargs.py @@ -460,11 +460,15 @@ def _check_data(self, value: Any, path=None): ) def _check_strict(self, value: dict, path=None): - allowed_keys = self.flatten_sub(value, path).keys() + allowed_keys = set(self.flatten_sub(value, path).keys()) # curpath = [*path, self.name] if not len(allowed_keys): # no allowed keys defined, allow any keys return + # A special case to allow $schema in any dict to be compatible with vscode + json schema + # https://code.visualstudio.com/docs/languages/json#_mapping-in-the-json + # considering usually it's not a typo of users when they use $schema + allowed_keys.add("$schema") for name in value.keys(): if name not in allowed_keys: dym_message = did_you_mean(name, allowed_keys) diff --git a/docs/json_schema.md b/docs/json_schema.md index 40aa1ee..e7c1803 100644 --- a/docs/json_schema.md +++ b/docs/json_schema.md @@ -30,3 +30,13 @@ JSON schema can be used in several JSON editors. 
For example, in [Visual Studio ] } ``` + +VS Code also allows one to [specify the JSON schema in a JSON file](https://code.visualstudio.com/docs/languages/json#_mapping-in-the-json) with the `$schema` key. +To be compatible, dargs will not throw an error for `$schema` in the strict mode even if `$schema` is not defined in the argument. + +```json +{ + "$schema": "./deepmd.json", + "model": {} +} +``` diff --git a/tests/dpmdargs.py b/tests/dpmdargs.py index e0e5e3c..93a2edc 100644 --- a/tests/dpmdargs.py +++ b/tests/dpmdargs.py @@ -776,6 +776,7 @@ def gen_args() -> Argument: example_json_str = """ { + "$schema": "this should be ignored by dargs", "_comment": " model parameters", "model": { "type_map": ["O", "H"], From 047a4ec9537fc776b32a873bf84265a3e1feebfd Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 3 Jun 2024 02:36:28 -0400 Subject: [PATCH 7/8] handle Variant choice alias Signed-off-by: Jinzhe Zeng --- dargs/json_schema.py | 7 +++++-- tests/dpmdargs.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/dargs/json_schema.py b/dargs/json_schema.py index a49cd4f..405bce2 100644 --- a/dargs/json_schema.py +++ b/dargs/json_schema.py @@ -78,7 +78,7 @@ def _convert_single_argument(argument: Argument) -> dict: **{ vv.flag_name: { "type": "string", - "enum": list(vv.choice_dict.keys()), + "enum": list(vv.choice_dict.keys()) + list(vv.choice_alias.keys()), "default": vv.default_tag, "description": vv.doc, } @@ -93,7 +93,10 @@ def _convert_single_argument(argument: Argument) -> dict: allof = [ { "if": { - "properties": {vv.flag_name: {"const": kk}}, + "oneOf": [ + { "properties": {vv.flag_name: {"const": kkaa}}, } + for kkaa in (kk, *aa.alias) + ], "required": [vv.flag_name] if not (vv.optional and vv.default_tag == kk) else [], diff --git a/tests/dpmdargs.py b/tests/dpmdargs.py index 93a2edc..72b2519 100644 --- a/tests/dpmdargs.py +++ b/tests/dpmdargs.py @@ -216,7 +216,7 @@ def descrpt_hybrid_args(): "type", [ Argument("loc_frame", dict, 
descrpt_local_frame_args()), - Argument("se_a", dict, descrpt_se_a_args()), + Argument("se_e2_a", dict, descrpt_se_a_args(), alias=["se_a"]), Argument("se_r", dict, descrpt_se_r_args()), Argument( "se_a_3be", dict, descrpt_se_a_3be_args(), alias=["se_at"] From 7697058c482c03f7e0637a1859b7a47705b203a8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 06:36:43 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dargs/json_schema.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dargs/json_schema.py b/dargs/json_schema.py index 405bce2..221dd3b 100644 --- a/dargs/json_schema.py +++ b/dargs/json_schema.py @@ -94,7 +94,9 @@ def _convert_single_argument(argument: Argument) -> dict: { "if": { "oneOf": [ - { "properties": {vv.flag_name: {"const": kkaa}}, } + { + "properties": {vv.flag_name: {"const": kkaa}}, + } for kkaa in (kk, *aa.alias) ], "required": [vv.flag_name]