diff --git a/setup.py b/setup.py index a6ef121..2aab44f 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,8 @@ long_description_content_type="text/markdown", package_dir={"": "src"}, packages=setuptools.find_packages("src", exclude=["tests"]), - install_requires=["rstr", "faker", "smart_open", "jsonschema", "typer", "pydantic"], + install_requires=["rstr", "faker", "smart_open", "jsonschema", + "typer", "pydantic", "python-dateutil", "text_unidecode"], url="https://github.com/ghandic/jsf", classifiers=[ "Programming Language :: Python :: 3", diff --git a/src/jsf/parser.py b/src/jsf/parser.py index 1cdda79..636f230 100644 --- a/src/jsf/parser.py +++ b/src/jsf/parser.py @@ -7,7 +7,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union from faker import Faker -from jsonschema import validate +from jsonschema import validate as val from pydantic import conlist from smart_open import open as s_open @@ -48,25 +48,30 @@ def __parse_primitive(self, name: str, path: str, schema: Dict[str, Any]) -> Pri def __parse_object(self, name: str, path: str, schema: Dict[str, Any]) -> Object: _, is_nullable = self.__is_field_nullable(schema) - model = Object.from_dict({"name": name, "path": path, "is_nullable": is_nullable, **schema}) + model = Object.from_dict( + {"name": name, "path": path, "is_nullable": is_nullable, **schema}) props = [] for _name, definition in schema.get("properties", {}).items(): - props.append(self.__parse_definition(_name, path=f"{path}/{_name}", schema=definition)) + props.append(self.__parse_definition( + _name, path=f"{path}/{_name}", schema=definition)) model.properties = props return model def __parse_array(self, name: str, path: str, schema: Dict[str, Any]) -> Array: _, is_nullable = self.__is_field_nullable(schema) - arr = Array.from_dict({"name": name, "path": path, "is_nullable": is_nullable, **schema}) + arr = Array.from_dict( + {"name": name, "path": path, "is_nullable": is_nullable, **schema}) arr.items = self.__parse_definition(name, name, schema["items"]) return arr def __parse_tuple(self, name: str, path: str, schema: Dict[str, Any]) -> JSFTuple: _, is_nullable = self.__is_field_nullable(schema) - arr = JSFTuple.from_dict({"name": name, "path": path, "is_nullable": is_nullable, **schema}) + arr = JSFTuple.from_dict( + {"name": name, "path": path, "is_nullable": is_nullable, **schema}) arr.items = [] for i, item in enumerate(schema["items"]): - arr.items.append(self.__parse_definition(name, path=f"{name}[{i}]", schema=item)) + arr.items.append(self.__parse_definition( + name, path=f"{name}[{i}]", schema=item)) return arr def __is_field_nullable(self, schema: Dict[str, Any]) -> Tuple[str, bool]: @@ -113,30 +118,35 @@ def __parse_definition(self, name: str, path: str, schema: Dict[str, Any]) -> Al cls.path = path return cls else: - raise ValueError(f"Cannot parse schema {repr(schema)}") # pragma: no cover + raise ValueError( + f"Cannot parse schema {repr(schema)}") # pragma: no cover def _parse(self, schema: Dict[str, Any]) -> AllTypes: for name, definition in schema.get("definitions", {}).items(): - item = self.__parse_definition(name, path="#/definitions", schema=definition) + item = self.__parse_definition( + name, path="#/definitions", schema=definition) self.definitions[f"#/definitions/{name}"] = item - self.root = self.__parse_definition(name="root", path="#", schema=schema) + self.root = self.__parse_definition( + name="root", path="#", schema=schema) @property def context(self): return {**self.base_context, "state": deepcopy(self.base_state)} - def generate(self, n: Optional[int] = None) -> Any: - if n is None or n == 1: - return self.root.generate(context=self.context) - return [self.root.generate(context=self.context) for _ in range(n)] + def generate(self, n: Optional[int] = None, validate: Optional[bool] = False) -> Any: + data = [self.root.generate(context=self.context) + for _ in range(n or 1)] + if validate: + [val(instance=d, schema=self.root_schema) for d in data] + return data[0] if n in [None, 1] else data def pydantic(self): return self.root.model(context=self.context)[0] - def generate_and_validate(self) -> None: + def validate(self) -> None: fake = self.root.generate(context=self.context) - validate(instance=fake, schema=self.root_schema) + val(instance=fake, schema=self.root_schema) def to_json(self, path: str) -> None: with open(path, "w") as f: diff --git a/src/tests/data/validate.json b/src/tests/data/validate.json new file mode 100644 index 0000000..5d8c12f --- /dev/null +++ b/src/tests/data/validate.json @@ -0,0 +1,38 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "title": "Employee Json Template Spec", + "type": "object", + "properties": { + "EmpID": { + "type": "string", + "maximum": 255 + }, + "UserID": { + "type": "number", + "maximum": 255 + }, + "Name": { + "type": "string", + "minimum": 2 + }, + "Email": { + "type": "string", + "minLength": 6, + "maxLength": 255 + }, + "Phone": { + "type": "string", + "maxLength": 10 + }, + "Salary": { + "type": "number" + }, + "Hiredate": { + "type": "string", + "format": "date" + }, + "RemainingHolidays": { + "type": "number" + } + } + } \ No newline at end of file diff --git a/src/tests/test_default_fake.py b/src/tests/test_default_fake.py index 5e4e85d..db00195 100644 --- a/src/tests/test_default_fake.py +++ b/src/tests/test_default_fake.py @@ -1,6 +1,6 @@ import json import re - +import pytest from ..jsf.parser import JSF @@ -40,7 +40,8 @@ def test_fake_enum(TestData): p = JSF(schema) assert isinstance(p.generate(), (str, type(None), int)) - assert all(p.generate() in ["red", "amber", "green", None, 42] for _ in range(100)) + assert all(p.generate() in ["red", "amber", + "green", None, 42] for _ in range(100)) def test_fake_string_enum(TestData): @@ -49,7 +50,8 @@ def test_fake_string_enum(TestData): p = JSF(schema) assert isinstance(p.generate(), str) - assert all(p.generate() in ["Street", "Avenue", "Boulevard"] for _ in range(100)) + assert all(p.generate() in ["Street", "Avenue", + "Boulevard"] for _ in range(100)) def test_fake_int(TestData): @@ -109,7 +111,8 @@ def test_fake_array(TestData): assert isinstance(p.generate(), list) fake_data = [p.generate() for _ in range(1000)] - assert all(set(d) - {"red", "amber", "green"} == set() for d in fake_data), fake_data + assert all(set(d) - {"red", "amber", "green"} == set() + for d in fake_data), fake_data assert all(len(set(d)) == len(d) for d in fake_data), fake_data assert all(len(d) <= 5 for d in fake_data), fake_data assert all(len(d) >= 1 for d in fake_data), fake_data @@ -122,7 +125,8 @@ def test_fake_array_fixed_int(TestData): assert isinstance(p.generate(), list) fake_data = [p.generate() for _ in range(1000)] - assert all(set(d) - {"red", "amber", "green"} == set() for d in fake_data), fake_data + assert all(set(d) - {"red", "amber", "green"} == set() + for d in fake_data), fake_data assert all(len(d) == 5 for d in fake_data), fake_data @@ -133,7 +137,8 @@ def test_fake_array_fixed_str(TestData): assert isinstance(p.generate(), list) fake_data = [p.generate() for _ in range(1000)] - assert all(set(d) - {"red", "amber", "green"} == set() for d in fake_data), fake_data + assert all(set(d) - {"red", "amber", "green"} == set() + for d in fake_data), fake_data assert all(len(d) == 50 for d in fake_data), fake_data @@ -147,7 +152,8 @@ def test_fake_tuple(TestData): for d in fake_data: assert isinstance(d[0], float) assert isinstance(d[1], str) - assert isinstance(d[2], str) and d[2] in ["Street", "Avenue", "Boulevard"] + assert isinstance(d[2], str) and d[2] in [ + "Street", "Avenue", "Boulevard"] assert isinstance(d[3], str) and d[3] in ["NW", "NE", "SW", "SE"] @@ -159,7 +165,8 @@ def test_fake_object(TestData): assert isinstance(p.generate(), dict) fake_data = [p.generate() for _ in range(1000)] assert all(isinstance(d["name"], str) for d in fake_data), fake_data - assert all(isinstance(d["credit_card"], float) for d in fake_data), fake_data + assert all(isinstance(d["credit_card"], float) + for d in fake_data), fake_data assert all(isinstance(d["test"], int) for d in fake_data), fake_data @@ -170,15 +177,21 @@ def test_fake_string_format(TestData): assert isinstance(p.generate(), dict) fake_data = [p.generate() for _ in range(10)] - assert all(bool(re.match(r".*@.*", d["email"])) for d in fake_data), fake_data - assert all(bool(re.match(r".*@.*", d["idn-email"])) for d in fake_data), fake_data + assert all(bool(re.match(r".*@.*", d["email"])) + for d in fake_data), fake_data + assert all(bool(re.match(r".*@.*", d["idn-email"])) + for d in fake_data), fake_data assert all( bool(re.match(r"\d{4}-\d{2}-\d{2}T\d{2}\:\d{2}\:\d{2}\+\d{2}\:\d{2}", d["date-time"])) for d in fake_data ), fake_data - assert all(bool(re.match(r"\d{4}-\d{2}-\d{2}", d["date"])) for d in fake_data), fake_data - assert all(bool(re.match(r"\d{2}\:\d{2}\:\d{2}\+\d{2}\:\d{2}", d["time"])) for d in fake_data), fake_data - assert all(bool(re.match(r"[a-zA-Z0-9+-\.]{1,33}\.[a-z]{2,4}", d["hostname"])) for d in fake_data) - assert all(bool(re.match(r"[a-zA-Z0-9+-\.]{1,33}\.[a-z]{2,4}", d["idn-hostname"])) for d in fake_data) + assert all( + bool(re.match(r"\d{4}-\d{2}-\d{2}", d["date"])) for d in fake_data), fake_data + assert all(bool(re.match( + r"\d{2}\:\d{2}\:\d{2}\+\d{2}\:\d{2}", d["time"])) for d in fake_data), fake_data + assert all(bool(re.match( + r"[a-zA-Z0-9+-\.]{1,33}\.[a-z]{2,4}", d["hostname"])) for d in fake_data) + assert all(bool(re.match( + r"[a-zA-Z0-9+-\.]{1,33}\.[a-z]{2,4}", d["idn-hostname"])) for d in fake_data) assert all(bool(re.match(r"[a-f0-9]{0,4}(:[a-f0-9]{0,4}){7}", d["ipv6"])) for d in fake_data), [ d["ipv6"] for d in fake_data ] @@ -196,7 +209,7 @@ def test_fake_string_format(TestData): # "regex" -## NO LONGER REQUIRED - dont think you can have unique items in a tuple? +# NO LONGER REQUIRED - dont think you can have unique items in a tuple? # def test_unique_items_tuple(TestData): # with open(TestData / f"unique-items-tuple.json", "r") as file: # schema = json.load(file) @@ -229,7 +242,6 @@ def test_const(TestData): assert isinstance(f["country"], str) assert f["country"] == "United States of America" - def test_external_ref(TestData): with open(TestData / f"external-ref.json", "r") as file: schema = json.load(file) @@ -242,11 +254,33 @@ def test_external_ref(TestData): assert isinstance(f["ReferenceToExternalSchema"], dict) assert isinstance(f["ReferenceToExternalSchema"]["src"], list) - assert all(isinstance(t, str) for t in f["ReferenceToExternalSchema"]["src"]) + assert all(isinstance(t, str) + for t in f["ReferenceToExternalSchema"]["src"]) + + +def test_validate(TestData): + with open(TestData / f"validate.json", "r") as file: + schema = json.load(file) + p = JSF(schema) + [p.validate() for _ in range(50)] + + +def test_validate_and_generate_single_record(TestData): + """validate and generate data from schema""" + with open(TestData / f"validate.json", "r") as file: + schema = json.load(file) + p = JSF(schema) + fake_data = p.generate(validate=True) + assert(isinstance(fake_data, dict)) -def test_gen_and_validate(TestData): - with open(TestData / f"custom.json", "r") as file: +def test_validate_and_generate__multiple_records(TestData): + """validate and generate data from schema""" + with open(TestData / f"validate.json", "r") as file: schema = json.load(file) p = JSF(schema) - [p.generate_and_validate() for _ in range(50)] + fake_data = p.generate(validate=True, n=100) + assert(isinstance(fake_data, list)) + assert(len(fake_data) == 100) + for x in fake_data: + assert(isinstance(x, dict))