From 90562be4a37eb02efefe0b144476b137bead1af8 Mon Sep 17 00:00:00 2001 From: Bruno Rocha Date: Sun, 4 Sep 2022 17:22:56 +0100 Subject: [PATCH] Added tomllib (vendored) as a replacement for toml fix #708 toml kept as a fallback until 4.0.0 to not break compatibility - toml follows 0.5.0 spec - tomllib follows 1.0.0 spec - toml allows emojis and unicode chars unencoded - tomllib follows the spec where only encoded chars are allowed --- dynaconf/cli.py | 12 +- dynaconf/loaders/base.py | 20 +- dynaconf/loaders/toml_loader.py | 100 +++- dynaconf/utils/parse_conf.py | 21 +- dynaconf/vendor/box/converters.py | 2 +- dynaconf/vendor/box/from_file.py | 4 +- dynaconf/vendor/toml/DEPRECATION.txt | 3 + dynaconf/vendor/tomllib/__init__.py | 16 + dynaconf/vendor/tomllib/_parser.py | 690 +++++++++++++++++++++++++++ dynaconf/vendor/tomllib/_re.py | 106 ++++ dynaconf/vendor/tomllib/_types.py | 9 + dynaconf/vendor/tomllib/_writer.py | 202 ++++++++ dynaconf/vendor/vendor.txt | 1 + dynaconf/vendor/vendor_history | 19 + dynaconf/vendor/vendor_update.sh | 31 -- tests/test_toml_loader.py | 31 ++ 16 files changed, 1197 insertions(+), 70 deletions(-) create mode 100644 dynaconf/vendor/toml/DEPRECATION.txt create mode 100644 dynaconf/vendor/tomllib/__init__.py create mode 100644 dynaconf/vendor/tomllib/_parser.py create mode 100644 dynaconf/vendor/tomllib/_re.py create mode 100644 dynaconf/vendor/tomllib/_types.py create mode 100644 dynaconf/vendor/tomllib/_writer.py delete mode 100755 dynaconf/vendor/vendor_update.sh diff --git a/dynaconf/cli.py b/dynaconf/cli.py index d6be7db23..efec9ec7c 100644 --- a/dynaconf/cli.py +++ b/dynaconf/cli.py @@ -25,6 +25,7 @@ from dynaconf.validator import Validator from dynaconf.vendor import click from dynaconf.vendor import toml +from dynaconf.vendor import tomllib os.environ["PYTHONIOENCODING"] = "utf-8" @@ -695,7 +696,16 @@ def validate(path): # pragma: no cover click.echo(click.style(f"{path} not found", fg="white", bg="red")) sys.exit(1) - 
validation_data = toml.load(open(str(path))) + try: # try tomlib first + validation_data = tomllib.load(open(str(path), "rb")) + except UnicodeDecodeError: # fallback to legacy toml (TBR in 4.0.0) + warnings.warn( + "TOML files should have only UTF-8 encoded characters. " + "starting on 4.0.0 dynaconf will stop allowing invalid chars.", + ) + validation_data = toml.load( + open(str(path), encoding=default_settings.ENCODING_FOR_DYNACONF), + ) success = True for env, name_data in validation_data.items(): diff --git a/dynaconf/loaders/base.py b/dynaconf/loaders/base.py index 0826ee75d..b0ced88d3 100644 --- a/dynaconf/loaders/base.py +++ b/dynaconf/loaders/base.py @@ -21,7 +21,14 @@ class BaseLoader: """ def __init__( - self, obj, env, identifier, extensions, file_reader, string_reader + self, + obj, + env, + identifier, + extensions, + file_reader, + string_reader, + opener_params=None, ): """Instantiates a loader for different sources""" self.obj = obj @@ -30,6 +37,10 @@ def __init__( self.extensions = extensions self.file_reader = file_reader self.string_reader = string_reader + self.opener_params = opener_params or { + "mode": "r", + "encoding": obj.get("ENCODING_FOR_DYNACONF", "utf-8"), + } @staticmethod def warn_not_installed(obj, identifier): # pragma: no cover @@ -77,12 +88,7 @@ def get_source_data(self, files): for source_file in files: if source_file.endswith(self.extensions): try: - with open( - source_file, - encoding=self.obj.get( - "ENCODING_FOR_DYNACONF", "utf-8" - ), - ) as open_file: + with open(source_file, **self.opener_params) as open_file: content = self.file_reader(open_file) self.obj._loaded_files.append(source_file) if content: diff --git a/dynaconf/loaders/toml_loader.py b/dynaconf/loaders/toml_loader.py index 372e0589e..22191acc7 100644 --- a/dynaconf/loaders/toml_loader.py +++ b/dynaconf/loaders/toml_loader.py @@ -1,13 +1,14 @@ from __future__ import annotations -import io +import warnings from pathlib import Path from dynaconf import 
default_settings from dynaconf.constants import TOML_EXTENSIONS from dynaconf.loaders.base import BaseLoader from dynaconf.utils import object_merge -from dynaconf.vendor import toml +from dynaconf.vendor import toml # Backwards compatibility with uiri/toml +from dynaconf.vendor import tomllib # New tomllib stdlib on py3.11 def load(obj, env=None, silent=True, key=None, filename=None): @@ -22,19 +23,54 @@ def load(obj, env=None, silent=True, key=None, filename=None): :return: None """ - loader = BaseLoader( - obj=obj, - env=env, - identifier="toml", - extensions=TOML_EXTENSIONS, - file_reader=toml.load, - string_reader=toml.loads, - ) - loader.load( - filename=filename, - key=key, - silent=silent, - ) + try: + loader = BaseLoader( + obj=obj, + env=env, + identifier="toml", + extensions=TOML_EXTENSIONS, + file_reader=tomllib.load, + string_reader=tomllib.loads, + opener_params={"mode": "rb"}, + ) + loader.load( + filename=filename, + key=key, + silent=silent, + ) + except UnicodeDecodeError: # pragma: no cover + """ + NOTE: Compat functions exists to keep backwards compatibility with + the new tomllib library. The old library was called `toml` and + the new one is called `tomllib`. + + The old lib uiri/toml allowed unicode characters and readed files + as string. + + The new tomllib (stdlib) does not allow unicode characters, only + utf-8 encoded, and read files as binary. + + NOTE: In dynaconf 4.0.0 we will drop support for the old library + removing the compat functions and calling directly the new lib. + """ + loader = BaseLoader( + obj=obj, + env=env, + identifier="toml", + extensions=TOML_EXTENSIONS, + file_reader=toml.load, + string_reader=toml.loads, + ) + loader.load( + filename=filename, + key=key, + silent=silent, + ) + + warnings.warn( + "TOML files should have only UTF-8 encoded characters. 
" + "starting on 4.0.0 dynaconf will stop allowing invalid chars.", + ) def write(settings_path, settings_data, merge=True): @@ -46,17 +82,33 @@ def write(settings_path, settings_data, merge=True): """ settings_path = Path(settings_path) if settings_path.exists() and merge: # pragma: no cover + try: # tomllib first + with open(str(settings_path), "rb") as open_file: + object_merge(tomllib.load(open_file), settings_data) + except UnicodeDecodeError: # pragma: no cover + # uiri/toml fallback (TBR on 4.0.0) + with open( + str(settings_path), + encoding=default_settings.ENCODING_FOR_DYNACONF, + ) as open_file: + object_merge(toml.load(open_file), settings_data) + + try: # tomllib first + with open(str(settings_path), "wb") as open_file: + tomllib.dump(encode_nulls(settings_data), open_file) + except UnicodeEncodeError: # pragma: no cover + # uiri/toml fallback (TBR on 4.0.0) with open( - str(settings_path), encoding=default_settings.ENCODING_FOR_DYNACONF + str(settings_path), + "w", + encoding=default_settings.ENCODING_FOR_DYNACONF, ) as open_file: - object_merge(toml.load(open_file), settings_data) - - with open( - str(settings_path), - "w", - encoding=default_settings.ENCODING_FOR_DYNACONF, - ) as open_file: - toml.dump(encode_nulls(settings_data), open_file) + toml.dump(encode_nulls(settings_data), open_file) + + warnings.warn( + "TOML files should have only UTF-8 encoded characters. 
" + "starting on 4.0.0 dynaconf will stop allowing invalid chars.", + ) def encode_nulls(data): diff --git a/dynaconf/utils/parse_conf.py b/dynaconf/utils/parse_conf.py index 5605393b5..0a6a32088 100644 --- a/dynaconf/utils/parse_conf.py +++ b/dynaconf/utils/parse_conf.py @@ -13,6 +13,7 @@ from dynaconf.utils.boxing import DynaBox from dynaconf.utils.functional import empty from dynaconf.vendor import toml +from dynaconf.vendor import tomllib try: from jinja2 import Environment @@ -277,10 +278,22 @@ def get_converter(converter_key, value, box_settings): def parse_with_toml(data): """Uses TOML syntax to parse data""" - try: - return toml.loads(f"key={data}")["key"] - except (toml.TomlDecodeError, KeyError): - return data + try: # try tomllib first + try: + return tomllib.loads(f"key={data}")["key"] + except (tomllib.TOMLDecodeError, KeyError): + return data + except UnicodeDecodeError: # pragma: no cover + # fallback to toml (TBR in 4.0.0) + try: + return toml.loads(f"key={data}")["key"] + except (toml.TomlDecodeError, KeyError): + return data + warnings.warn( + "TOML files should have only UTF-8 encoded characters. 
" + "starting on 4.0.0 dynaconf will stop allowing invalid chars.", + DeprecationWarning, + ) def _parse_conf_data(data, tomlfy=False, box_settings=None): diff --git a/dynaconf/vendor/box/converters.py b/dynaconf/vendor/box/converters.py index c9a2293be..08694fe1e 100644 --- a/dynaconf/vendor/box/converters.py +++ b/dynaconf/vendor/box/converters.py @@ -11,7 +11,7 @@ import dynaconf.vendor.ruamel.yaml as yaml from dynaconf.vendor.box.exceptions import BoxError, BoxWarning -from dynaconf.vendor import toml +from dynaconf.vendor import tomllib as toml BOX_PARAMETERS = ('default_box', 'default_box_attr', 'conversion_box', diff --git a/dynaconf/vendor/box/from_file.py b/dynaconf/vendor/box/from_file.py index 2e2a6ad7a..a82ac9659 100644 --- a/dynaconf/vendor/box/from_file.py +++ b/dynaconf/vendor/box/from_file.py @@ -3,7 +3,7 @@ from json import JSONDecodeError from pathlib import Path from typing import Union -from dynaconf.vendor.toml import TomlDecodeError +from dynaconf.vendor.tomllib import TOMLDecodeError from dynaconf.vendor.ruamel.yaml import YAMLError @@ -35,7 +35,7 @@ def _to_yaml(data): def _to_toml(data): try: return Box.from_toml(data) - except TomlDecodeError: + except TOMLDecodeError: raise BoxError('File is not TOML as expected') diff --git a/dynaconf/vendor/toml/DEPRECATION.txt b/dynaconf/vendor/toml/DEPRECATION.txt new file mode 100644 index 000000000..25cec54b9 --- /dev/null +++ b/dynaconf/vendor/toml/DEPRECATION.txt @@ -0,0 +1,3 @@ +This lib will be deprecated on 4.0.0 +toml_loader and all the other places +will default to tomllib. 
diff --git a/dynaconf/vendor/tomllib/__init__.py b/dynaconf/vendor/tomllib/__init__.py new file mode 100644 index 000000000..c4da93df5 --- /dev/null +++ b/dynaconf/vendor/tomllib/__init__.py @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen + +__all__ = ( + "loads", + "load", + "TOMLDecodeError", + "dump", + "dumps", +) + +from ._parser import TOMLDecodeError, load, loads +from ._writer import dump, dumps + +# Pretend this exception was created here. +TOMLDecodeError.__module__ = __name__ diff --git a/dynaconf/vendor/tomllib/_parser.py b/dynaconf/vendor/tomllib/_parser.py new file mode 100644 index 000000000..e1b3214fe --- /dev/null +++ b/dynaconf/vendor/tomllib/_parser.py @@ -0,0 +1,690 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen + +from __future__ import annotations + +from collections.abc import Iterable +import string +from types import MappingProxyType +from typing import Any, BinaryIO, NamedTuple + +from ._re import ( + RE_DATETIME, + RE_LOCALTIME, + RE_NUMBER, + match_to_datetime, + match_to_localtime, + match_to_number, +) +from ._types import Key, ParseFloat, Pos + +ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127)) + +# Neither of these sets include quotation mark or backslash. They are +# currently handled as separate cases in the parser functions. 
+ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t") +ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t\n") + +ILLEGAL_LITERAL_STR_CHARS = ILLEGAL_BASIC_STR_CHARS +ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ILLEGAL_MULTILINE_BASIC_STR_CHARS + +ILLEGAL_COMMENT_CHARS = ILLEGAL_BASIC_STR_CHARS + +TOML_WS = frozenset(" \t") +TOML_WS_AND_NEWLINE = TOML_WS | frozenset("\n") +BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + "-_") +KEY_INITIAL_CHARS = BARE_KEY_CHARS | frozenset("\"'") +HEXDIGIT_CHARS = frozenset(string.hexdigits) + +BASIC_STR_ESCAPE_REPLACEMENTS = MappingProxyType( + { + "\\b": "\u0008", # backspace + "\\t": "\u0009", # tab + "\\n": "\u000A", # linefeed + "\\f": "\u000C", # form feed + "\\r": "\u000D", # carriage return + '\\"': "\u0022", # quote + "\\\\": "\u005C", # backslash + } +) + + +class TOMLDecodeError(ValueError): + """An error raised if a document is not valid TOML.""" + + +def load(fp: BinaryIO, /, *, parse_float: ParseFloat = float) -> dict[str, Any]: + """Parse TOML from a binary file object.""" + b = fp.read() + try: + s = b.decode() + except AttributeError: + raise TypeError( + "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`" + ) from None + return loads(s, parse_float=parse_float) + + +def loads(s: str, /, *, parse_float: ParseFloat = float) -> dict[str, Any]: # noqa: C901 + """Parse TOML from a string.""" + + # The spec allows converting "\r\n" to "\n", even in string + # literals. Let's do so to simplify parsing. + src = s.replace("\r\n", "\n") + pos = 0 + out = Output(NestedDict(), Flags()) + header: Key = () + parse_float = make_safe_parse_float(parse_float) + + # Parse one statement at a time + # (typically means one line in TOML source) + while True: + # 1. Skip line leading whitespace + pos = skip_chars(src, pos, TOML_WS) + + # 2. Parse rules. 
Expect one of the following: + # - end of file + # - end of line + # - comment + # - key/value pair + # - append dict to list (and move to its namespace) + # - create dict (and move to its namespace) + # Skip trailing whitespace when applicable. + try: + char = src[pos] + except IndexError: + break + if char == "\n": + pos += 1 + continue + if char in KEY_INITIAL_CHARS: + pos = key_value_rule(src, pos, out, header, parse_float) + pos = skip_chars(src, pos, TOML_WS) + elif char == "[": + try: + second_char: str | None = src[pos + 1] + except IndexError: + second_char = None + out.flags.finalize_pending() + if second_char == "[": + pos, header = create_list_rule(src, pos, out) + else: + pos, header = create_dict_rule(src, pos, out) + pos = skip_chars(src, pos, TOML_WS) + elif char != "#": + raise suffixed_err(src, pos, "Invalid statement") + + # 3. Skip comment + pos = skip_comment(src, pos) + + # 4. Expect end of line or end of file + try: + char = src[pos] + except IndexError: + break + if char != "\n": + raise suffixed_err( + src, pos, "Expected newline or end of document after a statement" + ) + pos += 1 + + return out.data.dict + + +class Flags: + """Flags that map to parsed keys/namespaces.""" + + # Marks an immutable namespace (inline array or inline table). + FROZEN = 0 + # Marks a nest that has been explicitly created and can no longer + # be opened using the "[table]" syntax. 
+ EXPLICIT_NEST = 1 + + def __init__(self) -> None: + self._flags: dict[str, dict] = {} + self._pending_flags: set[tuple[Key, int]] = set() + + def add_pending(self, key: Key, flag: int) -> None: + self._pending_flags.add((key, flag)) + + def finalize_pending(self) -> None: + for key, flag in self._pending_flags: + self.set(key, flag, recursive=False) + self._pending_flags.clear() + + def unset_all(self, key: Key) -> None: + cont = self._flags + for k in key[:-1]: + if k not in cont: + return + cont = cont[k]["nested"] + cont.pop(key[-1], None) + + def set(self, key: Key, flag: int, *, recursive: bool) -> None: # noqa: A003 + cont = self._flags + key_parent, key_stem = key[:-1], key[-1] + for k in key_parent: + if k not in cont: + cont[k] = {"flags": set(), "recursive_flags": set(), "nested": {}} + cont = cont[k]["nested"] + if key_stem not in cont: + cont[key_stem] = {"flags": set(), "recursive_flags": set(), "nested": {}} + cont[key_stem]["recursive_flags" if recursive else "flags"].add(flag) + + def is_(self, key: Key, flag: int) -> bool: + if not key: + return False # document root has no flags + cont = self._flags + for k in key[:-1]: + if k not in cont: + return False + inner_cont = cont[k] + if flag in inner_cont["recursive_flags"]: + return True + cont = inner_cont["nested"] + key_stem = key[-1] + if key_stem in cont: + cont = cont[key_stem] + return flag in cont["flags"] or flag in cont["recursive_flags"] + return False + + +class NestedDict: + def __init__(self) -> None: + # The parsed content of the TOML document + self.dict: dict[str, Any] = {} + + def get_or_create_nest( + self, + key: Key, + *, + access_lists: bool = True, + ) -> dict: + cont: Any = self.dict + for k in key: + if k not in cont: + cont[k] = {} + cont = cont[k] + if access_lists and isinstance(cont, list): + cont = cont[-1] + if not isinstance(cont, dict): + raise KeyError("There is no nest behind this key") + return cont + + def append_nest_to_list(self, key: Key) -> None: + cont = 
self.get_or_create_nest(key[:-1]) + last_key = key[-1] + if last_key in cont: + list_ = cont[last_key] + if not isinstance(list_, list): + raise KeyError("An object other than list found behind this key") + list_.append({}) + else: + cont[last_key] = [{}] + + +class Output(NamedTuple): + data: NestedDict + flags: Flags + + +def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos: + try: + while src[pos] in chars: + pos += 1 + except IndexError: + pass + return pos + + +def skip_until( + src: str, + pos: Pos, + expect: str, + *, + error_on: frozenset[str], + error_on_eof: bool, +) -> Pos: + try: + new_pos = src.index(expect, pos) + except ValueError: + new_pos = len(src) + if error_on_eof: + raise suffixed_err(src, new_pos, f"Expected {expect!r}") from None + + if not error_on.isdisjoint(src[pos:new_pos]): + while src[pos] not in error_on: + pos += 1 + raise suffixed_err(src, pos, f"Found invalid character {src[pos]!r}") + return new_pos + + +def skip_comment(src: str, pos: Pos) -> Pos: + try: + char: str | None = src[pos] + except IndexError: + char = None + if char == "#": + return skip_until( + src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False + ) + return pos + + +def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos: + while True: + pos_before_skip = pos + pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE) + pos = skip_comment(src, pos) + if pos == pos_before_skip: + return pos + + +def create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]: + pos += 1 # Skip "[" + pos = skip_chars(src, pos, TOML_WS) + pos, key = parse_key(src, pos) + + if out.flags.is_(key, Flags.EXPLICIT_NEST) or out.flags.is_(key, Flags.FROZEN): + raise suffixed_err(src, pos, f"Cannot declare {key} twice") + out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False) + try: + out.data.get_or_create_nest(key) + except KeyError: + raise suffixed_err(src, pos, "Cannot overwrite a value") from None + + if not src.startswith("]", pos): + raise 
suffixed_err(src, pos, "Expected ']' at the end of a table declaration") + return pos + 1, key + + +def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]: + pos += 2 # Skip "[[" + pos = skip_chars(src, pos, TOML_WS) + pos, key = parse_key(src, pos) + + if out.flags.is_(key, Flags.FROZEN): + raise suffixed_err(src, pos, f"Cannot mutate immutable namespace {key}") + # Free the namespace now that it points to another empty list item... + out.flags.unset_all(key) + # ...but this key precisely is still prohibited from table declaration + out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False) + try: + out.data.append_nest_to_list(key) + except KeyError: + raise suffixed_err(src, pos, "Cannot overwrite a value") from None + + if not src.startswith("]]", pos): + raise suffixed_err(src, pos, "Expected ']]' at the end of an array declaration") + return pos + 2, key + + +def key_value_rule( + src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat +) -> Pos: + pos, key, value = parse_key_value_pair(src, pos, parse_float) + key_parent, key_stem = key[:-1], key[-1] + abs_key_parent = header + key_parent + + relative_path_cont_keys = (header + key[:i] for i in range(1, len(key))) + for cont_key in relative_path_cont_keys: + # Check that dotted key syntax does not redefine an existing table + if out.flags.is_(cont_key, Flags.EXPLICIT_NEST): + raise suffixed_err(src, pos, f"Cannot redefine namespace {cont_key}") + # Containers in the relative path can't be opened with the table syntax or + # dotted key/value syntax in following table sections. 
+ out.flags.add_pending(cont_key, Flags.EXPLICIT_NEST) + + if out.flags.is_(abs_key_parent, Flags.FROZEN): + raise suffixed_err( + src, pos, f"Cannot mutate immutable namespace {abs_key_parent}" + ) + + try: + nest = out.data.get_or_create_nest(abs_key_parent) + except KeyError: + raise suffixed_err(src, pos, "Cannot overwrite a value") from None + if key_stem in nest: + raise suffixed_err(src, pos, "Cannot overwrite a value") + # Mark inline table and array namespaces recursively immutable + if isinstance(value, (dict, list)): + out.flags.set(header + key, Flags.FROZEN, recursive=True) + nest[key_stem] = value + return pos + + +def parse_key_value_pair( + src: str, pos: Pos, parse_float: ParseFloat +) -> tuple[Pos, Key, Any]: + pos, key = parse_key(src, pos) + try: + char: str | None = src[pos] + except IndexError: + char = None + if char != "=": + raise suffixed_err(src, pos, "Expected '=' after a key in a key/value pair") + pos += 1 + pos = skip_chars(src, pos, TOML_WS) + pos, value = parse_value(src, pos, parse_float) + return pos, key, value + + +def parse_key(src: str, pos: Pos) -> tuple[Pos, Key]: + pos, key_part = parse_key_part(src, pos) + key: Key = (key_part,) + pos = skip_chars(src, pos, TOML_WS) + while True: + try: + char: str | None = src[pos] + except IndexError: + char = None + if char != ".": + return pos, key + pos += 1 + pos = skip_chars(src, pos, TOML_WS) + pos, key_part = parse_key_part(src, pos) + key += (key_part,) + pos = skip_chars(src, pos, TOML_WS) + + +def parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]: + try: + char: str | None = src[pos] + except IndexError: + char = None + if char in BARE_KEY_CHARS: + start_pos = pos + pos = skip_chars(src, pos, BARE_KEY_CHARS) + return pos, src[start_pos:pos] + if char == "'": + return parse_literal_str(src, pos) + if char == '"': + return parse_one_line_basic_str(src, pos) + raise suffixed_err(src, pos, "Invalid initial character for a key part") + + +def parse_one_line_basic_str(src: str, 
pos: Pos) -> tuple[Pos, str]: + pos += 1 + return parse_basic_str(src, pos, multiline=False) + + +def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, list]: + pos += 1 + array: list = [] + + pos = skip_comments_and_array_ws(src, pos) + if src.startswith("]", pos): + return pos + 1, array + while True: + pos, val = parse_value(src, pos, parse_float) + array.append(val) + pos = skip_comments_and_array_ws(src, pos) + + c = src[pos : pos + 1] + if c == "]": + return pos + 1, array + if c != ",": + raise suffixed_err(src, pos, "Unclosed array") + pos += 1 + + pos = skip_comments_and_array_ws(src, pos) + if src.startswith("]", pos): + return pos + 1, array + + +def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, dict]: + pos += 1 + nested_dict = NestedDict() + flags = Flags() + + pos = skip_chars(src, pos, TOML_WS) + if src.startswith("}", pos): + return pos + 1, nested_dict.dict + while True: + pos, key, value = parse_key_value_pair(src, pos, parse_float) + key_parent, key_stem = key[:-1], key[-1] + if flags.is_(key, Flags.FROZEN): + raise suffixed_err(src, pos, f"Cannot mutate immutable namespace {key}") + try: + nest = nested_dict.get_or_create_nest(key_parent, access_lists=False) + except KeyError: + raise suffixed_err(src, pos, "Cannot overwrite a value") from None + if key_stem in nest: + raise suffixed_err(src, pos, f"Duplicate inline table key {key_stem!r}") + nest[key_stem] = value + pos = skip_chars(src, pos, TOML_WS) + c = src[pos : pos + 1] + if c == "}": + return pos + 1, nested_dict.dict + if c != ",": + raise suffixed_err(src, pos, "Unclosed inline table") + if isinstance(value, (dict, list)): + flags.set(key, Flags.FROZEN, recursive=True) + pos += 1 + pos = skip_chars(src, pos, TOML_WS) + + +def parse_basic_str_escape( + src: str, pos: Pos, *, multiline: bool = False +) -> tuple[Pos, str]: + escape_id = src[pos : pos + 2] + pos += 2 + if multiline and escape_id in {"\\ ", "\\\t", "\\\n"}: + # Skip 
whitespace until next non-whitespace character or end of + # the doc. Error if non-whitespace is found before newline. + if escape_id != "\\\n": + pos = skip_chars(src, pos, TOML_WS) + try: + char = src[pos] + except IndexError: + return pos, "" + if char != "\n": + raise suffixed_err(src, pos, "Unescaped '\\' in a string") + pos += 1 + pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE) + return pos, "" + if escape_id == "\\u": + return parse_hex_char(src, pos, 4) + if escape_id == "\\U": + return parse_hex_char(src, pos, 8) + try: + return pos, BASIC_STR_ESCAPE_REPLACEMENTS[escape_id] + except KeyError: + raise suffixed_err(src, pos, "Unescaped '\\' in a string") from None + + +def parse_basic_str_escape_multiline(src: str, pos: Pos) -> tuple[Pos, str]: + return parse_basic_str_escape(src, pos, multiline=True) + + +def parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]: + hex_str = src[pos : pos + hex_len] + if len(hex_str) != hex_len or not HEXDIGIT_CHARS.issuperset(hex_str): + raise suffixed_err(src, pos, "Invalid hex value") + pos += hex_len + hex_int = int(hex_str, 16) + if not is_unicode_scalar_value(hex_int): + raise suffixed_err(src, pos, "Escaped character is not a Unicode scalar value") + return pos, chr(hex_int) + + +def parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]: + pos += 1 # Skip starting apostrophe + start_pos = pos + pos = skip_until( + src, pos, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True + ) + return pos + 1, src[start_pos:pos] # Skip ending apostrophe + + +def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> tuple[Pos, str]: + pos += 3 + if src.startswith("\n", pos): + pos += 1 + + if literal: + delim = "'" + end_pos = skip_until( + src, + pos, + "'''", + error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS, + error_on_eof=True, + ) + result = src[pos:end_pos] + pos = end_pos + 3 + else: + delim = '"' + pos, result = parse_basic_str(src, pos, multiline=True) + + # Add at maximum two extra 
apostrophes/quotes if the end sequence + # is 4 or 5 chars long instead of just 3. + if not src.startswith(delim, pos): + return pos, result + pos += 1 + if not src.startswith(delim, pos): + return pos, result + delim + pos += 1 + return pos, result + (delim * 2) + + +def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]: + if multiline: + error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS + parse_escapes = parse_basic_str_escape_multiline + else: + error_on = ILLEGAL_BASIC_STR_CHARS + parse_escapes = parse_basic_str_escape + result = "" + start_pos = pos + while True: + try: + char = src[pos] + except IndexError: + raise suffixed_err(src, pos, "Unterminated string") from None + if char == '"': + if not multiline: + return pos + 1, result + src[start_pos:pos] + if src.startswith('"""', pos): + return pos + 3, result + src[start_pos:pos] + pos += 1 + continue + if char == "\\": + result += src[start_pos:pos] + pos, parsed_escape = parse_escapes(src, pos) + result += parsed_escape + start_pos = pos + continue + if char in error_on: + raise suffixed_err(src, pos, f"Illegal character {char!r}") + pos += 1 + + +def parse_value( # noqa: C901 + src: str, pos: Pos, parse_float: ParseFloat +) -> tuple[Pos, Any]: + try: + char: str | None = src[pos] + except IndexError: + char = None + + # IMPORTANT: order conditions based on speed of checking and likelihood + + # Basic strings + if char == '"': + if src.startswith('"""', pos): + return parse_multiline_str(src, pos, literal=False) + return parse_one_line_basic_str(src, pos) + + # Literal strings + if char == "'": + if src.startswith("'''", pos): + return parse_multiline_str(src, pos, literal=True) + return parse_literal_str(src, pos) + + # Booleans + if char == "t": + if src.startswith("true", pos): + return pos + 4, True + if char == "f": + if src.startswith("false", pos): + return pos + 5, False + + # Arrays + if char == "[": + return parse_array(src, pos, parse_float) + + # Inline tables + if char == 
"{": + return parse_inline_table(src, pos, parse_float) + + # Dates and times + datetime_match = RE_DATETIME.match(src, pos) + if datetime_match: + try: + datetime_obj = match_to_datetime(datetime_match) + except ValueError as e: + raise suffixed_err(src, pos, "Invalid date or datetime") from e + return datetime_match.end(), datetime_obj + localtime_match = RE_LOCALTIME.match(src, pos) + if localtime_match: + return localtime_match.end(), match_to_localtime(localtime_match) + + # Integers and "normal" floats. + # The regex will greedily match any type starting with a decimal + # char, so needs to be located after handling of dates and times. + number_match = RE_NUMBER.match(src, pos) + if number_match: + return number_match.end(), match_to_number(number_match, parse_float) + + # Special floats + first_three = src[pos : pos + 3] + if first_three in {"inf", "nan"}: + return pos + 3, parse_float(first_three) + first_four = src[pos : pos + 4] + if first_four in {"-inf", "+inf", "-nan", "+nan"}: + return pos + 4, parse_float(first_four) + + raise suffixed_err(src, pos, "Invalid value") + + +def suffixed_err(src: str, pos: Pos, msg: str) -> TOMLDecodeError: + """Return a `TOMLDecodeError` where error message is suffixed with + coordinates in source.""" + + def coord_repr(src: str, pos: Pos) -> str: + if pos >= len(src): + return "end of document" + line = src.count("\n", 0, pos) + 1 + if line == 1: + column = pos + 1 + else: + column = pos - src.rindex("\n", 0, pos) + return f"line {line}, column {column}" + + return TOMLDecodeError(f"{msg} (at {coord_repr(src, pos)})") + + +def is_unicode_scalar_value(codepoint: int) -> bool: + return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111) + + +def make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat: + """A decorator to make `parse_float` safe. + + `parse_float` must not return dicts or lists, because these types + would be mixed with parsed TOML tables and arrays, thus confusing + the parser. 
The returned decorated callable raises `ValueError` + instead of returning illegal types. + """ + # The default `float` callable never returns illegal types. Optimize it. + if parse_float is float: # type: ignore[comparison-overlap] + return float + + def safe_parse_float(float_str: str) -> Any: + float_value = parse_float(float_str) + if isinstance(float_value, (dict, list)): + raise ValueError("parse_float must not return dicts or lists") + return float_value + + return safe_parse_float diff --git a/dynaconf/vendor/tomllib/_re.py b/dynaconf/vendor/tomllib/_re.py new file mode 100644 index 000000000..053634537 --- /dev/null +++ b/dynaconf/vendor/tomllib/_re.py @@ -0,0 +1,106 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen + +from __future__ import annotations + +from datetime import date, datetime, time, timedelta, timezone, tzinfo +from functools import lru_cache +import re +from typing import Any + +from ._types import ParseFloat + +# E.g. +# - 00:32:00.999999 +# - 00:32:00 +_TIME_RE_STR = r"([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?" + +RE_NUMBER = re.compile( + r""" +0 +(?: + x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex + | + b[01](?:_?[01])* # bin + | + o[0-7](?:_?[0-7])* # oct +) +| +[+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part +(?P<floatpart> + (?:\.[0-9](?:_?[0-9])*)? # optional fractional part + (?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part +) +""", + flags=re.VERBOSE, +) +RE_LOCALTIME = re.compile(_TIME_RE_STR) +RE_DATETIME = re.compile( + rf""" +([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27 +(?: + [Tt ] + {_TIME_RE_STR} + (?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset +)? +""", + flags=re.VERBOSE, +) + + +def match_to_datetime(match: re.Match) -> datetime | date: + """Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`. + + Raises ValueError if the match does not correspond to a valid date + or datetime. 
+ """ + ( + year_str, + month_str, + day_str, + hour_str, + minute_str, + sec_str, + micros_str, + zulu_time, + offset_sign_str, + offset_hour_str, + offset_minute_str, + ) = match.groups() + year, month, day = int(year_str), int(month_str), int(day_str) + if hour_str is None: + return date(year, month, day) + hour, minute, sec = int(hour_str), int(minute_str), int(sec_str) + micros = int(micros_str.ljust(6, "0")) if micros_str else 0 + if offset_sign_str: + tz: tzinfo | None = cached_tz( + offset_hour_str, offset_minute_str, offset_sign_str + ) + elif zulu_time: + tz = timezone.utc + else: # local date-time + tz = None + return datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz) + + +@lru_cache(maxsize=None) +def cached_tz(hour_str: str, minute_str: str, sign_str: str) -> timezone: + sign = 1 if sign_str == "+" else -1 + return timezone( + timedelta( + hours=sign * int(hour_str), + minutes=sign * int(minute_str), + ) + ) + + +def match_to_localtime(match: re.Match) -> time: + hour_str, minute_str, sec_str, micros_str = match.groups() + micros = int(micros_str.ljust(6, "0")) if micros_str else 0 + return time(int(hour_str), int(minute_str), int(sec_str), micros) + + +def match_to_number(match: re.Match, parse_float: ParseFloat) -> Any: + if match.group("floatpart"): + return parse_float(match.group()) + return int(match.group(), 0) diff --git a/dynaconf/vendor/tomllib/_types.py b/dynaconf/vendor/tomllib/_types.py new file mode 100644 index 000000000..68d70d9f9 --- /dev/null +++ b/dynaconf/vendor/tomllib/_types.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen + +from typing import Any, Callable, Tuple + +# Type annotations +ParseFloat = Callable[[str], Any] +Key = Tuple[str, ...] 
+Pos = int diff --git a/dynaconf/vendor/tomllib/_writer.py b/dynaconf/vendor/tomllib/_writer.py new file mode 100644 index 000000000..e67e53963 --- /dev/null +++ b/dynaconf/vendor/tomllib/_writer.py @@ -0,0 +1,202 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen + +from __future__ import annotations + +from collections.abc import Generator, Mapping +from datetime import date, datetime, time +from decimal import Decimal +import string +from types import MappingProxyType +from typing import Any, BinaryIO, NamedTuple + +ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127)) +ILLEGAL_BASIC_STR_CHARS = frozenset('"\\') | ASCII_CTRL - frozenset("\t") +BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + "-_") +ARRAY_TYPES = (list, tuple) +ARRAY_INDENT = " " * 4 +MAX_LINE_LENGTH = 100 + +COMPACT_ESCAPES = MappingProxyType( + { + "\u0008": "\\b", # backspace + "\u000A": "\\n", # linefeed + "\u000C": "\\f", # form feed + "\u000D": "\\r", # carriage return + "\u0022": '\\"', # quote + "\u005C": "\\\\", # backslash + } +) + + +def dump( + __obj: dict[str, Any], __fp: BinaryIO, *, multiline_strings: bool = False +) -> None: + ctx = Context(multiline_strings, {}) + for chunk in gen_table_chunks(__obj, ctx, name=""): + __fp.write(chunk.encode()) + + +def dumps(__obj: dict[str, Any], *, multiline_strings: bool = False) -> str: + ctx = Context(multiline_strings, {}) + return "".join(gen_table_chunks(__obj, ctx, name="")) + + +class Context(NamedTuple): + allow_multiline: bool + # cache rendered inline tables (mapping from object id to rendered inline table) + inline_table_cache: dict[int, str] + + +def gen_table_chunks( + table: Mapping[str, Any], + ctx: Context, + *, + name: str, + inside_aot: bool = False, +) -> Generator[str, None, None]: + yielded = False + literals = [] + tables: list[tuple[str, Any, bool]] = [] # => [(key, value, inside_aot)] + for k, v in table.items(): + if isinstance(v, dict): + 
tables.append((k, v, False)) + elif is_aot(v) and not all(is_suitable_inline_table(t, ctx) for t in v): + tables.extend((k, t, True) for t in v) + else: + literals.append((k, v)) + + if inside_aot or name and (literals or not tables): + yielded = True + yield f"[[{name}]]\n" if inside_aot else f"[{name}]\n" + + if literals: + yielded = True + for k, v in literals: + yield f"{format_key_part(k)} = {format_literal(v, ctx)}\n" + + for k, v, in_aot in tables: + if yielded: + yield "\n" + else: + yielded = True + key_part = format_key_part(k) + display_name = f"{name}.{key_part}" if name else key_part + yield from gen_table_chunks(v, ctx, name=display_name, inside_aot=in_aot) + + +def format_literal(obj: object, ctx: Context, *, nest_level: int = 0) -> str: + if isinstance(obj, bool): + return "true" if obj else "false" + if isinstance(obj, (int, float, date, datetime)): + return str(obj) + if isinstance(obj, Decimal): + return format_decimal(obj) + if isinstance(obj, time): + if obj.tzinfo: + raise ValueError("TOML does not support offset times") + return str(obj) + if isinstance(obj, str): + return format_string(obj, allow_multiline=ctx.allow_multiline) + if isinstance(obj, ARRAY_TYPES): + return format_inline_array(obj, ctx, nest_level) + if isinstance(obj, dict): + return format_inline_table(obj, ctx) + raise TypeError(f"Object of type {type(obj)} is not TOML serializable") + + +def format_decimal(obj: Decimal) -> str: + if obj.is_nan(): + return "nan" + if obj == Decimal("inf"): + return "inf" + if obj == Decimal("-inf"): + return "-inf" + return str(obj) + + +def format_inline_table(obj: dict, ctx: Context) -> str: + # check cache first + obj_id = id(obj) + if obj_id in ctx.inline_table_cache: + return ctx.inline_table_cache[obj_id] + + if not obj: + rendered = "{}" + else: + rendered = ( + "{ " + + ", ".join( + f"{format_key_part(k)} = {format_literal(v, ctx)}" + for k, v in obj.items() + ) + + " }" + ) + ctx.inline_table_cache[obj_id] = rendered + return 
rendered + + +def format_inline_array(obj: tuple | list, ctx: Context, nest_level: int) -> str: + if not obj: + return "[]" + item_indent = ARRAY_INDENT * (1 + nest_level) + closing_bracket_indent = ARRAY_INDENT * nest_level + return ( + "[\n" + + ",\n".join( + item_indent + format_literal(item, ctx, nest_level=nest_level + 1) + for item in obj + ) + + f",\n{closing_bracket_indent}]" + ) + + +def format_key_part(part: str) -> str: + if part and BARE_KEY_CHARS.issuperset(part): + return part + return format_string(part, allow_multiline=False) + + +def format_string(s: str, *, allow_multiline: bool) -> str: + do_multiline = allow_multiline and "\n" in s + if do_multiline: + result = '"""\n' + s = s.replace("\r\n", "\n") + else: + result = '"' + + pos = seq_start = 0 + while True: + try: + char = s[pos] + except IndexError: + result += s[seq_start:pos] + if do_multiline: + return result + '"""' + return result + '"' + if char in ILLEGAL_BASIC_STR_CHARS: + result += s[seq_start:pos] + if char in COMPACT_ESCAPES: + if do_multiline and char == "\n": + result += "\n" + else: + result += COMPACT_ESCAPES[char] + else: + result += "\\u" + hex(ord(char))[2:].rjust(4, "0") + seq_start = pos + 1 + pos += 1 + + +def is_aot(obj: Any) -> bool: + """Decides if an object behaves as an array of tables (i.e. 
a nonempty list + of dicts).""" + return bool( + isinstance(obj, ARRAY_TYPES) and obj and all(isinstance(v, dict) for v in obj) + ) + + +def is_suitable_inline_table(obj: dict, ctx: Context) -> bool: + """Use heuristics to decide if the inline-style representation is a good + choice for a given table.""" + rendered_inline = f"{ARRAY_INDENT}{format_inline_table(obj, ctx)}," + return len(rendered_inline) <= MAX_LINE_LENGTH and "\n" not in rendered_inline diff --git a/dynaconf/vendor/vendor.txt b/dynaconf/vendor/vendor.txt index add308df5..65f74aa33 100644 --- a/dynaconf/vendor/vendor.txt +++ b/dynaconf/vendor/vendor.txt @@ -1,5 +1,6 @@ python-box==4.2.3 toml==0.10.8 +tomli==2.0.1 click==7.1.x python-dotenv==0.13.0 ruamel.yaml==0.16.10 diff --git a/dynaconf/vendor/vendor_history b/dynaconf/vendor/vendor_history index 9fbac1e0f..1eef3a204 100644 --- a/dynaconf/vendor/vendor_history +++ b/dynaconf/vendor/vendor_history @@ -1,3 +1,22 @@ +## TOMLLIB + +- Sept 4, 2022 + +Added tomli as a vendored library to replace uiri/toml +this lib also has an MIT license. +Package renamed to `tomllib` to be compatible with std lib on python 3.11 +Added tomli-w._writer to the tomllib. + +## TOML + +- Sept 4, 2022 + +uiri/toml is kept for backwards compatibility but tomllib has been +introduced as the default TOML parser. + +`toml` is a fallback if tomllib fails to parse the file. +That was done because `toml` allows unencoded unicode characters while tomllib +follows the spec strictly.
## BOX diff --git a/dynaconf/vendor/vendor_update.sh b/dynaconf/vendor/vendor_update.sh deleted file mode 100755 index 46606528d..000000000 --- a/dynaconf/vendor/vendor_update.sh +++ /dev/null @@ -1,31 +0,0 @@ -# NOTES: -# WE ARE NOT TOUCHING ruamel/yaml or box libraries -# We update only click, toml and dotenv - -rm -rf /tmp/dynaconf_vendoring -mkdir -p /tmp/dynaconf_vendoring - -# For each library clone its repository into /tmp/dynaconf_vendoring/ -git clone -b 8.1.3 https://github.com/pallets/click --depth 1 /tmp/dynaconf_vendoring/click -git clone -b 2.0.1 https://github.com/hukkin/tomli --depth 1 /tmp/dynaconf_vendoring/tomli -git clone -b v0.21.0 https://github.com/theskumar/python-dotenv --depth 1 /tmp/dynaconf_vendoring/python-dotenv - -# For each library copy its source code to dynaconf/vendor - -# click -rm -rf click -cp -r /tmp/dynaconf_vendoring/click/src/click click -# toml -rm -rf toml -cp -r /tmp/dynaconf_vendoring/tomli/src/tomli toml -# dotenv -rm -rf dotenv -cp -r /tmp/dynaconf_vendoring/python-dotenv/src/dotenv dotenv - -echo "Some import paths must be manually resolved" -git grep "import click" -git grep "from click" -git grep "import toml" -git grep "from toml" -git grep "import dotenv" -git grep "from dotenv" diff --git a/tests/test_toml_loader.py b/tests/test_toml_loader.py index 9a5276c95..ea7aa71b1 100644 --- a/tests/test_toml_loader.py +++ b/tests/test_toml_loader.py @@ -49,9 +49,40 @@ host = "othertoml.com" """ +INVALID_TOML_TO_BE_REMOVED_ON_4_0_0 = """ +[global] +secret = "@float 42" +password = 123456.0 +host = "othertoml.com" +emojis = "😀😀😀😀" +encoded_variable="This has accents like � and � � � � just to test encoding �" +# The above is not alowed by TOML, but it is allowed by Dynaconf < 4.0.0 +""" + + TOMLS = [TOML, TOML2] +def test_load_from_toml_with_invalid_unicode(tmpdir): + # THIS TEST MUST FAIL AND BE REMOVED ON 4.0.0 + load(settings, filename=INVALID_TOML_TO_BE_REMOVED_ON_4_0_0) + assert settings.ENCODED_VARIABLE == ( + "This 
has accents like � and � � � � just to test encoding �" + ) + + tmpfile = tmpdir.join("settings.toml") + with open(tmpfile.strpath, "w", encoding="utf-8") as f: + f.write(INVALID_TOML_TO_BE_REMOVED_ON_4_0_0) + + _settings = LazySettings( + settings_files=[tmpfile.strpath], environments=True + ) + assert _settings.ENCODED_VARIABLE == ( + "This has accents like � and � � � � just to test encoding �" + ) + assert _settings.EMOJIS == "😀😀😀😀" + + def test_load_from_toml(): """Assert loads from TOML string""" load(settings, filename=TOML)