Add RandomStringField for per-row DB-generated random strings

davegaeddert · davegaeddert · commit 34858abbd585 · 2026-04-16T10:33:57.000-05:00
Ship `RandomStringField(length=, alphabet=)` as a DB-side replacement
for the `default=callable_returning_secrets.token_hex()` pattern. The
column carries a `DEFAULT` built from concatenated `substr(...,
mod(get_byte(gen_random_uuid()...)))` so Postgres generates a fresh
string per row — no Python callable involved.

Also folds in the fixes needed to make the feature work end-to-end:

- `Field.deconstruct` now shortens `plain.postgres.fields.<sub>.X` to
  `plain.postgres.X` only when the class is re-exported at the top
  level. Fixes a latent post-split bug where migrations emitted
  `plain.postgres.text.XField` against a module that doesn't exist,
  and lets `EncryptedTextField` / `EncryptedJSONField` drop their
  manual path overrides.
- `normalize_default_sql` gains `_strip_redundant_parens` to flatten
  pg_get_expr's grouping parens (`(gen_random_uuid())`,
  `(1 + mod(...))`) without touching function-call or row-constructor
  parens. Drops the old trailing `_strip_balanced_parens` since the
  new pass covers the same cases without eating tuple wrappers.
diff --git a/plain-postgres/plain/postgres/README.md b/plain-postgres/plain/postgres/README.md
@@ -732,10 +732,11 @@ class Product(postgres.Model):
 **Other fields:**
 
 - [`BooleanField`](./fields/__init__.py#BooleanField) - True/False
-- [`UUIDField`](./fields/__init__.py#UUIDField) - UUID
+- [`UUIDField`](./fields/__init__.py#UUIDField) - UUID (pass `generate=True` for a per-row `gen_random_uuid()` default)
 - [`BinaryField`](./fields/__init__.py#BinaryField) - Raw binary data
 - [`JSONField`](./fields/json.py#JSONField) - JSON data
 - [`GenericIPAddressField`](./fields/__init__.py#GenericIPAddressField) - IPv4 or IPv6 address
+- [`RandomStringField`](./fields/text.py#RandomStringField) - Per-row random string generated by Postgres (`length=`, `alphabet=`) — use for tokens, slugs, short IDs instead of a Python callable default. Pass a power-of-two `alphabet=` (16/32/64 chars) for a uniform distribution; the default 36-char alphabet has a small modulo bias and isn't suitable for cryptographically-sensitive tokens
 
 **Encrypted fields:**
 
diff --git a/plain-postgres/plain/postgres/__init__.py b/plain-postgres/plain/postgres/__init__.py
@@ -23,6 +23,7 @@
     GenericIPAddressField,
     IntegerField,
     PrimaryKeyField,
+    RandomStringField,
     SmallIntegerField,
     TextField,
     TimeField,
@@ -68,6 +69,7 @@
     "GenericIPAddressField",
     "IntegerField",
     "PrimaryKeyField",
+    "RandomStringField",
     "SmallIntegerField",
     "TextField",
     "TimeField",
diff --git a/plain-postgres/plain/postgres/fields/__init__.py b/plain-postgres/plain/postgres/fields/__init__.py
@@ -22,7 +22,7 @@
     SmallIntegerField,
 )
 from .temporal import DateField, DateTimeField, TimeField
-from .text import EmailField, TextField, URLField
+from .text import EmailField, RandomStringField, TextField, URLField
 from .uuid import UUIDField
 
 __all__ = [
@@ -42,6 +42,7 @@
     "GenericIPAddressField",
     "IntegerField",
     "NOT_PROVIDED",
+    "RandomStringField",
     "SmallIntegerField",
     "TextField",
     "TimeField",
diff --git a/plain-postgres/plain/postgres/fields/base.py b/plain-postgres/plain/postgres/fields/base.py
@@ -249,19 +249,17 @@ def deconstruct(self) -> tuple[str | None, str, list[Any], dict[str, Any]]:
         values.
         """
         keywords: dict[str, Any] = {}
-        # Work out path - we shorten it for known Plain core fields
         path = f"{self.__class__.__module__}.{self.__class__.__qualname__}"
-        if path.startswith("plain.postgres.fields.related"):
-            path = path.replace("plain.postgres.fields.related", "plain.postgres")
-        elif path.startswith("plain.postgres.fields.json"):
-            path = path.replace("plain.postgres.fields.json", "plain.postgres")
-        elif path.startswith("plain.postgres.fields.proxy"):
-            path = path.replace("plain.postgres.fields.proxy", "plain.postgres")
-        elif path.startswith("plain.postgres.fields.timezones"):
-            path = path.replace("plain.postgres.fields.timezones", "plain.postgres")
-        elif path.startswith("plain.postgres.fields"):
-            path = path.replace("plain.postgres.fields", "plain.postgres")
-        # Return basic info - other fields should override this.
+        # Shorten `plain.postgres.fields.<submod>.X` to `plain.postgres.X`
+        # when the class is re-exported at the top-level `plain.postgres`
+        # namespace. The real submodule (`plain.postgres.fields.text`) is
+        # importable but the shortened form is what migration files use.
+        if path.startswith("plain.postgres.fields."):
+            import plain.postgres as _postgres_root
+
+            cls_name = self.__class__.__qualname__
+            if getattr(_postgres_root, cls_name, None) is self.__class__:
+                path = f"plain.postgres.{cls_name}"
         # Note: self.name can be None during migration state rendering when fields are cloned
         return (self.name, path, [], keywords)
 
diff --git a/plain-postgres/plain/postgres/fields/encrypted.py b/plain-postgres/plain/postgres/fields/encrypted.py
@@ -248,10 +248,6 @@ def from_db_value(
 
     def deconstruct(self) -> tuple[str | None, str, list[Any], dict[str, Any]]:
         name, path, args, kwargs = super().deconstruct()
-        # Override the path rewrite from Field.deconstruct() which would
-        # shorten "plain.postgres.fields.encrypted" to "plain.postgres.encrypted"
-        # (a module that doesn't exist).
-        path = f"{self.__class__.__module__}.{self.__class__.__qualname__}"
         if self.max_length is not None:
             kwargs["max_length"] = self.max_length
         return name, path, args, kwargs
@@ -297,9 +293,6 @@ def __init__(
 
     def deconstruct(self) -> tuple[str | None, str, list[Any], dict[str, Any]]:
         name, path, args, kwargs = super().deconstruct()
-        # Override the path rewrite from Field.deconstruct() which would
-        # shorten to a nonexistent module (same pattern as EncryptedTextField).
-        path = f"{self.__class__.__module__}.{self.__class__.__qualname__}"
         if self.encoder is not None:
             kwargs["encoder"] = self.encoder
         if self.decoder is not None:
diff --git a/plain-postgres/plain/postgres/fields/text.py b/plain-postgres/plain/postgres/fields/text.py
@@ -1,13 +1,16 @@
 from __future__ import annotations
 
 from collections.abc import Callable, Sequence
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 from plain import validators
 from plain.preflight import PreflightResult
 from plain.validators import MaxLengthValidator
 
-from .base import NOT_PROVIDED, ChoicesField
+from .base import NOT_PROVIDED, ChoicesField, ColumnField
+
+if TYPE_CHECKING:
+    from plain.postgres.functions.random import RandomString
 
 
 class TextField(ChoicesField[str]):
@@ -90,3 +93,48 @@ class EmailField(TextField):
 
 class URLField(TextField):
     default_validators = [validators.URLValidator()]
+
+
+class RandomStringField(ColumnField[str]):
+    """Text column that Postgres fills with a random string on INSERT.
+
+    The randomness lives in the column's ``DEFAULT`` expression, which is
+    evaluated once per row. Raw SQL inserts and ORM inserts therefore both
+    receive a fresh string of exactly ``length`` characters drawn from
+    ``alphabet``. Supplying an explicit value at ``create()`` time takes
+    precedence over the generated one.
+    """
+
+    db_type_sql = "text"
+
+    def __init__(
+        self,
+        *,
+        length: int,
+        alphabet: str | None = None,
+        required: bool = True,
+        allow_null: bool = False,
+        validators: Sequence[Callable[..., Any]] = (),
+    ):
+        # Local import: `plain.postgres.functions.random` itself imports from
+        # `plain.postgres.fields`, so a module-level import would be circular.
+        from plain.postgres.functions.random import DEFAULT_ALPHABET, RandomString
+
+        if alphabet is None:
+            alphabet = DEFAULT_ALPHABET
+        # Build the expression first: RandomString validates `length` and
+        # `alphabet` and raises ValueError before the field is initialised.
+        self._expression = RandomString(length=length, alphabet=alphabet)
+        super().__init__(
+            required=required,
+            allow_null=allow_null,
+            validators=validators,
+        )
+
+    def get_db_default_expression(self) -> RandomString:
+        """Return the expression used as the column's DB-side default."""
+        return self._expression
+
+    def deconstruct(self) -> tuple[str | None, str, list[Any], dict[str, Any]]:
+        """Add ``length`` (always) and a non-default ``alphabet`` to kwargs."""
+        from plain.postgres.functions.random import DEFAULT_ALPHABET
+
+        name, path, args, kwargs = super().deconstruct()
+        expression = self._expression
+        kwargs["length"] = expression.length
+        # Leave the default alphabet out of the deconstructed kwargs.
+        if expression.alphabet != DEFAULT_ALPHABET:
+            kwargs["alphabet"] = expression.alphabet
+        return name, path, args, kwargs
diff --git a/plain-postgres/plain/postgres/functions/__init__.py b/plain-postgres/plain/postgres/functions/__init__.py
@@ -50,6 +50,7 @@
     Sqrt,
     Tan,
 )
+from .random import RandomString
 from .text import (
     MD5,
     SHA1,
@@ -175,6 +176,8 @@
     "Substr",
     "Trim",
     "Upper",
+    # random
+    "RandomString",
     # uuid
     "GenRandomUUID",
     # window
diff --git a/plain-postgres/plain/postgres/functions/random.py b/plain-postgres/plain/postgres/functions/random.py
@@ -0,0 +1,76 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from plain.postgres.expressions import Func
+from plain.postgres.fields import TextField
+
+if TYPE_CHECKING:
+    from plain.postgres.connection import DatabaseConnection
+    from plain.postgres.sql.compiler import SQLCompiler
+
+
+DEFAULT_ALPHABET = "abcdefghijklmnopqrstuvwxyz0123456789"
+
+
+class RandomString(Func):
+    """SQL expression yielding a random string of ``length`` characters.
+
+    The compiled SQL carries no query parameters: the alphabet and its
+    length are inlined as literals. Every output character comes from its
+    own ``gen_random_uuid()`` call (OS CSPRNG-backed): byte 0 of the UUID
+    (0-255) is reduced with ``mod(byte, len(alphabet))`` and the result
+    indexes into the alphabet via ``substr``.
+
+    The reduction is uniform only when ``len(alphabet)`` divides 256, i.e.
+    is a power of two (16, 32, 64, 128, ...). With the default 36-char
+    alphabet ``256 mod 36 == 4``, so the first 4 characters are
+    over-represented by ~12%.
+
+    Intended for short identifiers, slugs, and tokens. Pass a power-of-two
+    ``alphabet=`` when uniformity matters; use a different mechanism entirely
+    for anything security-sensitive.
+    """
+
+    output_field = TextField()
+
+    def __init__(
+        self,
+        length: int,
+        alphabet: str = DEFAULT_ALPHABET,
+    ) -> None:
+        """Validate and store ``length`` and ``alphabet``.
+
+        Raises:
+            ValueError: if ``length`` is below 1, or ``alphabet`` is empty,
+                longer than 256 characters, or contains ``%`` or ``'``.
+        """
+        if length < 1:
+            raise ValueError("RandomString length must be >= 1")
+        if not alphabet:
+            raise ValueError("RandomString alphabet must be non-empty")
+        alphabet_size = len(alphabet)
+        if alphabet_size > 256:
+            raise ValueError(
+                "RandomString alphabet must be at most 256 characters "
+                f"(got {alphabet_size})."
+            )
+        # `%` would clash with psycopg's placeholder syntax and `'` would
+        # have to be escaped inside the DDL string literal. Rejecting both
+        # keeps the SQL simple and lets the generated DEFAULT compare
+        # byte-for-byte against pg_get_expr output.
+        if any(banned in alphabet for banned in ("%", "'")):
+            raise ValueError("RandomString alphabet must not contain '%' or \"'\".")
+        self.length = length
+        self.alphabet = alphabet
+        super().__init__()
+
+    def as_sql(
+        self,
+        compiler: SQLCompiler,
+        connection: DatabaseConnection,
+        function: str | None = None,
+        template: str | None = None,
+        arg_joiner: str | None = None,
+        **extra_context: Any,
+    ) -> tuple[str, list[Any]]:
+        """Return the parenthesised ``||`` concatenation and no parameters.
+
+        All arguments are accepted for signature compatibility and ignored.
+        """
+        # One random byte per character: byte 0 of a fresh UUID.
+        random_byte = (
+            "get_byte(decode(replace(gen_random_uuid()::text, '-', ''), 'hex'), 0)"
+        )
+        # `mod(a, b)` instead of `a % b` so psycopg doesn't read `%` as a
+        # placeholder. __init__ guarantees the alphabet holds neither `%`
+        # nor `'`, so it can be inlined without escaping.
+        one_char = (
+            f"substr('{self.alphabet}', "
+            f"1 + mod({random_byte}, {len(self.alphabet)}), 1)"
+        )
+        concatenated = " || ".join([one_char] * self.length)
+        return f"({concatenated})", []
diff --git a/plain-postgres/plain/postgres/introspection/schema.py b/plain-postgres/plain/postgres/introspection/schema.py
@@ -209,6 +209,115 @@ def _strip_balanced_parens(s: str) -> str:
     return s
 
 
+def _strip_redundant_parens(s: str) -> str:
+    """Strip balanced ``(...)`` groups that don't alter expression meaning.
+
+    pg_get_expr rewrites stored defaults with aggressive grouping parens
+    (e.g. ``(gen_random_uuid())``, ``(1 + mod(...))``) that the ORM compiler
+    doesn't emit. For DEFAULT-expression drift comparison we normalize both
+    sides by flattening every redundant paren pair outside string literals.
+
+    A pair is kept when it is a function call's argument list (the
+    character emitted immediately before ``(`` is alphanumeric or ``_``) or
+    when its contents contain a top-level comma (tuple / row constructor).
+    Every other pair is treated as grouping and dropped; nested groups are
+    handled recursively. Single-quoted literals, including ``''`` escapes,
+    are copied through verbatim. If a ``(`` has no matching ``)``, the rest
+    of the string is emitted unchanged.
+
+    Caveat: this does not preserve operator precedence — `(a + b) * c` and
+    `a + b * c` would normalize identically. That's acceptable here because
+    both sides come from the same expression source, so precedence is
+    consistent.
+    """
+    if "(" not in s:
+        return s
+    out: list[str] = []
+    n = len(s)
+    i = 0
+    in_single = False
+    while i < n:
+        ch = s[i]
+        if in_single:
+            out.append(ch)
+            if ch == "'":
+                # SQL doubles single quotes to escape them inside literals.
+                if i + 1 < n and s[i + 1] == "'":
+                    out.append(s[i + 1])
+                    i += 2
+                    continue
+                in_single = False
+            i += 1
+            continue
+        if ch == "'":
+            out.append(ch)
+            in_single = True
+            i += 1
+            continue
+        if ch == "(":
+            # Find the matching `)` at the same depth, skipping over any
+            # parens that sit inside single-quoted literals.
+            depth = 1
+            j = i + 1
+            j_in_single = False
+            while j < n and depth:
+                cj = s[j]
+                if j_in_single:
+                    if cj == "'":
+                        if j + 1 < n and s[j + 1] == "'":
+                            j += 2
+                            continue
+                        j_in_single = False
+                elif cj == "'":
+                    j_in_single = True
+                elif cj == "(":
+                    depth += 1
+                elif cj == ")":
+                    depth -= 1
+                j += 1
+            if depth != 0:
+                # Unbalanced — leave the rest alone.
+                out.append(s[i:])
+                break
+            # `j` is one past the matching `)`, so the contents are
+            # s[i + 1 : j - 1].
+            inner = s[i + 1 : j - 1]
+            stripped_inner = _strip_redundant_parens(inner)
+            # A `(...)` is a function call's argument list when the char
+            # immediately before it is an identifier char — those parens are
+            # part of the call syntax and must stay. `out` holds
+            # multi-character chunks once a group has been appended, so look
+            # at the last *character* emitted rather than the last chunk.
+            prev = next((chunk[-1] for chunk in reversed(out) if chunk), "")
+            is_function_args = prev.isalnum() or prev == "_"
+            # Otherwise the parens are grouping: redundant iff the enclosed
+            # expression contains no top-level comma (a comma would mean
+            # we're inside a tuple/row-constructor, not a grouping).
+            if is_function_args or _has_top_level_comma(stripped_inner):
+                out.append("(" + stripped_inner + ")")
+            else:
+                out.append(stripped_inner)
+            i = j
+            continue
+        out.append(ch)
+        i += 1
+    return "".join(out)
+
+
+def _has_top_level_comma(s: str) -> bool:
+    """Report whether ``s`` has a comma outside parens and string literals.
+
+    Only commas seen at paren depth zero count. Text inside single-quoted
+    literals is skipped, with ``''`` treated as an escaped quote rather
+    than the end of the literal.
+    """
+    nesting = 0
+    quoted = False
+    pos = 0
+    end = len(s)
+    while pos < end:
+        char = s[pos]
+        step = 1
+        if quoted:
+            if char == "'":
+                if s[pos + 1 : pos + 2] == "'":
+                    # Doubled quote: still inside the literal, skip both.
+                    step = 2
+                else:
+                    quoted = False
+        elif char == "'":
+            quoted = True
+        elif char == "(":
+            nesting += 1
+        elif char == ")":
+            nesting -= 1
+        elif char == "," and nesting == 0:
+            return True
+        pos += step
+    return False
+
+
 def _normalize_sql(s: str) -> str:
     """Lowercase keywords/identifiers, strip quotes, collapse whitespace."""
     s = sqlparse.format(
@@ -420,7 +529,7 @@ def normalize_default_sql(s: str) -> str:
     """
     s = _normalize_sql(s)
     s = _strip_type_casts(s)
-    s = _strip_balanced_parens(s)
+    s = _strip_redundant_parens(s)
     return s
 
 
diff --git a/plain-postgres/plain/postgres/types.py b/plain-postgres/plain/postgres/types.py
@@ -31,6 +31,7 @@ class User(postgres.Model):
     GenericIPAddressField,
     IntegerField,
     PrimaryKeyField,
+    RandomStringField,
     SmallIntegerField,
     TextField,
     TimeField,
@@ -75,6 +76,7 @@ class User(postgres.Model):
     "ReverseForeignKeyManager",
     "ReverseManyToMany",
     "PrimaryKeyField",
+    "RandomStringField",
     "SmallIntegerField",
     "TextField",
     "TimeField",
diff --git a/plain-postgres/plain/postgres/types.pyi b/plain-postgres/plain/postgres/types.pyi
diff --git a/plain-postgres/tests/app/examples/migrations/0017_random_string_token.py b/plain-postgres/tests/app/examples/migrations/0017_random_string_token.py
diff --git a/plain-postgres/tests/app/examples/models/defaults.py b/plain-postgres/tests/app/examples/models/defaults.py
diff --git a/plain-postgres/tests/test_introspection.py b/plain-postgres/tests/test_introspection.py
diff --git a/plain-postgres/tests/test_random_string_field.py b/plain-postgres/tests/test_random_string_field.py