Skip to content

Commit 34858ab

Browse files
committed
Add RandomStringField for per-row DB-generated random strings
Ship `RandomStringField(length=, alphabet=)` as a DB-side replacement for the `default=callable_returning_secrets.token_hex()` pattern. The column carries a `DEFAULT` built from concatenated `substr(..., mod(get_byte(gen_random_uuid()...)))` so Postgres generates a fresh string per row — no Python callable involved. Also folds in the fixes needed to make the feature work end-to-end: - `Field.deconstruct` now shortens `plain.postgres.fields.<sub>.X` to `plain.postgres.X` only when the class is re-exported at the top level. Fixes a latent post-split bug where migrations emitted `plain.postgres.text.XField` against a module that doesn't exist, and lets `EncryptedTextField` / `EncryptedJSONField` drop their manual path overrides. - `normalize_default_sql` gains `_strip_redundant_parens` to flatten pg_get_expr's grouping parens (`(gen_random_uuid())`, `(1 + mod(...))`) without touching function-call or row-constructor parens. Drops the old trailing `_strip_balanced_parens` since the new pass covers the same cases without eating tuple wrappers.
1 parent c59473d commit 34858ab

15 files changed

Lines changed: 442 additions & 24 deletions

File tree

plain-postgres/plain/postgres/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -732,10 +732,11 @@ class Product(postgres.Model):
732732
**Other fields:**
733733

734734
- [`BooleanField`](./fields/__init__.py#BooleanField) - True/False
735-
- [`UUIDField`](./fields/__init__.py#UUIDField) - UUID
735+
- [`UUIDField`](./fields/__init__.py#UUIDField) - UUID (pass `generate=True` for a per-row `gen_random_uuid()` default)
736736
- [`BinaryField`](./fields/__init__.py#BinaryField) - Raw binary data
737737
- [`JSONField`](./fields/json.py#JSONField) - JSON data
738738
- [`GenericIPAddressField`](./fields/__init__.py#GenericIPAddressField) - IPv4 or IPv6 address
739+
- [`RandomStringField`](./fields/text.py#RandomStringField) - Per-row random string generated by Postgres (`length=`, `alphabet=`) — use for tokens, slugs, short IDs instead of a Python callable default. Pass a power-of-two `alphabet=` (16/32/64 chars) for a uniform distribution; the default 36-char alphabet has a small modulo bias and isn't suitable for cryptographically sensitive tokens
739740

740741
**Encrypted fields:**
741742

plain-postgres/plain/postgres/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
GenericIPAddressField,
2424
IntegerField,
2525
PrimaryKeyField,
26+
RandomStringField,
2627
SmallIntegerField,
2728
TextField,
2829
TimeField,
@@ -68,6 +69,7 @@
6869
"GenericIPAddressField",
6970
"IntegerField",
7071
"PrimaryKeyField",
72+
"RandomStringField",
7173
"SmallIntegerField",
7274
"TextField",
7375
"TimeField",

plain-postgres/plain/postgres/fields/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
SmallIntegerField,
2323
)
2424
from .temporal import DateField, DateTimeField, TimeField
25-
from .text import EmailField, TextField, URLField
25+
from .text import EmailField, RandomStringField, TextField, URLField
2626
from .uuid import UUIDField
2727

2828
__all__ = [
@@ -42,6 +42,7 @@
4242
"GenericIPAddressField",
4343
"IntegerField",
4444
"NOT_PROVIDED",
45+
"RandomStringField",
4546
"SmallIntegerField",
4647
"TextField",
4748
"TimeField",

plain-postgres/plain/postgres/fields/base.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -249,19 +249,17 @@ def deconstruct(self) -> tuple[str | None, str, list[Any], dict[str, Any]]:
249249
values.
250250
"""
251251
keywords: dict[str, Any] = {}
252-
# Work out path - we shorten it for known Plain core fields
253252
path = f"{self.__class__.__module__}.{self.__class__.__qualname__}"
254-
if path.startswith("plain.postgres.fields.related"):
255-
path = path.replace("plain.postgres.fields.related", "plain.postgres")
256-
elif path.startswith("plain.postgres.fields.json"):
257-
path = path.replace("plain.postgres.fields.json", "plain.postgres")
258-
elif path.startswith("plain.postgres.fields.proxy"):
259-
path = path.replace("plain.postgres.fields.proxy", "plain.postgres")
260-
elif path.startswith("plain.postgres.fields.timezones"):
261-
path = path.replace("plain.postgres.fields.timezones", "plain.postgres")
262-
elif path.startswith("plain.postgres.fields"):
263-
path = path.replace("plain.postgres.fields", "plain.postgres")
264-
# Return basic info - other fields should override this.
253+
# Shorten `plain.postgres.fields.<submod>.X` to `plain.postgres.X`
254+
# when the class is re-exported at the top-level `plain.postgres`
255+
# namespace. The real submodule (`plain.postgres.fields.text`) is
256+
# importable but the shortened form is what migration files use.
257+
if path.startswith("plain.postgres.fields."):
258+
import plain.postgres as _postgres_root
259+
260+
cls_name = self.__class__.__qualname__
261+
if getattr(_postgres_root, cls_name, None) is self.__class__:
262+
path = f"plain.postgres.{cls_name}"
265263
# Note: self.name can be None during migration state rendering when fields are cloned
266264
return (self.name, path, [], keywords)
267265

plain-postgres/plain/postgres/fields/encrypted.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -248,10 +248,6 @@ def from_db_value(
248248

249249
def deconstruct(self) -> tuple[str | None, str, list[Any], dict[str, Any]]:
250250
name, path, args, kwargs = super().deconstruct()
251-
# Override the path rewrite from Field.deconstruct() which would
252-
# shorten "plain.postgres.fields.encrypted" to "plain.postgres.encrypted"
253-
# (a module that doesn't exist).
254-
path = f"{self.__class__.__module__}.{self.__class__.__qualname__}"
255251
if self.max_length is not None:
256252
kwargs["max_length"] = self.max_length
257253
return name, path, args, kwargs
@@ -297,9 +293,6 @@ def __init__(
297293

298294
def deconstruct(self) -> tuple[str | None, str, list[Any], dict[str, Any]]:
299295
name, path, args, kwargs = super().deconstruct()
300-
# Override the path rewrite from Field.deconstruct() which would
301-
# shorten to a nonexistent module (same pattern as EncryptedTextField).
302-
path = f"{self.__class__.__module__}.{self.__class__.__qualname__}"
303296
if self.encoder is not None:
304297
kwargs["encoder"] = self.encoder
305298
if self.decoder is not None:

plain-postgres/plain/postgres/fields/text.py

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
from __future__ import annotations
22

33
from collections.abc import Callable, Sequence
4-
from typing import Any
4+
from typing import TYPE_CHECKING, Any
55

66
from plain import validators
77
from plain.preflight import PreflightResult
88
from plain.validators import MaxLengthValidator
99

10-
from .base import NOT_PROVIDED, ChoicesField
10+
from .base import NOT_PROVIDED, ChoicesField, ColumnField
11+
12+
if TYPE_CHECKING:
13+
from plain.postgres.functions.random import RandomString
1114

1215

1316
class TextField(ChoicesField[str]):
@@ -90,3 +93,48 @@ class EmailField(TextField):
9093

9194
class URLField(TextField):
9295
default_validators = [validators.URLValidator()]
96+
97+
98+
class RandomStringField(ColumnField[str]):
    """Text column that Postgres fills with a random string on INSERT.

    The value comes from a per-row ``DEFAULT`` expression on the column, so
    rows inserted through the ORM and through raw SQL alike receive a new
    string of exactly ``length`` characters taken from ``alphabet``. An
    explicit value supplied at ``create()`` time takes precedence over the
    generated one.
    """

    db_type_sql = "text"

    def __init__(
        self,
        *,
        length: int,
        alphabet: str | None = None,
        required: bool = True,
        allow_null: bool = False,
        validators: Sequence[Callable[..., Any]] = (),
    ):
        # Function-scope import: `functions.random` itself imports from
        # `plain.postgres.fields`, so importing it at module level here
        # would be circular.
        from plain.postgres.functions.random import DEFAULT_ALPHABET, RandomString

        if alphabet is None:
            alphabet = DEFAULT_ALPHABET
        # RandomString validates `length` and `alphabet`; build it before
        # handing the remaining options to the base field.
        self._expression = RandomString(length=length, alphabet=alphabet)
        super().__init__(
            required=required,
            allow_null=allow_null,
            validators=validators,
        )

    def get_db_default_expression(self) -> RandomString:
        """Return the expression used as this column's database DEFAULT."""
        return self._expression

    def deconstruct(self) -> tuple[str | None, str, list[Any], dict[str, Any]]:
        """Return the field's migration signature.

        ``length`` is always emitted; ``alphabet`` only when it differs from
        the default alphabet.
        """
        from plain.postgres.functions.random import DEFAULT_ALPHABET

        name, path, args, kwargs = super().deconstruct()
        expression = self._expression
        kwargs["length"] = expression.length
        if expression.alphabet != DEFAULT_ALPHABET:
            kwargs["alphabet"] = expression.alphabet
        return name, path, args, kwargs

plain-postgres/plain/postgres/functions/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
Sqrt,
5151
Tan,
5252
)
53+
from .random import RandomString
5354
from .text import (
5455
MD5,
5556
SHA1,
@@ -175,6 +176,8 @@
175176
"Substr",
176177
"Trim",
177178
"Upper",
179+
# random
180+
"RandomString",
178181
# uuid
179182
"GenRandomUUID",
180183
# window
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING, Any
4+
5+
from plain.postgres.expressions import Func
6+
from plain.postgres.fields import TextField
7+
8+
if TYPE_CHECKING:
9+
from plain.postgres.connection import DatabaseConnection
10+
from plain.postgres.sql.compiler import SQLCompiler
11+
12+
13+
DEFAULT_ALPHABET = "abcdefghijklmnopqrstuvwxyz0123456789"
14+
15+
16+
class RandomString(Func):
    """SQL expression yielding a random ``length``-character string.

    The generated SQL carries no query parameters. Each character has its
    own ``gen_random_uuid()`` call, from which one byte (0-255) is taken and
    reduced with ``mod(byte, len(alphabet))`` to pick a character out of
    ``alphabet``. The distribution is uniform only when ``len(alphabet)``
    divides 256 (i.e. it is a power of two); otherwise the leading
    characters of the alphabet are slightly favoured. For the default
    36-character alphabet ``256 mod 36 == 4``, so its first four characters
    turn up more often than the rest.

    Meant for short identifiers, slugs, and tokens. Choose a power-of-two
    ``alphabet=`` when an even distribution matters, and use a different
    mechanism altogether for anything security-sensitive.

    Raises:
        ValueError: if ``length`` is below 1, or ``alphabet`` is empty,
            longer than 256 characters, or contains ``%`` or ``'``.
    """

    output_field = TextField()

    def __init__(
        self,
        length: int,
        alphabet: str = DEFAULT_ALPHABET,
    ) -> None:
        if length < 1:
            raise ValueError("RandomString length must be >= 1")
        if not alphabet:
            raise ValueError("RandomString alphabet must be non-empty")
        alphabet_size = len(alphabet)
        if alphabet_size > 256:
            raise ValueError(
                "RandomString alphabet must be at most 256 characters "
                f"(got {alphabet_size})."
            )
        # Two characters are banned outright: `%` clashes with psycopg's
        # placeholder syntax, and `'` would have to be escaped inside the
        # DDL string literal. Neither belongs in a token/slug alphabet, and
        # rejecting them keeps the emitted SQL simple enough to compare
        # byte-for-byte against pg_get_expr output.
        if any(banned in alphabet for banned in ("%", "'")):
            raise ValueError("RandomString alphabet must not contain '%' or \"'\".")
        self.length = length
        self.alphabet = alphabet
        super().__init__()

    def as_sql(
        self,
        compiler: SQLCompiler,
        connection: DatabaseConnection,
        function: str | None = None,
        template: str | None = None,
        arg_joiner: str | None = None,
        **extra_context: Any,
    ) -> tuple[str, list[Any]]:
        """Return the parenthesised ``||`` concatenation and an empty params list."""
        # One random byte: the first byte of a freshly generated UUID.
        random_byte = (
            "get_byte(decode(replace(gen_random_uuid()::text, '-', ''), 'hex'), 0)"
        )
        # `mod(a, b)` instead of `a % b` because psycopg would read `%` as a
        # placeholder. __init__ guarantees the alphabet holds neither `%`
        # nor `'`, so it is embedded in the literal without escaping.
        one_char = (
            f"substr('{self.alphabet}', "
            f"1 + mod({random_byte}, {len(self.alphabet)}), 1)"
        )
        joined = " || ".join(one_char for _ in range(self.length))
        return f"({joined})", []

plain-postgres/plain/postgres/introspection/schema.py

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,115 @@ def _strip_balanced_parens(s: str) -> str:
209209
return s
210210

211211

212+
def _strip_redundant_parens(s: str) -> str:
    """Remove balanced ``(...)`` pairs that only group a single expression.

    pg_get_expr adds grouping parens to stored defaults
    (``(gen_random_uuid())``, ``(1 + mod(...))``) that the ORM compiler does
    not emit. To compare DEFAULT expressions for drift, both sides are
    normalized by flattening such pairs wherever they occur outside
    single-quoted string literals.

    Parens are kept when they directly follow an identifier character (a
    function call's argument list) or when their contents hold a top-level
    comma (a tuple / row constructor). If an opening paren has no match,
    the remainder of the string is emitted untouched.

    Caveat: operator precedence is not preserved — ``(a + b) * c`` and
    ``a + b * c`` normalize to the same text. That is acceptable because
    both sides of the comparison come from the same expression source.
    """
    if "(" not in s:
        return s

    pieces: list[str] = []
    total = len(s)
    pos = 0
    while pos < total:
        ch = s[pos]

        if ch == "'":
            # Copy the whole string literal verbatim. A doubled quote ('')
            # is SQL's escape for a quote and does not end the literal; an
            # unterminated literal runs to the end of the input.
            end = pos + 1
            while end < total:
                if s[end] != "'":
                    end += 1
                elif end + 1 < total and s[end + 1] == "'":
                    end += 2
                else:
                    end += 1
                    break
            pieces.append(s[pos:end])
            pos = end
            continue

        if ch != "(":
            pieces.append(ch)
            pos += 1
            continue

        # Locate the `)` matching this `(`, ignoring parens inside literals.
        # On success `scan` ends one past the closing paren.
        depth = 1
        scan = pos + 1
        quoted = False
        while depth and scan < total:
            cur = s[scan]
            if quoted:
                if cur == "'":
                    if scan + 1 < total and s[scan + 1] == "'":
                        scan += 2
                        continue
                    quoted = False
            elif cur == "'":
                quoted = True
            elif cur == "(":
                depth += 1
            elif cur == ")":
                depth -= 1
            scan += 1
        if depth:
            # Unbalanced — leave the rest alone.
            pieces.append(s[pos:])
            break

        body = _strip_redundant_parens(s[pos + 1 : scan - 1])
        # Parens that sit right after an identifier character belong to a
        # function call and must stay. Note this inspects the most recently
        # emitted piece as a whole.
        before = pieces[-1] if pieces else ""
        follows_identifier = before.isalnum() or before == "_"
        # Any other pair is pure grouping unless its contents have a
        # top-level comma, which marks a tuple / row constructor.
        keep_parens = follows_identifier or _has_top_level_comma(body)
        pieces.append(f"({body})" if keep_parens else body)
        pos = scan

    return "".join(pieces)
292+
293+
294+
def _has_top_level_comma(s: str) -> bool:
295+
depth = 0
296+
in_single = False
297+
i = 0
298+
n = len(s)
299+
while i < n:
300+
ch = s[i]
301+
if in_single:
302+
if ch == "'":
303+
if i + 1 < n and s[i + 1] == "'":
304+
i += 2
305+
continue
306+
in_single = False
307+
i += 1
308+
continue
309+
if ch == "'":
310+
in_single = True
311+
elif ch == "(":
312+
depth += 1
313+
elif ch == ")":
314+
depth -= 1
315+
elif ch == "," and depth == 0:
316+
return True
317+
i += 1
318+
return False
319+
320+
212321
def _normalize_sql(s: str) -> str:
213322
"""Lowercase keywords/identifiers, strip quotes, collapse whitespace."""
214323
s = sqlparse.format(
@@ -420,7 +529,7 @@ def normalize_default_sql(s: str) -> str:
420529
"""
421530
s = _normalize_sql(s)
422531
s = _strip_type_casts(s)
423-
s = _strip_balanced_parens(s)
532+
s = _strip_redundant_parens(s)
424533
return s
425534

426535

plain-postgres/plain/postgres/types.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class User(postgres.Model):
3131
GenericIPAddressField,
3232
IntegerField,
3333
PrimaryKeyField,
34+
RandomStringField,
3435
SmallIntegerField,
3536
TextField,
3637
TimeField,
@@ -75,6 +76,7 @@ class User(postgres.Model):
7576
"ReverseForeignKeyManager",
7677
"ReverseManyToMany",
7778
"PrimaryKeyField",
79+
"RandomStringField",
7880
"SmallIntegerField",
7981
"TextField",
8082
"TimeField",

0 commit comments

Comments
 (0)