Skip to content

Commit

Permalink
perf: use base32 space for random names instead of base16 (#25497)
Browse files Browse the repository at this point in the history
(cherry picked from commit adf24b2)

# Conflicts:
#	frappe/model/naming.py
  • Loading branch information
ankush authored and mergify[bot] committed Mar 19, 2024
1 parent 8a3ebc2 commit 228660f
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 1 deletion.
29 changes: 29 additions & 0 deletions frappe/model/naming.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2015, Frappe Technologies Pvt. Ltd. and Contributors
# License: MIT. See LICENSE

import base64
import datetime
import re
from collections.abc import Callable
Expand Down Expand Up @@ -262,12 +263,40 @@ def make_autoname(key="", doctype="", doc="", *, ignore_validate=False):
DE/09/01/00001 where 09 is the year, 01 is the month and 00001 is the series
"""
if key == "hash":
<<<<<<< HEAD
return frappe.generate_hash(length=10)
=======
# Makeshift "ULID": first 4 chars are based on timestamp, other 6 are random
return _get_timestamp_prefix() + _generate_random_string(6)
>>>>>>> adf24b24d4 (perf: use base32 space for random names instead of base16 (#25497))

series = NamingSeries(key)
return series.generate_next_name(doc, ignore_validate=ignore_validate)


def _get_timestamp_prefix():
ts = int(time.time() * 10) # time in deciseconds
# we ~~don't need~~ can't get ordering over entire lifetime, so we wrap the time.
ts = ts % (32**4)
return base64.b32hexencode(ts.to_bytes(length=5, byteorder="big")).decode()[-4:].lower()


def _generate_random_string(length=10):
"""Better version of frappe.generate_hash for naming.

This uses entire base32 instead of base16 used by generate_hash. So it has twice as many
characters and hence more likely to have shorter common prefixes. i.e. slighly faster comparisons and less conflicts.

Why not base36?
It's not in standard library else using all characters is probably better approach.
Why not base64?
MySQL is case-insensitive, we can't use both upper and lower case characters.
"""
from secrets import token_bytes as get_random_bytes

return base64.b32hexencode(get_random_bytes(length)).decode()[:length].lower()


def parse_naming_series(
parts: list[str] | str,
doctype=None,
Expand Down
19 changes: 18 additions & 1 deletion frappe/tests/test_naming.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# Copyright (c) 2018, Frappe Technologies Pvt. Ltd. and Contributors
# License: MIT. See LICENSE

from unittest.mock import patch
import time

from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_full_jitter

import frappe
from frappe.core.doctype.doctype.test_doctype import new_doctype
Expand All @@ -11,6 +13,7 @@
append_number_if_name_exists,
determine_consecutive_week_number,
getseries,
make_autoname,
parse_naming_series,
revert_series_if_last,
)
Expand Down Expand Up @@ -390,6 +393,20 @@ def test_custom_parser(self):
expected_name = "TODO-" + nowdate().split("-")[1] + "-" + "0001"
self.assertEqual(name, expected_name)

@retry(
retry=retry_if_exception_type(AssertionError),
stop=stop_after_attempt(3),
wait=wait_full_jitter(),
reraise=True,
)
def test_hash_naming_is_roughly_sequential(self):
"""hash naming is supposed to be sequential *most of the time*"""
names = []
for _ in range(10):
time.sleep(0.1)
names.append(make_autoname("hash"))
self.assertEqual(names, sorted(names))


def parse_naming_series_variable(doc, variable):
if variable == "PM":
Expand Down

0 comments on commit 228660f

Please sign in to comment.