From a65676c5bab85a287f3dea462125ce2f585a5b01 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Wed, 21 May 2025 01:49:08 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Speed=20up=20function=20`e?= =?UTF-8?q?ncoded=5Ftokens=5Flen`=20by=2070%=20in=20PR=20#231=20(`remove-t?= =?UTF-8?q?iktoken`)=20Here=20is=20an=20optimized=20version=20of=20your=20?= =?UTF-8?q?code.=20The=20bottleneck=20is=20minimal=20as=20the=20computatio?= =?UTF-8?q?n=20is=20a=20single=20multiplication=20and=20a=20cast=20to=20in?= =?UTF-8?q?t,=20which=20is=20already=20fast.=20However,=20a=20very=20minor?= =?UTF-8?q?=20optimization=20can=20be=20done=20by=20avoiding=20the=20`int(?= =?UTF-8?q?)`=20call=20for=20many=20cases=20by=20using=20integer=20divisio?= =?UTF-8?q?n=20directly.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep the `from __future__ import annotations` import if the module has one: postponed evaluation of annotations has been available since Python 3.7, but it has never become the default, so removing the import can change behavior. Here is an optimized version: `len(s) // 4` replaces `int(len(s) * 0.25)`. This avoids floating point multiplication and conversion overhead, and gives the same result as `int(len(s)*0.25)` because `len(s)` is always a non-negative integer. 
--- codeflash/code_utils/code_utils.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py index 507e79f74..9e24e68ad 100644 --- a/codeflash/code_utils/code_utils.py +++ b/codeflash/code_utils/code_utils.py @@ -10,10 +10,14 @@ from codeflash.cli_cmds.console import logger + def encoded_tokens_len(s: str) -> int: - '''Function for returning the approximate length of the encoded tokens - It's an approximation of BPE encoding (https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)''' - return int(len(s)*0.25) + """Function for returning the approximate length of the encoded tokens + It's an approximation of BPE encoding (https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf) + """ + # Use integer division for better performance + return len(s) // 4 + def get_qualified_name(module_name: str, full_qualified_name: str) -> str: if not full_qualified_name: