From 1ab085dfab0b70ca8d6c9d172deb0c10121cfaa2 Mon Sep 17 00:00:00 2001
From: Damien Elmes
Date: Sat, 13 Mar 2021 10:23:32 +1000
Subject: [PATCH] ensure fields normalized before checksumming

https://forums.ankiweb.net/t/python-checksum-rust-checksum/8195
---
 pylib/anki/utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pylib/anki/utils.py b/pylib/anki/utils.py
index 7322f80024b..51f093969b3 100644
--- a/pylib/anki/utils.py
+++ b/pylib/anki/utils.py
@@ -15,6 +15,7 @@
 import tempfile
 import time
 import traceback
+import unicodedata
 from contextlib import contextmanager
 from hashlib import sha1
 from html.entities import name2codepoint
@@ -201,8 +202,11 @@ def checksum(data: Union[bytes, str]) -> str:
 
 
 def fieldChecksum(data: str) -> int:
+    without_html = stripHTMLMedia(data)
+    normalized = unicodedata.normalize("NFC", without_html)
+    utf8_text = normalized.encode("utf-8")
     # 32 bit unsigned number from first 8 digits of sha1 hash
-    return int(checksum(stripHTMLMedia(data).encode("utf-8"))[:8], 16)
+    return int(checksum(utf8_text)[:8], 16)
 
 
 # Temp files