From 2d586516999ddb32df773cc86e0bf2df19e64811 Mon Sep 17 00:00:00 2001
From: USAMI Kenta
Date: Mon, 5 Apr 2021 22:35:58 +0900
Subject: [PATCH] optimize filename normalize (#1460)

* Normalize slug using ucs-normalize-NFC-string after remove Unicode spacing mark

* Add org-roam-slug--preserve-chars-from-normalization for Unicode Normalization

* Add org-roam-slug-trim-chars instead of org-roam-slug--preserve-chars-from-normalization
---
 org-roam.el | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/org-roam.el b/org-roam.el
index 4e61119c7b..7633777209 100644
--- a/org-roam.el
+++ b/org-roam.el
@@ -234,6 +234,35 @@ Function should return a filename string based on title."
   :type 'function
   :group 'org-roam)
 
+(defcustom org-roam-slug-trim-chars
+  '(;; Combining Diacritical Marks https://www.unicode.org/charts/PDF/U0300.pdf
+    768 ; U+0300 COMBINING GRAVE ACCENT
+    769 ; U+0301 COMBINING ACUTE ACCENT
+    770 ; U+0302 COMBINING CIRCUMFLEX ACCENT
+    771 ; U+0303 COMBINING TILDE
+    772 ; U+0304 COMBINING MACRON
+    774 ; U+0306 COMBINING BREVE
+    775 ; U+0307 COMBINING DOT ABOVE
+    776 ; U+0308 COMBINING DIAERESIS
+    777 ; U+0309 COMBINING HOOK ABOVE
+    778 ; U+030A COMBINING RING ABOVE
+    780 ; U+030C COMBINING CARON
+    795 ; U+031B COMBINING HORN
+    803 ; U+0323 COMBINING DOT BELOW
+    804 ; U+0324 COMBINING DIAERESIS BELOW
+    805 ; U+0325 COMBINING RING BELOW
+    807 ; U+0327 COMBINING CEDILLA
+    813 ; U+032D COMBINING CIRCUMFLEX ACCENT BELOW
+    814 ; U+032E COMBINING BREVE BELOW
+    816 ; U+0330 COMBINING TILDE BELOW
+    817 ; U+0331 COMBINING MACRON BELOW
+    )
+  "Characters to strip from NFD-decomposed text when making a slug.
+
+By default, these are combining diacritical marks used with Latin letters."
+  :type '(repeat character)
+  :group 'org-roam)
+
 (defcustom org-roam-title-sources '((title headline) alias)
   "The list of sources from which to retrieve a note title.
Each element in the list is either: @@ -794,10 +823,11 @@ Each ref is returned as a cons of its type and its key." (defun org-roam--title-to-slug (title) "Convert TITLE to a filename-suitable slug." (cl-flet* ((nonspacing-mark-p (char) - (eq 'Mn (get-char-code-property char 'general-category))) + (memq char org-roam-slug-trim-chars)) (strip-nonspacing-marks (s) - (apply #'string (seq-remove #'nonspacing-mark-p - (ucs-normalize-NFD-string s)))) + (ucs-normalize-NFC-string + (apply #'string (seq-remove #'nonspacing-mark-p + (ucs-normalize-NFD-string s))))) (cl-replace (title pair) (replace-regexp-in-string (car pair) (cdr pair) title))) (let* ((pairs `(("[^[:alnum:][:digit:]]" . "_") ;; convert anything not alphanumeric