From 5434ce64616f99153d6e6c8c087fef041a3bb1e5 Mon Sep 17 00:00:00 2001
From: thecotne
Date: Mon, 29 Feb 2016 16:09:20 +0400
Subject: [PATCH] add unicode support in slugs

---
 js/lib/utils/string.js | 28 ++++++++++++++++++++++------
 src/Util/Str.php       | 30 ++++++++++++++++++++++--------
 2 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/js/lib/utils/string.js b/js/lib/utils/string.js
index d8d11d2578..a9834f0243 100644
--- a/js/lib/utils/string.js
+++ b/js/lib/utils/string.js
@@ -13,17 +13,33 @@ export function truncate(string, length, start = 0) {
 }
 
 /**
- * Create a slug out of the given string. Non-alphanumeric characters are
- * converted to hyphens.
+ * Create a slug out of the given string.
+ *
+ * URL-unsafe characters are converted to hyphens; Unicode letters are kept.
+ * The result is truncated to 64 characters, and '-' is returned if nothing
+ * is left.
  *
  * @param {String} string
  * @return {String}
  */
 export function slug(string) {
-  return string.toLowerCase()
-    .replace(/[^a-z0-9]/gi, '-')
-    .replace(/-+/g, '-')
-    .replace(/-$|^-/g, '') || '-';
+  // Regex for finding the nonsafe URL characters (many need escaping): & +$,:;=?@"#{}|^~[`%!']./()*\
+  const nonsafeChars = /[& +$,:;=?@"#{}|^~[`%!'\]\.\/\(\)\*\\]/g;
+
+  // Example string: " ⚡⚡ Don't forget: URL fragments should be i18n-friendly, hyphenated, short, and clean."
+  const hyphenated = string.trim() // "⚡⚡ Don't forget: URL fragments should be i18n-friendly, hyphenated, short, and clean."
+    .replace(/\'/gi, '') // "⚡⚡ Dont forget: URL fragments should be i18n-friendly, hyphenated, short, and clean."
+    .replace(nonsafeChars, '-') // "⚡⚡-Dont-forget--URL-fragments-should-be-i18n-friendly--hyphenated--short--and-clean-"
+    .replace(/-{2,}/g, '-'); // "⚡⚡-Dont-forget-URL-fragments-should-be-i18n-friendly-hyphenated-short-and-clean-"
+
+  // Truncate by code point rather than by UTF-16 unit so that an astral
+  // character (e.g. an emoji) is never cut into a lone surrogate.
+  // Note: we trim hyphens after truncating because truncating can cause dangling hyphens.
+  const truncated = Array.from(hyphenated).slice(0, 64).join('') // "⚡⚡-Dont-forget-URL-fragments-should-be-i18n-friendly-hyphenated-"
+    .replace(/^-+|-+$/gm, '') // "⚡⚡-Dont-forget-URL-fragments-should-be-i18n-friendly-hyphenated"
+    .toLowerCase(); // "⚡⚡-dont-forget-url-fragments-should-be-i18n-friendly-hyphenated"
+
+  return truncated || '-';
 }
 
 /**
diff --git a/src/Util/Str.php b/src/Util/Str.php
index 46e010a96d..9ca163ecb6 100644
--- a/src/Util/Str.php
+++ b/src/Util/Str.php
@@ -15,18 +15,32 @@ class Str
     /**
      * Create a slug out of the given string.
      *
-     * Non-alphanumeric characters are converted to hyphens.
+     * URL-unsafe characters are converted to hyphens; Unicode letters are kept.
+     * The result is truncated to 64 characters, and '-' is returned if nothing
+     * is left.
      *
-     * @param string $str
+     * @param string $string
      * @return string
      */
-    public static function slug($str)
+    public static function slug($string)
     {
-        $str = strtolower($str);
-        $str = preg_replace('/[^a-z0-9]/i', '-', $str);
-        $str = preg_replace('/-+/', '-', $str);
-        $str = preg_replace('/-$|^-/', '', $str);
+        // Regex for finding the nonsafe URL characters (many need escaping): & +$,:;=?@"#{}|^~[`%!']./()*\
+        // The backslash is written as \\\\ so that PCRE receives \\ and the class is closed by the final ].
+        $nonsafeChars = '/[& +$,:;=?@"#{}|^~[`%!\'\]\.\/\(\)\*\\\\]/';
 
-        return $str ?: '-';
+        // Note: we trim hyphens after truncating because truncating can cause dangling hyphens.
+        // preg_replace() already replaces every match, and PCRE has no "g" modifier.
+        // The mb_* calls keep multibyte UTF-8 characters intact (requires ext-mbstring).
+        // Example string: " ⚡⚡ Don't forget: URL fragments should be i18n-friendly, hyphenated, short, and clean."
+        $string = trim($string); // "⚡⚡ Don't forget: URL fragments should be i18n-friendly, hyphenated, short, and clean."
+        $string = preg_replace('/\'/', '', $string); // "⚡⚡ Dont forget: URL fragments should be i18n-friendly, hyphenated, short, and clean."
+        $string = preg_replace($nonsafeChars, '-', $string); // "⚡⚡-Dont-forget--URL-fragments-should-be-i18n-friendly--hyphenated--short--and-clean-"
+        $string = preg_replace('/-{2,}/', '-', $string); // "⚡⚡-Dont-forget-URL-fragments-should-be-i18n-friendly-hyphenated-short-and-clean-"
+        $string = mb_substr($string, 0, 64, 'UTF-8'); // "⚡⚡-Dont-forget-URL-fragments-should-be-i18n-friendly-hyphenated-"
+        $string = preg_replace('/^-+|-+$/m', '', $string); // "⚡⚡-Dont-forget-URL-fragments-should-be-i18n-friendly-hyphenated"
+        $string = mb_strtolower($string, 'UTF-8'); // "⚡⚡-dont-forget-url-fragments-should-be-i18n-friendly-hyphenated"
+
+        // Compare against '' explicitly: "?:" would also replace the valid slug "0".
+        return $string !== '' ? $string : '-';
     }
 }