Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Slug: Use urlencoding to support non-ASCII characters #70691

Merged
merged 5 commits into from Jul 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
57 changes: 40 additions & 17 deletions pkg/infra/slugify/slugify.go
Expand Up @@ -32,7 +32,7 @@ package slugify

import (
"bytes"
"encoding/base64"
"fmt"
"strings"
"unicode/utf8"

Expand All @@ -42,20 +42,18 @@ import (
var (
// simpleSlugger is the package-level slugger that Slugify delegates to.
// It accepts runes approved by validCharacter, uses '-' as its replacement
// rune, and takes its replacement and omit tables from
// getDefaultReplacements and getDefaultOmitments.
simpleSlugger = &slugger{
isValidCharacter: validCharacter,
replaceCharacter: '-',
replacementMap: getDefaultReplacements(),
omitMap: getDefaultOmitments(),
}
)

// Slugify creates a URL safe latin slug for a given value
// Slugify creates a URL safe version from a given string that is at most 50 bytes long.
func Slugify(value string) string {
s := simpleSlugger.Slugify(value)
if s == "" {
s = base64.RawURLEncoding.EncodeToString([]byte(value))
if len(s) > 50 || s == "" {
s = uuid.NewSHA1(uuid.NameSpaceOID, []byte(value)).String()
}
s := simpleSlugger.Slugify(strings.TrimSpace(value))
if len(s) > 50 || s == "" {
s = uuid.NewSHA1(uuid.NameSpaceOID, []byte(value)).String()
}

return s
}

Expand All @@ -66,46 +64,71 @@ func validCharacter(c rune) bool {
if c >= '0' && c <= '9' {
return true
}
if c == '_' || c == '-' {
return true
}
return false
}

// slugger holds the character rules that its Slugify method applies to each
// rune of the input.
type slugger struct {
// isValidCharacter reports whether a rune may be written to the slug as is.
isValidCharacter func(c rune) bool
// replaceCharacter is a single replacement rune; simpleSlugger sets it to '-'.
replaceCharacter rune
// replacementMap maps a rune to the string written in its place.
replacementMap map[rune]string
// omitMap is the set of runes Slugify looks up in order to skip them.
omitMap map[rune]struct{}
}

// Slugify creates a slug for a string.
//
// The value is lower-cased and then handled one rune at a time, using the
// first rule that matches:
//   - a rune present in replacementMap is replaced by its mapped string;
//   - a rune accepted by isValidCharacter is copied unchanged;
//   - a rune present in omitMap is dropped;
//   - any other rune is written as its UTF-8 bytes, each percent-encoded as
//     "%xx" with exactly two lower-case hex digits.
//
// The result is returned as built: it is not trimmed, and consecutive
// replacement output is not collapsed. An empty input yields an empty string.
func (s slugger) Slugify(value string) string {
	var buffer bytes.Buffer
	// Scratch space for the UTF-8 encoding of one rune, reused across iterations.
	var encoded [utf8.UTFMax]byte

	// Ranging over a string decodes one rune per step; an invalid byte yields
	// utf8.RuneError and advances by one byte, as utf8.DecodeRuneInString does.
	for _, c := range strings.ToLower(value) {
		if replacement, ok := s.replacementMap[c]; ok {
			buffer.WriteString(replacement)
			continue
		}

		// A kept rune must not fall through to the percent-encoder below,
		// otherwise it would be emitted twice.
		if s.isValidCharacter(c) {
			buffer.WriteRune(c)
			continue
		}

		if _, ok := s.omitMap[c]; ok {
			continue
		}

		n := utf8.EncodeRune(encoded[:], c)
		for _, b := range encoded[:n] {
			// %02x keeps every escape two digits wide; plain %x would render
			// a byte below 0x10 (such as a tab) as a one-digit escape.
			fmt.Fprintf(&buffer, "%%%02x", b)
		}
	}

	return buffer.String()
}

// getDefaultOmitments returns a freshly built set of the runes listed below;
// simpleSlugger uses it as its omitMap.
func getDefaultOmitments() map[rune]struct{} {
	dropped := []rune{',', '"', '\'', '\n', '\r', '\x00', '?', '.', '(', ')'}

	set := make(map[rune]struct{}, len(dropped))
	for _, r := range dropped {
		set[r] = struct{}{}
	}
	return set
}

func getDefaultReplacements() map[rune]string {
return map[rune]string{
' ': "-",

'&': "and",
'@': "at",
'©': "c",
Expand Down
28 changes: 14 additions & 14 deletions pkg/infra/slugify/slugify_test.go
Expand Up @@ -7,12 +7,12 @@ import (
func TestSlugify(t *testing.T) {
results := make(map[string]string)
results["hello-playground"] = "Hello, playground"
results["hello-it-s-paradise"] = "😢 😣 😤 😥 😦 😧 😨 😩 😪 😫 😬 Hello, it's paradise"
results["00a4bc92-3695-5702-9ddf-6719fdf11567"] = "😢 😣 😤 😥 😦 😧 😨 😩 😪 😫 😬 Hello, it's paradise"
results["61db60b5-f1e7-5853-9b81-0f074fc268ea"] = "😢 😣 😤 😥 😦 😧 😨 😩 😪 😫 😬"
results["8J-YoiAt"] = "😢 -"
results["a"] = "?,a . \n "
results["%f0%9f%98%a2--"] = "😢 -"
results["a-"] = "?,a . \n "
results["0a68eb57-c88a-5f34-9e9d-27f85e68af4f"] = "" // empty input has a slug!
results["hi-this-is-a-test"] = "方向盤後面 hi this is a test خلف المقو"
results["3cbb528a-0ebf-54ad-bed2-2a188cd1824e"] = "方向盤後面 hi this is a test خلف المقو"
results["cong-hoa-xa-hoi-chu-nghia-viet-nam"] = "Cộng hòa xã hội chủ nghĩa Việt Nam"
results["noi-nang-canh-canh-ben-long-bieng-khuay"] = "Nỗi nàng canh cánh bên lòng biếng khuây" // This line in a poem called Truyen Kieu

Expand All @@ -39,16 +39,16 @@ func BenchmarkSlugifyLongString(b *testing.B) {
😢 😣 😤 😥 😦 😧 😨 😩 😪 😫 😬 Hello, it's paradise
😢 😣 😤 😥 😦 😧 😨 😩 😪 😫 😬 Hello, it's paradise
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Aliquam sapien nisl, laoreet quis vestibulum ut, cursus
in turpis. Sed magna mi, blandit id nisi vel, imperdiet
mollis turpis. Fusce vel fringilla mauris. Donec cursus
rhoncus bibendum. Aliquam erat volutpat. Maecenas
faucibus turpis ex, quis lacinia ligula ultrices non.
Sed gravida justo augue. Nulla bibendum dignissim tellus
vitae lobortis. Suspendisse fermentum vel purus in pulvinar.
Vivamus eu fermentum purus, sit amet tempor orci.
Praesent congue convallis turpis, ac ullamcorper lorem
semper id.
Aliquam sapien nisl, laoreet quis vestibulum ut, cursus
in turpis. Sed magna mi, blandit id nisi vel, imperdiet
mollis turpis. Fusce vel fringilla mauris. Donec cursus
rhoncus bibendum. Aliquam erat volutpat. Maecenas
faucibus turpis ex, quis lacinia ligula ultrices non.
Sed gravida justo augue. Nulla bibendum dignissim tellus
vitae lobortis. Suspendisse fermentum vel purus in pulvinar.
Vivamus eu fermentum purus, sit amet tempor orci.
Praesent congue convallis turpis, ac ullamcorper lorem
semper id.
`)
}
}
2 changes: 1 addition & 1 deletion pkg/services/accesscontrol/models_test.go
Expand Up @@ -63,7 +63,7 @@ func TestSaveExternalServiceRoleCommand_Validate(t *testing.T) {
Permissions: []Permission{{Action: "users:read", Scope: "users:id:1"}},
},
wantErr: false,
wantID: "thisis-a-very-strange-app-name",
wantID: "thisis-a-very-strange-___-app-name",
},
{
name: "invalid empty Action",
Expand Down
6 changes: 3 additions & 3 deletions pkg/services/dashboards/models_test.go
Expand Up @@ -77,9 +77,9 @@ func TestSlugifyTitle(t *testing.T) {
testCases := map[string]string{
"Grafana Play Home": "grafana-play-home",
"snöräv-över-ån": "snorav-over-an",
"漢字": "5ryi5a2X", // "han-zi", // Hanzi for hanzi
"🇦🇶": "8J-HpvCfh7Y", // flag of Antarctica-emoji, using fallback
"𒆠": "8JKGoA", // cuneiform Ki, using fallback
"漢字": "%e6%bc%a2%e5%ad%97", // "han-zi", // Hanzi for hanzi
"🇦🇶": "%f0%9f%87%a6%f0%9f%87%b6", // flag of Antarctica-emoji, using fallback
"𒆠": "%f0%92%86%a0", // cuneiform Ki, using fallback
}

for input, expected := range testCases {
Expand Down