-
Notifications
You must be signed in to change notification settings - Fork 0
/
profile.go
49 lines (45 loc) · 1.85 KB
/
profile.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
package thai
import (
"github.com/abhabongse/fuzzymatch-go/candidate/diacritics"
"github.com/abhabongse/fuzzymatch-go/factory"
fuzzyTransform "github.com/abhabongse/fuzzymatch-go/transform"
"golang.org/x/text/cases"
"golang.org/x/text/runes"
"golang.org/x/text/secure/precis"
"golang.org/x/text/transform"
)
// Sanitize is the string transform function for text containing Thai script.
// It is built by factory.MakeStringTransformFunction from a transformer
// obtained via PrecisProfile.NewTransformer, and extends
// LatinExtendedSanitize by additionally sanitizing Thai input.
var Sanitize = factory.MakeStringTransformFunction(PrecisProfile.NewTransformer())
// PrecisProfile is a Unicode PRECIS profile that prepares strings for a more
// secure comparison. It is a freeform-class profile configured with width
// folding, the AdditionalMapping transformer chain, and case folding with
// final-sigma handling enabled.
var PrecisProfile = precis.NewFreeform(
	precis.FoldWidth,
	precis.AdditionalMapping(
		// The factory builds a new chain over the elements of
		// AdditionalMapping every time it is invoked.
		func() transform.Transformer { return transform.Chain(AdditionalMapping...) },
	),
	precis.FoldCase(cases.HandleFinalSigma(true)),
)
// AdditionalMapping is the ordered slice of string transformers used as the
// additional mapping step of PrecisProfile. The elements are chained together
// with transform.Chain, so they are applied in the order listed here;
// reordering entries changes the result.
var AdditionalMapping = []transform.Transformer{
// Replace bytes that are not well-formed UTF-8 before any other step runs
runes.ReplaceIllFormed(),
// Remove non-printing rune characters
fuzzyTransform.StripNonPrintTransformer,
// Replace every white-space character with a normal space
fuzzyTransform.ToNormalSpaceTransformer,
// Replace every dash and hyphen with a normal hyphen
fuzzyTransform.ToNormalHyphenTransformer,
// Remove diacritical marks above Latin characters
diacritics.AsciiFoldTransformer,
diacritics.StripDiacriticalMarksTransformer,
// Special rule: recombine the character pairs for sara-ae and sara-am
BigramRecombineTransformer,
// Special rule: remove accidentally repeated non-spacing marks such as
// tonal marks, ascending vowels, descending vowels, etc.
RemoveRepeatedMarksTransformer,
// Re-space the entire string by stripping leading and trailing spaces,
// then replacing inter-word spaces with a single normal space
fuzzyTransform.RespaceTransformer,
}