-
Notifications
You must be signed in to change notification settings - Fork 0
/
thai_name.go
27 lines (25 loc) · 1.21 KB
/
thai_name.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
package preset
import (
"github.com/abhabongse/fuzzymatch-go/candidate/nametitle"
"github.com/abhabongse/fuzzymatch-go/editdist"
editdistThai "github.com/abhabongse/fuzzymatch-go/editdist/thai"
"github.com/abhabongse/fuzzymatch-go/factory"
sanitaryThai "github.com/abhabongse/fuzzymatch-go/transform/thai"
)
// ThaiNameSimilarityScore computes the similarity score between two input strings
// with the following functionalities:
// 1. Each input string will be sanitized via sanitaryThai.Sanitize function
// (e.g. removing diacritics from latin scripts, removing repeated Thai tonal marks, etc.)
// 2. Each input string will be used to generate bare names
// (i.e. attempting to remove English and Thai titles such as Mrs. or dek-chai)
// 3. For optimal alignment distance metric over string space,
// the specialized substitution/transposition penalty functions are used instead.
var ThaiNameSimilarityScore = factory.PrependStringSanitizerForSimilarityScore(
sanitaryThai.Sanitize,
factory.MaxFromCandidatesProduct(
nametitle.GenerateNamesWithoutTitles,
editdist.MakeStringSimilarityFunction(
editdist.MakeOptimalAlignmentDistFunction(editdistThai.SubstPenalty, editdistThai.TransPenalty),
),
),
)