-
Notifications
You must be signed in to change notification settings - Fork 11
/
title.go
102 lines (82 loc) · 3.48 KB
/
title.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
package processor
import (
"fmt"
"regexp"
"strings"
)
func processTitle(title string, matchRelease bool) []string {
// Checking if the title is empty.
if strings.TrimSpace(title) == "" {
return nil
}
// cleans year like (2020) from arr title
//var re = regexp.MustCompile(`(?m)\s(\(\d+\))`)
//title = re.ReplaceAllString(title, "")
t := NewTitleSlice()
// Regex patterns
// https://www.regular-expressions.info/unicode.html#category
// https://www.ncbi.nlm.nih.gov/staff/beck/charents/hex.html
replaceRegexp := regexp.MustCompile(`[\p{P}\p{Z}\x{00C0}-\x{017E}\x{00AE}]`)
questionmarkRegexp := regexp.MustCompile(`[?]{2,}`)
regionCodeRegexp := regexp.MustCompile(`\(.+\)$`)
parenthesesEndRegexp := regexp.MustCompile(`\)$`)
if replaceRegexp.ReplaceAllString(title, "") == "" {
t.Add(title, matchRelease)
} else {
// title with all non-alphanumeric characters replaced by "?"
apostropheTitle := parenthesesEndRegexp.ReplaceAllString(title, "?")
apostropheTitle = replaceRegexp.ReplaceAllString(apostropheTitle, "?")
apostropheTitle = questionmarkRegexp.ReplaceAllString(apostropheTitle, "*")
t.Add(apostropheTitle, matchRelease)
t.Add(strings.TrimRight(apostropheTitle, "?* "), matchRelease)
// title with apostrophes removed and all non-alphanumeric characters replaced by "?"
noApostropheTitle := parenthesesEndRegexp.ReplaceAllString(title, "?")
noApostropheTitle = strings.ReplaceAll(noApostropheTitle, "'", "")
noApostropheTitle = replaceRegexp.ReplaceAllString(noApostropheTitle, "?")
noApostropheTitle = questionmarkRegexp.ReplaceAllString(noApostropheTitle, "*")
t.Add(noApostropheTitle, matchRelease)
t.Add(strings.TrimRight(noApostropheTitle, "?* "), matchRelease)
// title with regions in parentheses removed and all non-alphanumeric characters replaced by "?"
removedRegionCodeApostrophe := regionCodeRegexp.ReplaceAllString(title, "")
removedRegionCodeApostrophe = strings.TrimRight(removedRegionCodeApostrophe, " ")
removedRegionCodeApostrophe = replaceRegexp.ReplaceAllString(removedRegionCodeApostrophe, "?")
removedRegionCodeApostrophe = questionmarkRegexp.ReplaceAllString(removedRegionCodeApostrophe, "*")
t.Add(removedRegionCodeApostrophe, matchRelease)
t.Add(strings.TrimRight(removedRegionCodeApostrophe, "?* "), matchRelease)
// title with regions in parentheses and apostrophes removed and all non-alphanumeric characters replaced by "?"
removedRegionCodeNoApostrophe := regionCodeRegexp.ReplaceAllString(title, "")
removedRegionCodeNoApostrophe = strings.TrimRight(removedRegionCodeNoApostrophe, " ")
removedRegionCodeNoApostrophe = strings.ReplaceAll(removedRegionCodeNoApostrophe, "'", "")
removedRegionCodeNoApostrophe = replaceRegexp.ReplaceAllString(removedRegionCodeNoApostrophe, "?")
removedRegionCodeNoApostrophe = questionmarkRegexp.ReplaceAllString(removedRegionCodeNoApostrophe, "*")
t.Add(removedRegionCodeNoApostrophe, matchRelease)
t.Add(strings.TrimRight(removedRegionCodeNoApostrophe, "?* "), matchRelease)
}
return t.Titles()
}
// Titles is a set of title patterns that remembers the order in which each
// distinct pattern was first added. The zero value is ready to use.
type Titles struct {
	// tm indexes the stored titles so duplicates can be rejected.
	tm map[string]struct{}
	// order holds each distinct title once, in first-insertion order.
	order []string
}

// NewTitleSlice returns an empty Titles set.
func NewTitleSlice() *Titles {
	return &Titles{
		tm: map[string]struct{}{},
	}
}

// Add stores title unless an identical entry is already present.
//
// When matchRelease is true the title is first stripped of leading and
// trailing "?" characters and wrapped in "*" on both sides; de-duplication
// is performed on that final form.
func (ts *Titles) Add(title string, matchRelease bool) {
	if matchRelease {
		title = strings.Trim(title, "?")
		title = fmt.Sprintf("*%v*", title)
	}

	// Reading a nil map is safe, so the lookup can precede initialisation.
	if _, ok := ts.tm[title]; ok {
		return
	}

	// Allow use of a zero-value Titles: writing to a nil map would panic.
	if ts.tm == nil {
		ts.tm = map[string]struct{}{}
	}
	ts.tm[title] = struct{}{}
	ts.order = append(ts.order, title)
}

// Titles returns the stored titles in the order they were first added.
//
// The result is always non-nil (an empty set yields an empty slice) and is a
// copy, so callers may modify it without affecting the set.
func (ts *Titles) Titles() []string {
	titles := make([]string, len(ts.order))
	copy(titles, ts.order)
	return titles
}