forked from haofree/opendmm-1
-
Notifications
You must be signed in to change notification settings - Fork 0
/
stages.go
119 lines (110 loc) · 2.84 KB
/
stages.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package opendmm
import (
"reflect"
"regexp"
"strings"
"github.com/golang/glog"
)
// ProcessStage is one step of a MovieMeta processing pipeline: it receives
// movies on the channel it is given and returns the channel on which its
// results are delivered. Stages can therefore be chained by passing the
// channel returned by one stage as the input of the next.
type ProcessStage func(chan MovieMeta) chan MovieMeta
// deduplicate is a pipeline stage that strips from each movie's Title the
// words that merely repeat its Code or the name of one of its Actresses.
//
// The title is split on every single whitespace character; each piece equal
// to meta.Code or to an entry of meta.Actresses is blanked, and the pieces are
// joined back together with single spaces. Blanked pieces leave their
// separators behind, so the resulting title may contain leading, trailing or
// doubled spaces.
//
// The returned channel is unbuffered and is closed once in has been closed and
// drained.
func deduplicate(in chan MovieMeta) chan MovieMeta {
	out := make(chan MovieMeta)
	// Compile the separator once per stage instead of once per movie.
	separator := regexp.MustCompile("\\s")
	go func() {
		defer close(out)
		for meta := range in {
			segments := separator.Split(meta.Title, -1)
			for i, segment := range segments {
				if segment == meta.Code {
					segments[i] = ""
					continue
				}
				for _, actress := range meta.Actresses {
					if segment == actress {
						segments[i] = ""
						break
					}
				}
			}
			meta.Title = strings.Join(segments, " ")
			out <- meta
		}
	}()
	return out
}
// trimSpaces is a pipeline stage that tidies the whitespace of every string
// and []string field of each MovieMeta it receives.
//
// Each affected string has its leading and trailing whitespace removed with
// strings.TrimSpace, after which every remaining run matching the regexp
// class \s is collapsed into a single space. Fields of any other type are
// left untouched. Elements of []string fields are rewritten in place, i.e. in
// the backing array of the slice that was received.
//
// The returned channel is unbuffered and is closed once in has been closed and
// drained.
func trimSpaces(in chan MovieMeta) chan MovieMeta {
	out := make(chan MovieMeta)
	// Compile once per stage instead of once per field of every movie.
	whitespaceRun := regexp.MustCompile("\\s+")
	tidy := func(s string) string {
		return whitespaceRun.ReplaceAllString(strings.TrimSpace(s), " ")
	}
	go func() {
		defer close(out)
		for meta := range in {
			// Reflect through a pointer so the struct fields are addressable
			// and can be assigned.
			value := reflect.ValueOf(&meta).Elem()
			for fi := 0; fi < value.NumField(); fi++ {
				field := value.Field(fi)
				if !field.CanSet() {
					// Unexported fields can be neither read through Interface
					// nor written via reflection; both would panic.
					continue
				}
				switch v := field.Interface().(type) {
				case string:
					field.SetString(tidy(v))
				case []string:
					// v shares its backing array with the field, so writing
					// through it updates the field's elements.
					for i := range v {
						v[i] = tidy(v[i])
					}
				}
			}
			out <- meta
		}
	}()
	return out
}
// validateFields is a pipeline stage that forwards only movies carrying the
// minimum required data. A movie is dropped, and reported at glog verbosity
// level 2, when its Code, Title or CoverImage is empty or when its CoverImage
// starts with "javascript".
//
// The returned channel is unbuffered and is closed once in has been closed and
// drained.
func validateFields(in chan MovieMeta) chan MovieMeta {
	out := make(chan MovieMeta)
	go func() {
		defer close(out)
		for meta := range in {
			complete := meta.Code != "" &&
				meta.Title != "" &&
				meta.CoverImage != "" &&
				!strings.HasPrefix(meta.CoverImage, "javascript")
			if !complete {
				glog.V(2).Infof("Validate failed: %+v", meta)
				continue
			}
			out <- meta
		}
	}()
	return out
}
// normalizeCode is a pipeline stage that removes superfluous leading zeros
// from the numeric part of each movie's Code. A code made of word characters,
// a dash, one or more zeros and at least three further digits is rewritten
// without those zeros; any other code passes through unchanged. The code is
// logged at glog verbosity level 2 before and after the rewrite.
//
// The returned channel is unbuffered and is closed once in has been closed and
// drained.
func normalizeCode(in chan MovieMeta) chan MovieMeta {
	paddedCode := regexp.MustCompile("^(\\w+-)0+(\\d{3,})$")
	out := make(chan MovieMeta)
	go func() {
		defer close(out)
		for meta := range in {
			raw := meta.Code
			glog.V(2).Infof("Code before normalization: %+v", raw)
			meta.Code = paddedCode.ReplaceAllString(raw, "$1$2")
			glog.V(2).Infof("Code after normalization: %+v", meta.Code)
			out <- meta
		}
	}()
	return out
}
// normalizeURLFields is a pipeline stage that passes the URL-bearing fields of
// each movie through the URL normalizers: CoverImage, Page and ThumbnailImage
// go through normalizeURL, and SampleImages goes through normalizeURLs.
//
// The returned channel is unbuffered and is closed once in has been closed and
// drained.
func normalizeURLFields(in chan MovieMeta) chan MovieMeta {
	out := make(chan MovieMeta)
	go func() {
		defer close(out)
		for {
			meta, open := <-in
			if !open {
				// Input exhausted; the deferred close signals downstream.
				return
			}
			meta.CoverImage = normalizeURL(meta.CoverImage)
			meta.Page = normalizeURL(meta.Page)
			meta.SampleImages = normalizeURLs(meta.SampleImages)
			meta.ThumbnailImage = normalizeURL(meta.ThumbnailImage)
			out <- meta
		}
	}()
	return out
}
// postprocess wires the post-processing stages into a single pipeline. Movies
// read from in flow through deduplicate, trimSpaces, validateFields,
// normalizeCode and normalizeURLFields, in that order, and the channel of the
// last stage is returned.
func postprocess(in chan MovieMeta) chan MovieMeta {
	stream := deduplicate(in)
	stream = trimSpaces(stream)
	stream = validateFields(stream)
	stream = normalizeCode(stream)
	stream = normalizeURLFields(stream)
	return stream
}