package pururin

import (
	"errors"
	"fmt"
	"regexp"
	"strconv"
	"strings"

	"github.com/gan-of-culture/get-sauce/request"
	"github.com/gan-of-culture/get-sauce/static"
	"github.com/gan-of-culture/get-sauce/utils"
)
const site = "https://pururin.to/"
const cdn = "https://i.pururin.to/%s/%d.jpg" // %s = gallery ID, %d = page number

var reID = regexp.MustCompile(fmt.Sprintf("%sgallery/(\\d*)/[^\"]*", site))
var rePageInfo = regexp.MustCompile(`(\d+)</span>\s*\(\s?[\d.]+ \w+\s?\)`)
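
// rePageInfo is intended to pull the page count out of the gallery HTML.
// As a sketch, assuming markup along these lines (hypothetical, not taken
// from the live site):
//
//	Pages: <span>24</span> (12.4 MB)
//
// the first capture group would yield "24".
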
type extractor struct{}

// New returns a pururin extractor
func New() static.Extractor {
	return &extractor{}
}
// Extract fetches the gallery data for the given URL
func (e *extractor) Extract(URL string) ([]*static.Data, error) {
	URLs := parseURL(URL)
	if len(URLs) == 0 {
		return nil, static.ErrURLParseFailed
	}

	data := []*static.Data{}
	for _, u := range URLs {
		d, err := extractData(u)
		if err != nil {
			return nil, utils.Wrap(err, u)
		}
		data = append(data, d)
	}

	return data, nil
}
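
// A minimal usage sketch (the gallery URL is made up for illustration):
//
//	data, err := New().Extract("https://pururin.to/gallery/12345/example-title")
//	if err != nil {
//		// handle the error
//	}
//	fmt.Println(data[0].Title)
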
// parseURL returns all gallery URLs found at the given URL; if the URL
// already points to a gallery, it is returned as-is
func parseURL(URL string) []string {
	if ok, _ := regexp.MatchString(fmt.Sprintf("%sgallery/\\d*/[^\"]*", site), URL); ok {
		return []string{URL}
	}

	htmlString, err := request.Get(URL)
	if err != nil {
		return []string{}
	}
	// drop the "Popular right now" section so its galleries are not picked up
	htmlString = strings.Split(htmlString, "Popular right now")[0]

	re := regexp.MustCompile(fmt.Sprintf(`%sgallery/\d+/[^\"]*\"`, site))
	URLs := []string{}
	for _, v := range re.FindAllString(htmlString, -1) {
		// each match ends with the attribute's closing quote, so strip it
		URLs = append(URLs, strings.TrimSuffix(v, `"`))
	}

	return URLs
}
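
// As an example, if the fetched overview page contained (made-up markup)
//
//	<a href="https://pururin.to/gallery/12345/example-title">...</a>
//
// parseURL would return []string{"https://pururin.to/gallery/12345/example-title"}.
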
// extractData fetches a gallery page and assembles the download data for it
func extractData(URL string) (*static.Data, error) {
	htmlString, err := request.Get(URL)
	if err != nil {
		return nil, err
	}

	ID := utils.GetLastItemString(reID.FindStringSubmatch(URL))

	matchedPageInfo := rePageInfo.FindStringSubmatch(htmlString) // 1=numPages
	if len(matchedPageInfo) != 2 {
		return nil, errors.New("cannot parse page count from " + URL)
	}
	pages, _ := strconv.Atoi(matchedPageInfo[1])

	URLs := []*static.URL{}
	for _, pageNumber := range utils.NeedDownloadList(pages) {
		URLs = append(URLs, &static.URL{
			URL: fmt.Sprintf(cdn, ID, pageNumber),
			Ext: "jpg",
		})
	}

	return &static.Data{
		Site:  site,
		Title: strings.TrimSpace(strings.Split(utils.GetMeta(&htmlString, "og:title"), "/")[0]),
		Type:  static.DataTypeImage,
		Streams: map[string]*static.Stream{
			"0": {
				Type: static.DataTypeImage,
				URLs: URLs,
				Info: ID,
			},
		},
		URL: URL,
	}, nil
}
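
// As a sketch of the resulting download list: with the (made-up) gallery ID
// "12345" and a page count of 3, and assuming utils.NeedDownloadList defaults
// to all pages, extractData would queue:
//
//	https://i.pururin.to/12345/1.jpg
//	https://i.pururin.to/12345/2.jpg
//	https://i.pururin.to/12345/3.jpg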