-
Notifications
You must be signed in to change notification settings - Fork 9
/
miohentai.go
108 lines (90 loc) · 2.28 KB
/
miohentai.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
package miohentai
import (
"html"
"regexp"
"strings"
"github.com/gan-of-culture/get-sauce/request"
"github.com/gan-of-culture/get-sauce/static"
"github.com/gan-of-culture/get-sauce/utils"
)
// site is the base URL of miohentai. It is also the prefix used to build
// post short links of the form site + "?p=<post id>".
const site = "https://miohentai.com/"

var (
	// reShortLink matches a post short link such as
	// "https://miohentai.com/?p=123". The site prefix is passed through
	// regexp.QuoteMeta so the dots in the host name only match literal dots
	// instead of any character.
	reShortLink = regexp.MustCompile(regexp.QuoteMeta(site) + `\?p=\d+`)

	// reSourceURL matches a run of non-quote characters that contains
	// "cdn.miohentai", i.e. the content of a quoted attribute pointing at
	// the CDN host.
	reSourceURL = regexp.MustCompile(`[^"]*cdn\.miohentai[^"]*`)
)
// extractor is the miohentai implementation returned by New. It is an empty
// struct and carries no state between calls.
type extractor struct{}
// New creates the miohentai extractor and hands it back as a
// static.Extractor.
func New() static.Extractor {
	return new(extractor)
}
// Extract resolves URL into one or more post short links via parseURL and
// gathers the media data of each post with extractData.
//
// It returns static.ErrURLParseFailed when no post link could be derived
// from URL. If any single post fails, the whole call fails and the error is
// wrapped together with the post link that caused it.
func (e *extractor) Extract(URL string) ([]*static.Data, error) {
	postURLs := parseURL(URL)
	if len(postURLs) < 1 {
		return nil, static.ErrURLParseFailed
	}

	results := []*static.Data{}
	for i := range postURLs {
		postData, err := extractData(postURLs[i])
		if err != nil {
			return nil, utils.Wrap(err, postURLs[i])
		}
		results = append(results, postData)
	}

	return results, nil
}
// rePostID matches the "post-<id>" markers found in listing pages. It is
// compiled once at package scope instead of on every parseURL call.
var rePostID = regexp.MustCompile(`post-\d+`)

// parseURL downloads the page at URL and returns the short links of the
// posts it refers to.
//
// If the page contains a short link (reShortLink) it is treated as a single
// post and only that link is returned. Otherwise every "post-<id>" marker on
// the page is turned into site + "?p=<id>", after adjacent duplicates have
// been removed by utils.RemoveAdjDuplicates.
//
// A nil slice is returned when the page cannot be fetched; the signature has
// no error result, so callers must treat an empty result as failure.
func parseURL(URL string) []string {
	htmlString, err := request.Get(URL)
	if err != nil {
		return nil
	}

	if matchedShortLink := reShortLink.FindString(htmlString); matchedShortLink != "" {
		return []string{matchedShortLink}
	}

	posts := rePostID.FindAllString(htmlString, -1)
	// len(posts) is an upper bound: deduplication can only shrink the list.
	URLs := make([]string, 0, len(posts))
	for _, post := range utils.RemoveAdjDuplicates(posts) {
		// Every match has the form "post-<digits>", so dropping the prefix
		// leaves exactly the numeric post id.
		URLs = append(URLs, site+"?p="+strings.TrimPrefix(post, "post-"))
	}

	return URLs
}
// extractData downloads the post page at URL and builds the static.Data
// entry describing its media file.
//
// The title is taken from the page's H1 and falls back to the second-to-last
// "/"-separated segment of the og:url meta value. The media URL is the first
// reSourceURL match in the page. Its size and content type are read from the
// headers returned by request.Headers for that URL (URL is passed as the
// second argument — presumably the referer; confirm against the request
// package).
//
// An error is returned when the page or the header request fails, when no
// media URL is present in the page, or when the size cannot be determined.
func extractData(URL string) (*static.Data, error) {
	htmlString, err := request.Get(URL)
	if err != nil {
		return nil, err
	}

	title := html.UnescapeString(utils.GetH1(&htmlString, -1))
	if title == "" {
		// Guard the index: a missing og:url splits into a single element and
		// would otherwise panic on len-2.
		splitURL := strings.Split(utils.GetMeta(&htmlString, "og:url"), "/")
		if len(splitURL) >= 2 {
			title = splitURL[len(splitURL)-2]
		}
	}

	srcURL := reSourceURL.FindString(htmlString)
	if srcURL == "" {
		// Without a media URL there is nothing to request or download.
		return nil, static.ErrURLParseFailed
	}

	headers, err := request.Headers(srcURL, URL)
	if err != nil {
		return nil, err
	}

	size, err := request.GetSizeFromHeaders(&headers)
	if err != nil {
		return nil, err
	}

	// Parsed once and reused for both the extension fallback and the media
	// type; empty when the header is missing or malformed.
	subtype := contentSubtype(headers.Get("content-type"))

	ext := utils.GetLastItemString(strings.Split(srcURL, "."))
	if strings.Contains(srcURL, "index.php") {
		// Script-served files carry no usable extension in the URL, so the
		// content type decides.
		ext = subtype
	}

	return &static.Data{
		Site:  site,
		Title: title,
		Type:  utils.GetMediaType(subtype),
		Streams: map[string]*static.Stream{
			"0": {
				Type: static.DataTypeVideo,
				URLs: []*static.URL{
					{
						URL: srcURL,
						Ext: ext,
					},
				},
				Size: size,
			},
		},
		URL: URL,
	}, nil
}

// contentSubtype returns the segment after the first "/" of a content type
// (e.g. "mp4" for "video/mp4"), or "" when contentType contains no "/".
func contentSubtype(contentType string) string {
	parts := strings.Split(contentType, "/")
	if len(parts) < 2 {
		return ""
	}
	return parts[1]
}