forked from badoux/goscraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
goscraper_test.go
109 lines (100 loc) · 3.57 KB
/
goscraper_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
// 2017/12/29 20:44:28 Fri
package goscraper
import (
"fmt"
"testing"
"github.com/hms58/goscraper"
)
// var imgRE = regexp.MustCompile(`<img[^>]+\bsrc=["']([^"']+)["']`)
// // if your img's are properly formed with doublequotes then use this, it's more efficient.
// // var imgRE = regexp.MustCompile(`<img[^>]+\bsrc="([^"]+)"`)
// func findImages(htm string) []string {
// imgs := imgRE.FindAllStringSubmatch(htm, -1)
// out := make([]string, len(imgs))
// for i := range out {
// out[i] = imgs[i][1]
// }
// return out
// }
// func main() {
// testGoscraper scrapes a jianshu.com article URL through
// goscraper.ScrapeRedirect with the default handler and prints the images and
// raw scripts collected in the resulting preview.
//
// The name does not start with an upper-case "Test", so "go test" does not
// pick it up automatically.
// NOTE(review): presumably disabled because it depends on a live site — confirm.
func testGoscraper(t *testing.T) {
	const pageURL = "https://www.jianshu.com/p/569bdd440b09"

	s, err := goscraper.ScrapeRedirect(&goscraper.Options{
		Url:     pageURL,
		Handler: &goscraper.DefaultHandler{},
	})
	if err != nil {
		// Fail the test instead of printing and returning, which would
		// report a scrape error as a pass.
		t.Fatalf("ScrapeRedirect(%q): %v", pageURL, err)
	}

	// fmt.Printf("Title : %s\n", s.Preview.Title)
	// fmt.Printf("Description : %s\n", s.Preview.Description)
	// fmt.Printf("Image: %s\n", s.Preview.Images[0])
	fmt.Printf("Image: %d\n%v\n", len(s.Preview.Images), s.Preview.Images)
	fmt.Printf("RawScripts: %d\n%v\n", len(s.Preview.RawScripts), s.Preview.RawScripts)
	// fmt.Printf("Url : %s\n", s.Preview.Link)
}
// TestGoscraperBody passes an inline HTML document to goscraper.ScrapeRedirect
// through Options.Body and prints the Images and RawImages of the preview.
//
// The document holds four <img> tags whose src values are, in order, an
// absolute URL, a protocol-relative URL ("//host/..."), a root-relative path
// ("/...") and a plain relative path.
func TestGoscraperBody(t *testing.T) {
	const pageURL = "http://www.jianshu.com/p/99b7f266a7ec"

	body := `
<html>
<body>
<img src="http://test.com/test.png?x=y"/>
<img src="//test.com/test.png?x=y"/>
<img src="/test.com/test.png?x=y"/>
<img src="warning/test.png?x=y"/>
</body>
</html>
`
	s, err := goscraper.ScrapeRedirect(&goscraper.Options{
		Url:     pageURL,
		Body:    body,
		Handler: &goscraper.DefaultHandler{},
	})
	if err != nil {
		// Fail the test instead of printing and returning, which would
		// report a scrape error as a pass.
		t.Fatalf("ScrapeRedirect(%q) with inline body: %v", pageURL, err)
	}

	// fmt.Printf("Title : %s\n", s.Preview.Title)
	// fmt.Printf("Description : %s\n", s.Preview.Description)
	// fmt.Printf("Image: %s\n", s.Preview.Images[0])
	fmt.Printf("Image: %d\n%v\n", len(s.Preview.Images), s.Preview.Images)
	// Labelled "RawImage" (as in the file-based tests) so the two lists can be
	// told apart in the output.
	fmt.Printf("RawImage: %d\n%v\n", len(s.Preview.RawImages), s.Preview.RawImages)
	// fmt.Printf("Url : %s\n", s.Preview.Link)
}
// TestGoscraperFile runs goscraper.ScrapeRedirect with Options.HtmlFile set to
// "test/jianshu.html" and prints the title and raw scripts of the preview.
func TestGoscraperFile(t *testing.T) {
	const pageURL = "https://www.jianshu.com/p/fa24238d84a9"

	htmlFile := "test/jianshu.html"
	s, err := goscraper.ScrapeRedirect(&goscraper.Options{
		Url:      pageURL,
		HtmlFile: htmlFile,
		Handler:  &goscraper.DefaultHandler{},
	})
	if err != nil {
		// Fail the test instead of printing and returning, which would
		// report a scrape error as a pass.
		t.Fatalf("ScrapeRedirect(%q) with file %q: %v", pageURL, htmlFile, err)
	}

	fmt.Printf("Title : %s\n", s.Preview.Title)
	// fmt.Printf("Description : %s\n", s.Preview.Description)
	// fmt.Printf("Image: %s\n", s.Preview.Images[0])
	// fmt.Printf("Image: %d\n%v\n", len(s.Preview.Images), s.Preview.Images)
	// fmt.Printf("RawImage: %d\n%v\n", len(s.Preview.RawImages), s.Preview.RawImages)
	fmt.Printf("RawScripts: %d\n%v\n", len(s.Preview.RawScripts), s.Preview.RawScripts)
	// fmt.Printf("Url : %s\n", s.Preview.Link)
}
// TestGoscraperFile2 runs goscraper.ScrapeRedirect with Options.HtmlFile set
// to "test/wechat.html" and a mp.weixin.qq.com article URL, then prints the
// title, raw images and raw scripts of the preview.
func TestGoscraperFile2(t *testing.T) {
	const pageURL = "https://mp.weixin.qq.com/s?__biz=MjM5ODYxMDA5OQ==&mid=2651960726&idx=1&sn=0fdaf0e7040318aabfeba553f815d691&chksm=bd2d004a8a5a895ca80180443cc0f18e66b3d15dbbbd120dabaf3e6d4ef00fbc1030bf41c24b&scene=21"

	htmlFile := "test/wechat.html"
	s, err := goscraper.ScrapeRedirect(&goscraper.Options{
		Url:      pageURL,
		HtmlFile: htmlFile,
		Handler:  &goscraper.DefaultHandler{},
	})
	if err != nil {
		// Fail the test instead of printing and returning, which would
		// report a scrape error as a pass.
		t.Fatalf("ScrapeRedirect(%q) with file %q: %v", pageURL, htmlFile, err)
	}

	fmt.Printf("Title : %s\n", s.Preview.Title)
	// fmt.Printf("Description : %s\n", s.Preview.Description)
	// fmt.Printf("Image: %s\n", s.Preview.Images[0])
	// fmt.Printf("Image: %d\n%v\n", len(s.Preview.Images), s.Preview.Images)
	fmt.Printf("RawImage: %d\n%v\n", len(s.Preview.RawImages), s.Preview.RawImages)
	fmt.Printf("RawScripts: %d\n%v\n", len(s.Preview.RawScripts), s.Preview.RawScripts)
	// fmt.Printf("Url : %s\n", s.Preview.Link)
}