-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrape_test.go
65 lines (54 loc) · 1.41 KB
/
scrape_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
package main
import (
"net/http"
"net/http/httptest"
"testing"
"github.com/gocolly/colly/v2"
"github.com/stretchr/testify/assert"
)
// dummyHandler is a canned-response http.Handler for tests: every request,
// regardless of method or path, is answered with the bytes held in Response.
type dummyHandler struct {
	// Response is the raw body written back to the client on each request.
	Response []byte
}

// Compile-time check that *dummyHandler satisfies http.Handler.
var _ http.Handler = (*dummyHandler)(nil)

// ServeHTTP writes the canned Response body and ignores the request.
// No status code is set explicitly; net/http sends 200 OK on the first Write
// when WriteHeader has not been called.
func (d *dummyHandler) ServeHTTP(w http.ResponseWriter, _ *http.Request) {
	// The result of Write is deliberately discarded: this handler only backs
	// throwaway test servers and has nothing useful to do on a failed write.
	_, _ = w.Write(d.Response)
}
// Test_ElementToArticle checks that elementToArticle turns an anchor element
// wrapping a heading into an Article with the expected absolute URL and title.
//
// Each case's HTML is served from a throwaway httptest server and fetched
// with a colly collector, so the *colly.HTMLElement handed to
// elementToArticle is the one colly passes to its OnHTML callback. Cases run
// as named subtests so a failure identifies the markup variant that caused it
// and a single case can be selected with -run.
func Test_ElementToArticle(t *testing.T) {
	t.Parallel()

	testCases := []struct {
		name string
		html []byte
	}{
		{name: "h2", html: []byte(`<a href="/blah/blah"><h2>Blah Blah</h2></a>`)},
		{name: "h3", html: []byte(`<a href="/blah/blah"><h3>Blah Blah</h3></a>`)},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Bind assertions to the subtest so failures are reported
			// against the right case.
			check := assert.New(t)

			server := httptest.NewServer(&dummyHandler{Response: tc.html})
			// Runs when this subtest returns, so each case tears down its
			// own server.
			defer server.Close()

			// Value forwarded as the third argument of elementToArticle.
			// NOTE(review): presumably a timestamp for the article; how it
			// is used is not visible from this file.
			stamp := mustTimeParse("2006-01-02T15:04:05Z", "2006-01-02T15:04:05Z")

			// Results captured from the OnHTML callback below.
			var (
				article *Article
				err     error
			)

			c := colly.NewCollector()
			c.SetClient(server.Client())
			c.OnHTML("a", func(e *colly.HTMLElement) {
				article, err = elementToArticle(e, "http://example.com", stamp)
			})

			check.NoError(c.Visit(server.URL))
			check.NoError(err)
			if !check.NotNil(article) {
				// The callback never produced an article; the field checks
				// below would dereference nil.
				return
			}

			check.Equal("http://example.com/blah/blah", jsonURLToURL(article.URL).String())
			check.Equal("Blah Blah", article.Title)
		})
	}
}