Skip to content

Commit

Permalink
[fix] update request data after redirect - closes #74
Browse files Browse the repository at this point in the history
  • Loading branch information
asciimoo committed Dec 29, 2017
1 parent 4ecbc22 commit 37c1a91
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 0 deletions.
4 changes: 4 additions & 0 deletions colly.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,10 @@ func (c *Collector) scrape(u, method string, depth int, requestData io.Reader, c
if err := c.handleOnError(response, err, request, ctx); err != nil {
return err
}
if req.URL.String() != parsedURL.String() {
request.URL = req.URL
request.Headers = &req.Header
}
atomic.AddUint32(&c.responseCount, 1)
response.Ctx = ctx
response.Request = request
Expand Down
20 changes: 20 additions & 0 deletions colly_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"log"
"net"
"net/http"
"strings"
"testing"

"github.com/PuerkitoBio/goquery"
Expand Down Expand Up @@ -70,6 +71,14 @@ func init() {
w.Write([]byte("disallowed"))
})

http.Handle("/redirect", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
http.Redirect(w, r, "/redirected/", http.StatusSeeOther)

}))
http.Handle("/redirected/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, `<a href="test">test</a>`)
}))

http.HandleFunc("/set_cookie", func(w http.ResponseWriter, r *http.Request) {
c := &http.Cookie{Name: "test", Value: "testv", HttpOnly: false}
http.SetCookie(w, c)
Expand Down Expand Up @@ -228,6 +237,17 @@ func TestCollectorPost(t *testing.T) {
})
}

// TestRedirect checks that relative links on a page reached through an HTTP
// redirect are resolved against the redirect target, not the originally
// requested URL. It visits the "redirect" path and expects the anchor's
// href="test" to resolve to a URL ending in "/redirected/test".
func TestRedirect(t *testing.T) {
	c := NewCollector()

	// Record whether the callback ran at all; without this the test would
	// pass vacuously if no matching anchor was ever delivered.
	linkSeen := false
	c.OnHTML("a[href]", func(e *HTMLElement) {
		linkSeen = true
		u := e.Request.AbsoluteURL(e.Attr("href"))
		if !strings.HasSuffix(u, "/redirected/test") {
			t.Error("Invalid URL after redirect: " + u)
		}
	})

	// NOTE(review): assumes Visit returns an error and completes
	// synchronously for a default collector — confirm against colly.go.
	if err := c.Visit(testServerRootURL + "redirect"); err != nil {
		t.Fatalf("visiting redirect URL: %v", err)
	}
	if !linkSeen {
		t.Error("OnHTML callback was not invoked for the redirected page")
	}
}

func TestCollectorCookies(t *testing.T) {
c := NewCollector()

Expand Down
1 change: 1 addition & 0 deletions http_backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ func (h *httpBackend) Do(request *http.Request, bodySize int) (*Response, error)
if err != nil {
return nil, err
}
*request = *res.Request

var bodyReader io.Reader = res.Body
if bodySize > 0 {
Expand Down

0 comments on commit 37c1a91

Please sign in to comment.