Skip to content

Commit

Permalink
Use *httptest.Server in tests, remove init func
Browse files: browse the repository at this point in the history
Related to #83
  • Loading branch information
peterhellberg committed Jan 6, 2018
1 parent 8ce8057 commit 132289a
Showing 1 changed file with 59 additions and 40 deletions.
99 changes: 59 additions & 40 deletions colly_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@ package colly
import (
"bytes"
"fmt"
"log"
"net"
"net/http"
"net/http/httptest"
"reflect"
"regexp"
"strings"
Expand All @@ -16,28 +15,21 @@ import (
"github.com/gocolly/colly/debug"
)

// Shared fixtures used by the test HTTP handlers and the collector tests.
var (
	// Fixed loopback endpoint, built up from port to host:port to root URL.
	// NOTE(review): tests that start their own httptest.Server use its URL
	// instead of these values — confirm whether they are still needed.
	testServerPort    = 31337
	testServerAddr    = fmt.Sprintf("127.0.0.1:%d", testServerPort)
	testServerRootURL = fmt.Sprintf("http://%s/", testServerAddr)

	// serverIndexResponse is the body written by the "/" handler.
	serverIndexResponse = []byte("hello world\n")

	// robotsFile is the body written by the "/robots.txt" handler: it allows
	// /allowed and disallows /disallowed for every user agent.
	robotsFile = `
User-agent: *
Allow: /allowed
Disallow: /disallowed
`
)

func init() {
srv := &http.Server{}
listener, err := net.ListenTCP("tcp4", &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: testServerPort})
if err != nil {
panic(err)
}
func newTestServer() *httptest.Server {
mux := http.NewServeMux()

http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
w.Write(serverIndexResponse)
})

http.HandleFunc("/html", func(w http.ResponseWriter, r *http.Request) {
mux.HandleFunc("/html", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Conent-Type", "text/html")
w.Write([]byte(`<!DOCTYPE html>
<html>
Expand All @@ -53,44 +45,45 @@ func init() {
`))
})

http.HandleFunc("/login", func(w http.ResponseWriter, r *http.Request) {
mux.HandleFunc("/login", func(w http.ResponseWriter, r *http.Request) {
if r.Method == "POST" {
w.Header().Set("Conent-Type", "text/html")
w.Write([]byte(r.FormValue("name")))
}
})

http.HandleFunc("/robots.txt", func(w http.ResponseWriter, r *http.Request) {
mux.HandleFunc("/robots.txt", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(200)
w.Write([]byte(robotsFile))
})

http.HandleFunc("/allowed", func(w http.ResponseWriter, r *http.Request) {
mux.HandleFunc("/allowed", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(200)
w.Write([]byte("allowed"))
})

http.HandleFunc("/disallowed", func(w http.ResponseWriter, r *http.Request) {
mux.HandleFunc("/disallowed", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(200)
w.Write([]byte("disallowed"))
})

http.Handle("/redirect", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
mux.Handle("/redirect", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
http.Redirect(w, r, "/redirected/", http.StatusSeeOther)

}))
http.Handle("/redirected/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {

mux.Handle("/redirected/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, `<a href="test">test</a>`)
}))

http.HandleFunc("/set_cookie", func(w http.ResponseWriter, r *http.Request) {
mux.HandleFunc("/set_cookie", func(w http.ResponseWriter, r *http.Request) {
c := &http.Cookie{Name: "test", Value: "testv", HttpOnly: false}
http.SetCookie(w, c)
w.WriteHeader(200)
w.Write([]byte("ok"))
})

http.HandleFunc("/check_cookie", func(w http.ResponseWriter, r *http.Request) {
mux.HandleFunc("/check_cookie", func(w http.ResponseWriter, r *http.Request) {
cs := r.Cookies()
if len(cs) != 1 || r.Cookies()[0].Value != "testv" {
w.WriteHeader(500)
Expand All @@ -101,11 +94,7 @@ func init() {
w.Write([]byte("ok"))
})

go func() {
if err := srv.Serve(listener); err != nil {
log.Printf("Httpserver: ListenAndServe() error: %s", err)
}
}()
return httptest.NewServer(mux)
}

var newCollectorTests = map[string]func(*testing.T){
Expand Down Expand Up @@ -254,6 +243,9 @@ func TestNewCollector(t *testing.T) {
}

func TestCollectorVisit(t *testing.T) {
ts := newTestServer()
defer ts.Close()

c := NewCollector()

onRequestCalled := false
Expand Down Expand Up @@ -289,7 +281,7 @@ func TestCollectorVisit(t *testing.T) {
onScrapedCalled = true
})

c.Visit(testServerRootURL)
c.Visit(ts.URL)

if !onRequestCalled {
t.Error("Failed to call OnRequest callback")
Expand All @@ -305,6 +297,9 @@ func TestCollectorVisit(t *testing.T) {
}

func TestCollectorOnHTML(t *testing.T) {
ts := newTestServer()
defer ts.Close()

c := NewCollector()

titleCallbackCalled := false
Expand Down Expand Up @@ -334,7 +329,7 @@ func TestCollectorOnHTML(t *testing.T) {
}
})

c.Visit(testServerRootURL + "html")
c.Visit(ts.URL + "/html")

if !titleCallbackCalled {
t.Error("Failed to call OnHTML callback for <title> tag")
Expand All @@ -346,6 +341,9 @@ func TestCollectorOnHTML(t *testing.T) {
}

func TestCollectorURLRevisit(t *testing.T) {
ts := newTestServer()
defer ts.Close()

c := NewCollector()

visitCount := 0
Expand All @@ -354,24 +352,27 @@ func TestCollectorURLRevisit(t *testing.T) {
visitCount++
})

c.Visit(testServerRootURL)
c.Visit(testServerRootURL)
c.Visit(ts.URL)
c.Visit(ts.URL)

if visitCount != 1 {
t.Error("URL revisited")
}

c.AllowURLRevisit = true

c.Visit(testServerRootURL)
c.Visit(testServerRootURL)
c.Visit(ts.URL)
c.Visit(ts.URL)

if visitCount != 3 {
t.Error("URL not revisited")
}
}

func TestCollectorPost(t *testing.T) {
ts := newTestServer()
defer ts.Close()

postValue := "hello"
c := NewCollector()

Expand All @@ -381,44 +382,56 @@ func TestCollectorPost(t *testing.T) {
}
})

c.Post(testServerRootURL+"login", map[string]string{
c.Post(ts.URL+"/login", map[string]string{
"name": postValue,
})
}

// TestRedirect verifies that a relative link found on a page reached through
// a redirect is resolved against the redirected URL.
//
// The test server answers /redirect with a 303 (See Other) pointing at
// /redirected/, which serves a single anchor with href "test"; the absolute
// form of that link must therefore end in "/redirected/test".
func TestRedirect(t *testing.T) {
	ts := newTestServer()
	defer ts.Close()

	c := NewCollector()
	c.OnHTML("a[href]", func(e *HTMLElement) {
		u := e.Request.AbsoluteURL(e.Attr("href"))
		if !strings.HasSuffix(u, "/redirected/test") {
			t.Error("Invalid URL after redirect: " + u)
		}
	})

	// Visit only the per-test server. A failed visit must fail the test:
	// otherwise the OnHTML callback never runs and the test passes vacuously.
	if err := c.Visit(ts.URL + "/redirect"); err != nil {
		t.Fatal(err)
	}
}

func TestCollectorCookies(t *testing.T) {
ts := newTestServer()
defer ts.Close()

c := NewCollector()

if err := c.Visit(testServerRootURL + "set_cookie"); err != nil {
if err := c.Visit(ts.URL + "/set_cookie"); err != nil {
t.Fatal(err)
}

if err := c.Visit(testServerRootURL + "check_cookie"); err != nil {
if err := c.Visit(ts.URL + "/check_cookie"); err != nil {
t.Fatalf("Failed to use previously set cookies: %s", err)
}
}

// BenchmarkVisit measures visiting the test server's /html page with a
// no-op OnHTML callback registered for "p" elements.
func BenchmarkVisit(b *testing.B) {
	ts := newTestServer()
	defer ts.Close()

	c := NewCollector()
	c.OnHTML("p", func(_ *HTMLElement) {})

	// Keep server and collector setup out of the measured region.
	b.ResetTimer()

	for n := 0; n < b.N; n++ {
		// The q parameter gives every iteration a distinct URL.
		// NOTE(review): presumably needed because repeat visits to the same
		// URL are skipped unless AllowURLRevisit is set — confirm.
		if err := c.Visit(fmt.Sprintf("%s/html?q=%d", ts.URL, n)); err != nil {
			b.Fatal(err)
		}
	}
}

func TestRobotsWhenAllowed(t *testing.T) {
ts := newTestServer()
defer ts.Close()

c := NewCollector()
c.IgnoreRobotsTxt = false

Expand All @@ -428,28 +441,34 @@ func TestRobotsWhenAllowed(t *testing.T) {
}
})

err := c.Visit(testServerRootURL + "allowed")
err := c.Visit(ts.URL + "/allowed")

if err != nil {
t.Fatal(err)
}
}

// TestRobotsWhenDisallowed verifies that, with IgnoreRobotsTxt disabled, a
// visit to a path disallowed by the server's robots.txt is rejected with the
// error "URL blocked by robots.txt" and never produces a response.
func TestRobotsWhenDisallowed(t *testing.T) {
	ts := newTestServer()
	defer ts.Close()

	c := NewCollector()
	c.IgnoreRobotsTxt = false

	// Any response at all means the request was not blocked.
	c.OnResponse(func(resp *Response) {
		t.Fatalf("Received response: %d", resp.StatusCode)
	})

	err := c.Visit(ts.URL + "/disallowed")
	// Guard against a nil error before inspecting its message; calling
	// err.Error() on nil would panic instead of reporting a test failure.
	if err == nil {
		t.Fatal("expected URL to be blocked by robots.txt, got nil error")
	}
	if err.Error() != "URL blocked by robots.txt" {
		t.Fatalf("wrong error message: %v", err)
	}
}

func TestIgnoreRobotsWhenDisallowed(t *testing.T) {
ts := newTestServer()
defer ts.Close()

c := NewCollector()
c.IgnoreRobotsTxt = true

Expand All @@ -459,7 +478,7 @@ func TestIgnoreRobotsWhenDisallowed(t *testing.T) {
}
})

err := c.Visit(testServerRootURL + "disallowed")
err := c.Visit(ts.URL + "/disallowed")

if err != nil {
t.Fatal(err)
Expand Down

0 comments on commit 132289a

Please sign in to comment.