-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
41 lines (35 loc) · 1018 Bytes
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
package main
import (
"flag"
"fmt"
"go.roman.zone/crawl/crawler"
"log"
"net/http"
_ "net/http/pprof"
"net/url"
"strings"
)
var (
seedURL = flag.String("seed", "https://example.com", "URL of the page to use as a seed")
keywordsStr = flag.String("keywords", "", "Comma-separated list of keywords that define a topic")
targetCount = flag.Int("index-target", 1000, "Number of unique pages to index")
timeLimit = flag.Duration("time-limit", 0, "Maximum time the crawler should run for")
)
func main() {
go func() {
log.Println(http.ListenAndServe("localhost:6060", nil))
}()
flag.Parse()
seedURLParsed, err := url.Parse(*seedURL)
check(err)
keywords := strings.Split(*keywordsStr, ",")
fmt.Printf("Crawling using %s as a seed with keywords: %s.\n",
seedURLParsed.String(), strings.Join(keywords, ","))
urls := crawler.Crawl(*seedURLParsed, keywords, *targetCount, *timeLimit)
fmt.Printf("Indexed %d pages.\n", len(urls))
}
func check(err error) {
if err != nil {
log.Fatal(err)
}
}