/
main.go
69 lines (59 loc) · 1.59 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
package main
import (
"flag"
"os"
"strconv"
"github.com/golang/glog"
"github.com/k0kubun/pp"
"gopkg.in/urfave/cli.v2"
"github.com/crackcomm/crawl"
"github.com/crackcomm/crawl/nsq/consumer"
google "github.com/crackcomm/go-google-search/spider"
)
func init() {
consumer.Flags = append(consumer.Flags, &cli.StringFlag{
Name: "output-topic",
EnvVars: []string{"OUTPUT_TOPIC"},
Usage: "search results output nsq topic (required)",
Value: "google_results",
})
}
// main configures glog, builds the NSQ consumer application around the Google
// search spider and runs it with the process command-line arguments.
//
// Setting the CRAWL_DEBUG environment variable to a true value (as understood
// by strconv.ParseBool) turns on debug mode by raising glog verbosity to 3, so
// the crawler can emit logs guarded by glog.V(3).
func main() {
	defer glog.Flush()

	// glog is configured through the standard flag set. It is fed a synthetic
	// argument list because os.Args is handed to the cli application below.
	// Only real flags are included: no empty placeholder argument is passed
	// when debug mode is off.
	glogArgs := []string{"-logtostderr"}
	// An unset or unparsable CRAWL_DEBUG deliberately leaves debug mode off,
	// which is why the ParseBool error is discarded.
	if debug, _ := strconv.ParseBool(os.Getenv("CRAWL_DEBUG")); debug {
		glogArgs = append(glogArgs, "-v=3")
	}
	if err := flag.CommandLine.Parse(glogArgs); err != nil {
		glog.Fatal(err)
	}

	// Start consumer
	app := consumer.New(
		consumer.WithSpiderConstructor(func(app *consumer.App) consumer.Spider {
			// NSQ topic the search results are published to.
			outputTopic := app.Ctx.String("output-topic")

			// Spider constructor
			// NOTE(review): the crawler argument is not used; the spider is
			// wired to app.Crawler() instead — confirm this is intended.
			return func(crawler crawl.Crawler) {
				// Google spider
				spider := &google.Spider{
					Crawler: app.Crawler(),
					Output: func(result *google.SearchResult) error {
						// Pretty print result to stdout
						pp.Print(result)
						// Publish result to NSQ on a given topic
						return app.Producer.PublishJSON(outputTopic, result)
					},
				}
				spider.Register()
			}
		}),
	)

	// Command line usage
	app.Name = "google"
	app.Usage = "google search crawler"
	app.Version = "0.0.2"

	if err := app.Run(os.Args); err != nil {
		glog.Fatal(err)
	}
}