/
crawl.go
87 lines (72 loc) · 2.24 KB
/
crawl.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
package metadata
import (
"context"
"fmt"
"log"
"time"
"github.com/eviltomorrow/king/lib/etcd"
grpcclient "github.com/eviltomorrow/king/lib/grpc/client"
"github.com/eviltomorrow/king/lib/grpc/lb"
"github.com/eviltomorrow/king/lib/opentrace"
"github.com/spf13/cobra"
"go.opentelemetry.io/otel/attribute"
"google.golang.org/grpc/resolver"
"google.golang.org/protobuf/types/known/wrapperspb"
)
// sourceVar receives the value of CrawlCommand's "--source" flag (help text
// lists sina/net126). crawl sends it to the collector as the CrawlMetadata
// argument and attaches it to the trace span as the "source" attribute.
var sourceVar string
// init wires the crawl sub-command into the CLI: it registers the persistent
// "--source" flag (bound to sourceVar), marks that flag as required, and
// attaches CrawlCommand to RootCommand.
//
// NOTE(review): the flag declares the default "sina" but is also marked
// required; presumably the default then only shows up in help output —
// confirm which of the two is intended.
func init() {
	CrawlCommand.PersistentFlags().StringVar(&sourceVar, "source", "sina", "crawl data from [sina/net126]")

	// MarkPersistentFlagRequired reports an error when the named flag is not
	// registered. The flag is defined on the line above, so a failure here is
	// a programming mistake (e.g. a renamed flag) and must not be ignored.
	if err := CrawlCommand.MarkPersistentFlagRequired("source"); err != nil {
		panic(err)
	}

	RootCommand.AddCommand(CrawlCommand)
}
// CrawlCommand is the "crawl" sub-command. Its Run function performs a
// one-off, manually triggered metadata crawl:
//
//  1. load the configuration via loadConfig,
//  2. initialise the trace provider using cfg.Otel.DSN,
//  3. open a root span named "Manual crawl metadata",
//  4. create an etcd client from cfg.Etcd.Endpoints and register the
//     etcd-backed gRPC resolver builder,
//  5. call crawl with the span's context.
//
// Any failure is logged with a "[F]" prefix and Run simply returns; failures
// that happen after the root span exists are also recorded on that span.
var CrawlCommand = &cobra.Command{
	Use:   "crawl",
	Short: "Crawl data manual from specify source[sina/net126]",
	Run: func(cmd *cobra.Command, args []string) {
		if loadErr := loadConfig(); loadErr != nil {
			log.Printf("[F] Load config failure, nest error: %v", loadErr)
			return
		}

		// The DSN has to be in place before the provider is initialised.
		opentrace.OtelDSN = cfg.Otel.DSN
		shutdownTracing, traceErr := opentrace.InitTraceProvider()
		if traceErr != nil {
			log.Printf("[F] Init trace provider failure, nest error: %v", traceErr)
			return
		}
		// Deferred first so that it runs last, after the root span has ended.
		defer shutdownTracing()

		rootCtx, rootSpan := opentrace.DefaultTracer().Start(context.Background(), "Manual crawl metadata")
		defer rootSpan.End()

		// Endpoints must be assigned before the etcd client is constructed.
		etcd.Endpoints = cfg.Etcd.Endpoints
		etcdClient, dialErr := etcd.NewClient()
		if dialErr != nil {
			log.Printf("[F] Create etcd client failure, nest error: %v", dialErr)
			rootSpan.RecordError(dialErr)
			return
		}
		defer etcdClient.Close()

		// Register the etcd-backed resolver builder before crawl creates its
		// gRPC client.
		resolver.Register(lb.NewBuilder(etcdClient))

		crawlErr := crawl(rootCtx)
		if crawlErr == nil {
			return
		}
		log.Printf("[F] Crawl data failure, nest error: %v", crawlErr)
		rootSpan.RecordError(crawlErr)
	},
}
// crawl asks the collector service to crawl metadata for the source held in
// sourceVar and prints a one-line summary to standard output.
//
// A collector client is obtained through grpcclient.NewCollectorWithEtcd and
// released when the function returns. The RPC itself runs inside a
// "CrawlMetadata" span started from ctx and tagged with the "source"
// attribute; an RPC error is recorded on that span.
//
// It returns the client-creation error or the RPC error unchanged, and nil
// once the summary has been printed. The reported cost covers the whole call,
// including client creation.
func crawl(ctx context.Context) error {
	startedAt := time.Now()

	collector, release, err := grpcclient.NewCollectorWithEtcd()
	if err != nil {
		return err
	}
	defer release()

	spanCtx, span := opentrace.DefaultTracer().Start(ctx, "CrawlMetadata")
	defer span.End()
	span.SetAttributes(attribute.String("source", sourceVar))

	request := &wrapperspb.StringValue{Value: sourceVar}
	summary, err := collector.CrawlMetadata(spanCtx, request)
	if err != nil {
		span.RecordError(err)
		return err
	}

	// "Actual" is the number of entries that were not ignored.
	actual := summary.Total - summary.Ignore
	fmt.Printf("[Status] Complete, Source: %s, Total: %d, Ignore: %d, Actual: %d, Cost: %v\r\n",
		sourceVar, summary.Total, summary.Ignore, actual, time.Since(startedAt))
	return nil
}