-
Notifications
You must be signed in to change notification settings - Fork 0
/
googleutf.go
151 lines (119 loc) · 2.98 KB
/
googleutf.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
package googleutf
import (
"encoding/json"
"net/http"
"net/netip"
"time"
"github.com/jonhadfield/ip-fetcher/internal/pflog"
"github.com/sirupsen/logrus"
"github.com/hashicorp/go-retryablehttp"
"github.com/jonhadfield/ip-fetcher/internal/web"
)
// Identification metadata and endpoints for the Google User-Triggered
// Fetchers source.
const (
// ShortName is the short identifier for this source.
ShortName = "googleutf"
// FullName is the human-readable name of this source.
FullName = "Google User-Triggered Fetchers"
// HostType labels the category of hosts this source describes.
HostType = "crawlers"
// SourceURL is the Google documentation page associated with this source.
SourceURL = "https://developers.google.com/search/docs/crawling-indexing/verifying-googlebot"
// DownloadURL is the default location of the JSON document; New and
// FetchData use it when no other URL has been set.
DownloadURL = "https://developers.google.com/static/search/apis/ipranges/user-triggered-fetchers.json"
// downloadedFileTimeFormat is the time layout ProcessData uses to parse the
// document's creationTime value.
downloadedFileTimeFormat = "2006-01-02T15:04:05.999999"
)
// New returns a Googleutf that uses the package-level DownloadURL and an HTTP
// client obtained from web.NewHTTPClient.
//
// pflog.SetLogLevel is called first. If the resulting logrus level is below
// Debug, the client's Logger is set to nil.
func New() Googleutf {
	pflog.SetLogLevel()

	gu := Googleutf{
		Client:      web.NewHTTPClient(),
		DownloadURL: DownloadURL,
	}

	// Keep the client's logger only when debug (or more verbose) logging is on.
	if debugEnabled := logrus.GetLevel() >= logrus.DebugLevel; !debugEnabled {
		gu.Client.Logger = nil
	}

	return gu
}
// Googleutf downloads and parses the Google user-triggered fetchers document.
type Googleutf struct {
// Client is the HTTP client FetchData passes to web.Request.
Client *retryablehttp.Client
// DownloadURL is the URL FetchData requests. When empty, FetchData sets it
// to the package-level DownloadURL constant.
DownloadURL string
}
// RawDoc is the intermediate form the downloaded JSON is decoded into before
// its entries are converted to typed prefixes by ProcessData.
type RawDoc struct {
// CreationTime is the document's "creationTime" value as an unparsed string.
CreationTime string `json:"creationTime"`
// LastRequested has no JSON tag.
// NOTE(review): nothing in this file populates or reads this field - confirm
// whether it is still needed.
LastRequested time.Time
// Entries holds the undecoded elements of the document's "prefixes" array.
Entries []json.RawMessage `json:"prefixes"`
}
// FetchData requests gu.DownloadURL with an HTTP GET through web.Request,
// using gu.Client, and returns web.Request's results unchanged.
//
// If gu.DownloadURL is empty it is first set (on the receiver) to the
// package-level DownloadURL constant.
func (gu *Googleutf) FetchData() (data []byte, headers http.Header, status int, err error) {
	// Final argument to web.Request; presumably the request timeout - confirm
	// against the web package.
	const requestTimeout = 10 * time.Second

	if len(gu.DownloadURL) == 0 {
		gu.DownloadURL = DownloadURL
	}

	data, headers, status, err = web.Request(gu.Client, gu.DownloadURL, http.MethodGet, nil, nil, requestTimeout)

	return data, headers, status, err
}
// Fetch downloads the document with FetchData and parses the response body
// with ProcessData. The response headers and status code are discarded.
//
// If the download fails, the download error is returned together with a
// zero-value Doc.
func (gu *Googleutf) Fetch() (doc Doc, err error) {
	body, _, _, fetchErr := gu.FetchData()
	if fetchErr != nil {
		return doc, fetchErr
	}

	return ProcessData(body)
}
// ProcessData decodes the JSON document in data and converts it into a Doc.
//
// The "prefixes" entries are converted by castEntries and the "creationTime"
// string is parsed using downloadedFileTimeFormat.
//
// An error is returned if the JSON cannot be decoded, if any prefix entry
// cannot be converted, or if the creation time cannot be parsed. On error the
// returned Doc is always the zero value; a partially filled Doc is never
// returned.
func ProcessData(data []byte) (doc Doc, err error) {
	var rawDoc RawDoc
	if err = json.Unmarshal(data, &rawDoc); err != nil {
		return Doc{}, err
	}

	ipv4, ipv6, err := castEntries(rawDoc.Entries)
	if err != nil {
		return Doc{}, err
	}

	created, err := time.Parse(downloadedFileTimeFormat, rawDoc.CreationTime)
	if err != nil {
		return Doc{}, err
	}

	// Assemble the result only once every step has succeeded, so callers never
	// observe prefixes without a creation time.
	return Doc{
		CreationTime: created,
		IPv4Prefixes: ipv4,
		IPv6Prefixes: ipv6,
	}, nil
}
// castEntries converts the raw elements of the document's "prefixes" array
// into typed IPv4 and IPv6 entries.
//
// Each element is first decoded as a RawIPv4Entry; if its ipv4Prefix parses as
// a prefix it is added to the IPv4 result. Otherwise the element is decoded as
// a RawIPv6Entry and its ipv6Prefix is parsed and added to the IPv6 result.
//
// The first element that cannot be converted aborts processing. The returned
// error describes the value that was actually present: when the element has
// no ipv6Prefix to fall back on, the error from the IPv4 attempt is reported
// rather than an unrelated "empty prefix" failure. On error both slices are
// nil.
func castEntries(prefixes []json.RawMessage) (ipv4 []IPv4Entry, ipv6 []IPv6Entry, err error) {
	for _, raw := range prefixes {
		// Try the IPv4 form first, remembering why it failed so the reason is
		// not lost if the IPv6 form is absent too.
		var rawV4 RawIPv4Entry

		v4Err := json.Unmarshal(raw, &rawV4)
		if v4Err == nil {
			prefix, parseErr := netip.ParsePrefix(rawV4.IPv4Prefix)
			if parseErr == nil {
				ipv4 = append(ipv4, IPv4Entry{IPv4Prefix: prefix})

				continue
			}

			v4Err = parseErr
		}

		// Fall back to the IPv6 form.
		var rawV6 RawIPv6Entry
		if decodeErr := json.Unmarshal(raw, &rawV6); decodeErr != nil {
			return nil, nil, decodeErr
		}

		if rawV6.IPv6Prefix == "" {
			// Nothing to fall back on: the IPv4 failure is the real problem.
			return nil, nil, v4Err
		}

		prefix, parseErr := netip.ParsePrefix(rawV6.IPv6Prefix)
		if parseErr != nil {
			return nil, nil, parseErr
		}

		ipv6 = append(ipv6, IPv6Entry{IPv6Prefix: prefix})
	}

	return ipv4, ipv6, nil
}
// RawIPv4Entry is the decoding target for a prefix entry whose "ipv4Prefix"
// value has not yet been parsed.
type RawIPv4Entry struct {
// IPv4Prefix is the entry's "ipv4Prefix" string as found in the JSON.
IPv4Prefix string `json:"ipv4Prefix"`
}
// RawIPv6Entry is the decoding target for a prefix entry whose "ipv6Prefix"
// value has not yet been parsed.
type RawIPv6Entry struct {
// IPv6Prefix is the entry's "ipv6Prefix" string as found in the JSON.
IPv6Prefix string `json:"ipv6Prefix"`
}
// IPv4Entry is a prefix entry whose "ipv4Prefix" value has been parsed with
// netip.ParsePrefix.
type IPv4Entry struct {
// IPv4Prefix is the parsed prefix.
IPv4Prefix netip.Prefix `json:"ipv4Prefix"`
}
// IPv6Entry is a prefix entry whose "ipv6Prefix" value has been parsed with
// netip.ParsePrefix.
type IPv6Entry struct {
// IPv6Prefix is the parsed prefix.
IPv6Prefix netip.Prefix `json:"ipv6Prefix"`
}
// Doc is the parsed form of the downloaded document, as returned by
// ProcessData and Fetch.
type Doc struct {
// CreationTime is the document's creation time, parsed from its
// "creationTime" value.
CreationTime time.Time
// IPv4Prefixes holds the entries parsed from "ipv4Prefix" values.
IPv4Prefixes []IPv4Entry
// IPv6Prefixes holds the entries parsed from "ipv6Prefix" values.
IPv6Prefixes []IPv6Entry
}