-
Notifications
You must be signed in to change notification settings - Fork 115
/
http.go
160 lines (121 loc) · 3.63 KB
/
http.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
package exporter
import (
"fmt"
"io"
"net/http"
neturl "net/url"
"strconv"
"time"
log "github.com/sirupsen/logrus"
"github.com/tomnomnom/linkheader"
)
// RateLimitExceededStatus is the status response from github when the rate limit is exceeded.
// getHTTPResponse compares it against the full http.Response.Status string
// (status code plus reason text) and, on a match, returns an error whose
// message is that same status string.
const RateLimitExceededStatus = "403 rate limit exceeded"
// asyncHTTPGets fetches every target URL concurrently and returns the
// collected responses.
//
// The target list is first expanded with paginateTargets so that follow-on
// pages are fetched as well. One goroutine is started per target; each
// delivers exactly one *Response on the shared channel, either the successful
// result sent by getResponse or an error-carrying Response built here.
//
// Responses are returned in arrival order, which is not necessarily the order
// of targets. On the first error the function logs it and returns (nil, err)
// without waiting for the remaining requests.
func asyncHTTPGets(targets []string, token string) ([]*Response, error) {
	// Expand targets by following GitHub pagination links
	targets = paginateTargets(targets, token)

	// With no targets nothing would ever be sent on the channel, so a receive
	// would block forever. Return an empty result instead.
	if len(targets) == 0 {
		return []*Response{}, nil
	}

	// The buffer holds one message per target, so goroutines that are still
	// running after an early error return can complete their send and exit
	// instead of blocking.
	ch := make(chan *Response, len(targets))

	for _, url := range targets {
		go func(url string) {
			if err := getResponse(url, token, ch); err != nil {
				ch <- &Response{url, nil, []byte{}, err}
			}
		}(url)
	}

	// Exactly one message arrives per target, so a bounded loop is enough.
	responses := make([]*Response, 0, len(targets))
	for range targets {
		r := <-ch
		if r.err != nil {
			log.Errorf("Error scraping API, Error: %v", r.err)
			return nil, r.err
		}
		responses = append(responses, r)
	}

	return responses, nil
}
// paginateTargets returns all pages for the provided targets.
//
// Each target is requested once to inspect its Link response header. When the
// header contains a rel="last" link whose URL has a numeric "page" query
// parameter, URLs for pages 2..last are appended to the result (page 1 is the
// original target itself). Targets whose request fails, or whose Link header
// is missing or unusable, are logged where applicable and contribute no extra
// pages.
//
// The returned slice starts with the original targets, followed by the
// generated page URLs.
func paginateTargets(targets []string, token string) []string {
	paginated := targets

	for _, url := range targets {
		// make a request to the original target to get link header if it exists
		resp, err := getHTTPResponse(url, token)
		if err != nil {
			log.Errorf("Error retrieving Link headers, Error: %s", err)
			continue
		}

		// Only the headers are needed; release the body right away so the
		// underlying connection is not leaked.
		linkHeaders := resp.Header["Link"]
		resp.Body.Close()

		if len(linkHeaders) == 0 {
			continue
		}

		for _, link := range linkheader.Parse(linkHeaders[0]) {
			if link.Rel != "last" {
				continue
			}

			u, err := neturl.Parse(link.URL)
			if err != nil {
				// u is nil on a parse failure; using it would panic.
				log.Errorf("Unable to parse page URL, Error: %s", err)
				break
			}

			q := u.Query()
			lastPage, err := strconv.Atoi(q.Get("page"))
			if err != nil {
				log.Errorf("Unable to convert page substring to int, Error: %s", err)
				break
			}

			// add all pages to the slice of targets to return
			for page := 2; page <= lastPage; page++ {
				q.Set("page", strconv.Itoa(page))
				u.RawQuery = q.Encode()
				paginated = append(paginated, u.String())
			}
			break
		}
	}

	return paginated
}
// getResponse collects an individual http.response and sends it on ch as a
// *Response.
//
// It performs the request through getHTTPResponse, reads the whole body into
// memory and closes it, then sends a Response holding the URL, the
// *http.Response and the body bytes. Nothing is sent on ch when an error is
// returned; the caller is expected to report the failure itself.
//
// Errors are returned when the request fails, when the body cannot be read,
// or when the server answers with status 404. Underlying errors are wrapped
// with %w so callers can inspect them with errors.Is / errors.As.
func getResponse(url string, token string, ch chan<- *Response) error {
	log.Infof("Fetching %s \n", url)

	resp, err := getHTTPResponse(url, token) // do this earlier
	if err != nil {
		return fmt.Errorf("Error fetching http response: %w", err)
	}
	defer resp.Body.Close()

	// Read the body to a byte array so it can be used elsewhere
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("Error converting body to byte array: %w", err)
	}

	// Triggers if a user specifies an invalid or not visible repository
	if resp.StatusCode == 404 {
		return fmt.Errorf("Error: Received 404 status from Github API, ensure the repository URL is correct. If it's a private repository, also check the oauth token is correct")
	}

	// err is nil at this point; the body has already been fully read, so the
	// deferred Close does not affect the bytes handed to the receiver.
	ch <- &Response{url, resp, body, err}

	return nil
}
// getHTTPResponse performs a GET request against url and returns the raw
// *http.Response.
//
// When token is non-empty it is sent as an "Authorization: token <token>"
// header. The request uses a client with a 10 second timeout. If the response
// status string equals RateLimitExceededStatus, the body is closed and an
// error carrying that status is returned instead of the response.
//
// On success the caller owns the response and must close its Body.
func getHTTPResponse(url string, token string) (*http.Response, error) {
	request, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}

	// Authenticate only when a token was supplied.
	if token != "" {
		request.Header.Add("Authorization", "token "+token)
	}

	httpClient := &http.Client{Timeout: time.Second * 10}
	response, err := httpClient.Do(request)
	if err != nil {
		return nil, err
	}

	// Turn a rate-limit reply into an error so callers never see it as a
	// usable response.
	if response.Status == RateLimitExceededStatus {
		response.Body.Close()
		return nil, fmt.Errorf("%s", response.Status)
	}

	return response, nil
}