-
-
Notifications
You must be signed in to change notification settings - Fork 278
/
acmeclient.go
388 lines (352 loc) · 12.3 KB
/
acmeclient.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
// Copyright 2015 Matthew Holt
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package certmagic
import (
"context"
"crypto/tls"
"fmt"
"log"
weakrand "math/rand"
"net"
"net/http"
"net/url"
"strconv"
"strings"
"sync"
"time"
"github.com/go-acme/lego/v3/acme"
"github.com/go-acme/lego/v3/certificate"
"github.com/go-acme/lego/v3/challenge"
"github.com/go-acme/lego/v3/lego"
"github.com/go-acme/lego/v3/registration"
)
func init() {
weakrand.Seed(time.Now().UnixNano())
}
// acmeClient is a wrapper over lego's acme.Client with
// some custom state attached. It is used to obtain,
// renew, and revoke certificates with ACME. Use
// ACMEManager.newACMEClient() or
// ACMEManager.newACMEClientWithRetry() to get a valid
// one for real use.
type acmeClient struct {
caURL string
mgr *ACMEManager
acmeClient *lego.Client
challenges []challenge.Type
}
// newACMEClientWithRetry is the same as newACMEClient, but with
// automatic retry capabilities. Sometimes network connections or
// HTTP requests fail intermittently, even when requesting the
// directory endpoint for example, so we can avoid that by just
// retrying once. Failures here are rare and sporadic, usually,
// so a simple retry is an easy fix.
func (am *ACMEManager) newACMEClientWithRetry(useTestCA bool) (*acmeClient, error) {
var client *acmeClient
var err error
const maxTries = 2
for i := 0; i < maxTries; i++ {
client, err = am.newACMEClient(useTestCA, false) // TODO: move logic that requires interactivity to way before this part of the process...
if err == nil {
break
}
if acmeErr, ok := err.(acme.ProblemDetails); ok {
if acmeErr.HTTPStatus == http.StatusTooManyRequests {
return nil, fmt.Errorf("too many requests making new ACME client: %+v - aborting", acmeErr)
}
}
log.Printf("[ERROR] Making new ACME client: %v (attempt %d/%d)", err, i+1, maxTries)
time.Sleep(1 * time.Second)
}
return client, err
}
// newACMEClient creates the underlying ACME library client type.
// If useTestCA is true, am.TestCA will be used if it is set;
// otherwise, the primary CA will still be used.
func (am *ACMEManager) newACMEClient(useTestCA, interactive bool) (*acmeClient, error) {
acmeClientsMu.Lock()
defer acmeClientsMu.Unlock()
// ensure defaults are filled in
certObtainTimeout := am.CertObtainTimeout
if certObtainTimeout == 0 {
certObtainTimeout = DefaultACME.CertObtainTimeout
}
var caURL string
if useTestCA {
caURL = am.TestCA
// only use the default test CA if the CA is also
// the default CA; no point in testing against
// Let's Encrypt's staging server if we are not
// using their production server too
if caURL == "" && am.CA == DefaultACME.CA {
caURL = DefaultACME.TestCA
}
}
if caURL == "" {
caURL = am.CA
}
if caURL == "" {
caURL = DefaultACME.CA
}
// ensure endpoint is secure (assume HTTPS if scheme is missing)
if !strings.Contains(caURL, "://") {
caURL = "https://" + caURL
}
u, err := url.Parse(caURL)
if err != nil {
return nil, err
}
if u.Scheme != "https" && !isLoopback(u.Host) && !isInternal(u.Host) {
return nil, fmt.Errorf("%s: insecure CA URL (HTTPS required)", caURL)
}
// look up or create the user account
leUser, err := am.getUser(caURL, am.Email)
if err != nil {
return nil, err
}
// if a lego client with this configuration already exists, reuse it
clientKey := caURL + leUser.Email
client, ok := acmeClients[clientKey]
if !ok {
// the client facilitates our communication with the CA server
legoCfg := lego.NewConfig(leUser)
legoCfg.CADirURL = caURL
legoCfg.UserAgent = buildUAString()
legoCfg.HTTPClient.Timeout = HTTPTimeout
legoCfg.Certificate = lego.CertificateConfig{
Timeout: am.CertObtainTimeout,
}
if am.TrustedRoots != nil {
if ht, ok := legoCfg.HTTPClient.Transport.(*http.Transport); ok {
if ht.TLSClientConfig == nil {
ht.TLSClientConfig = new(tls.Config)
ht.ForceAttemptHTTP2 = true
}
ht.TLSClientConfig.RootCAs = am.TrustedRoots
}
}
client, err = lego.NewClient(legoCfg)
if err != nil {
return nil, err
}
acmeClients[clientKey] = client
}
// if not registered, the user must register an account
// with the CA and agree to terms
if leUser.Registration == nil {
if interactive { // can't prompt a user who isn't there
termsURL := client.GetToSURL()
if !am.Agreed && termsURL != "" {
am.Agreed = am.askUserAgreement(client.GetToSURL())
}
if !am.Agreed && termsURL != "" {
return nil, fmt.Errorf("user must agree to CA terms")
}
}
reg, err := client.Registration.Register(registration.RegisterOptions{TermsOfServiceAgreed: am.Agreed})
if err != nil {
return nil, err
}
leUser.Registration = reg
// persist the user to storage
err = am.saveUser(caURL, leUser)
if err != nil {
return nil, fmt.Errorf("could not save user: %v", err)
}
}
c := &acmeClient{
caURL: caURL,
mgr: am,
acmeClient: client,
}
return c, nil
}
// initialChallenges returns the initial set of challenges
// to try using c.config as a basis.
func (c *acmeClient) initialChallenges() []challenge.Type {
// if configured, use DNS challenge exclusively
if c.mgr.DNSProvider != nil {
return []challenge.Type{challenge.DNS01}
}
// otherwise, use HTTP and TLS-ALPN challenges if enabled
var chal []challenge.Type
if !c.mgr.DisableHTTPChallenge {
chal = append(chal, challenge.HTTP01)
}
if !c.mgr.DisableTLSALPNChallenge {
chal = append(chal, challenge.TLSALPN01)
}
return chal
}
// nextChallenge chooses a challenge randomly from the given list of
// available challenges and configures c.acmeClient to use that challenge
// according to c.config. It pops the chosen challenge from the list and
// returns that challenge along with the new list without that challenge.
// If len(available) == 0, this is a no-op.
//
// Don't even get me started on how dumb it is we need to do this here
// instead of the upstream lego library doing it for us. Lego used to
// randomize the challenge order, thus allowing another one to be used
// if the first one failed. https://github.com/go-acme/lego/issues/842
// (It also has an awkward API for adjusting the available challenges.)
// At time of writing, lego doesn't try anything other than the TLS-ALPN
// challenge, even if the HTTP challenge is also enabled. So we take
// matters into our own hands and enable only one challenge at a time
// in the underlying client, randomly selected by us.
func (c *acmeClient) nextChallenge(available []challenge.Type) (challenge.Type, []challenge.Type) {
if len(available) == 0 {
return "", available
}
// make sure we choose a challenge randomly, which lego used to do but
// the critical feature was surreptitiously removed in ~2018 in a commit
// too large to review, oh well - choose one, then remove it from the
// list of available challenges so it doesn't get retried
randIdx := weakrand.Intn(len(available))
randomChallenge := available[randIdx]
available = append(available[:randIdx], available[randIdx+1:]...)
// clean the slate, since we reuse clients
c.acmeClient.Challenge.Remove(challenge.HTTP01)
c.acmeClient.Challenge.Remove(challenge.TLSALPN01)
c.acmeClient.Challenge.Remove(challenge.DNS01)
switch randomChallenge {
case challenge.HTTP01:
useHTTPPort := HTTPChallengePort
if HTTPPort > 0 && HTTPPort != HTTPChallengePort {
useHTTPPort = HTTPPort
}
if c.mgr.AltHTTPPort > 0 {
useHTTPPort = c.mgr.AltHTTPPort
}
c.acmeClient.Challenge.SetHTTP01Provider(distributedSolver{
acmeManager: c.mgr,
providerServer: &httpSolver{
acmeManager: c.mgr,
address: net.JoinHostPort(c.mgr.ListenHost, strconv.Itoa(useHTTPPort)),
},
caURL: c.caURL,
})
case challenge.TLSALPN01:
useTLSALPNPort := TLSALPNChallengePort
if HTTPSPort > 0 && HTTPSPort != TLSALPNChallengePort {
useTLSALPNPort = HTTPSPort
}
if c.mgr.AltTLSALPNPort > 0 {
useTLSALPNPort = c.mgr.AltTLSALPNPort
}
c.acmeClient.Challenge.SetTLSALPN01Provider(distributedSolver{
acmeManager: c.mgr,
providerServer: &tlsALPNSolver{
config: c.mgr.config,
address: net.JoinHostPort(c.mgr.ListenHost, strconv.Itoa(useTLSALPNPort)),
},
caURL: c.caURL,
})
case challenge.DNS01:
if c.mgr.DNSChallengeOption != nil {
c.acmeClient.Challenge.SetDNS01Provider(c.mgr.DNSProvider, c.mgr.DNSChallengeOption)
} else {
c.acmeClient.Challenge.SetDNS01Provider(c.mgr.DNSProvider)
}
}
return randomChallenge, available
}
func (c *acmeClient) throttle(ctx context.Context, names []string) error {
// throttling is scoped to CA + account email
rateLimiterKey := c.caURL + "," + c.mgr.Email
rateLimitersMu.Lock()
rl, ok := rateLimiters[rateLimiterKey]
if !ok {
rl = NewRateLimiter(RateLimitEvents, RateLimitEventsWindow)
rateLimiters[rateLimiterKey] = rl
// TODO: stop rate limiter when it is garbage-collected...
}
rateLimitersMu.Unlock()
log.Printf("[INFO]%v Waiting on rate limiter...", names)
err := rl.Wait(ctx)
if err != nil {
return err
}
log.Printf("[INFO]%v Done waiting", names)
return nil
}
func (c *acmeClient) usingTestCA() bool {
return c.mgr.TestCA != "" && c.caURL == c.mgr.TestCA
}
func (c *acmeClient) revoke(_ context.Context, certRes certificate.Resource) error {
return c.acmeClient.Certificate.Revoke(certRes.Certificate)
}
func buildUAString() string {
ua := "CertMagic"
if UserAgent != "" {
ua += " " + UserAgent
}
return ua
}
// These internal rate limits are designed to prevent accidentally
// firehosing a CA's ACME endpoints. They are not intended to
// replace or replicate the CA's actual rate limits.
//
// Let's Encrypt's rate limits can be found here:
// https://letsencrypt.org/docs/rate-limits/
//
// Currently (as of December 2019), Let's Encrypt's most relevant
// rate limit for large deployments is 300 new orders per account
// per 3 hours (on average, or best case, that's about 1 every 36
// seconds, or 2 every 72 seconds, etc.); but it's not reasonable
// to try to assume that our internal state is the same as the CA's
// (due to process restarts, config changes, failed validations,
// etc.) and ultimately, only the CA's actual rate limiter is the
// authority. Thus, our own rate limiters do not attempt to enforce
// external rate limits. Doing so causes problems when the domains
// are not in our control (i.e. serving customer sites) and/or lots
// of domains fail validation: they clog our internal rate limiter
// and nearly starve out (or at least slow down) the other domains
// that need certificates. Failed transactions are already retried
// with exponential backoff, so adding in rate limiting can slow
// things down even more.
//
// Instead, the point of our internal rate limiter is to avoid
// hammering the CA's endpoint when there are thousands or even
// millions of certificates under management. Our goal is to
// allow small bursts in a relatively short timeframe so as to
// not block any one domain for too long, without unleashing
// thousands of requests to the CA at once.
var (
rateLimiters = make(map[string]*RingBufferRateLimiter)
rateLimitersMu sync.RWMutex
// RateLimitEvents is how many new events can be allowed
// in RateLimitEventsWindow.
RateLimitEvents = 10
// RateLimitEventsWindow is the size of the sliding
// window that throttles events.
RateLimitEventsWindow = 1 * time.Minute
)
// Some default values passed down to the underlying lego client.
var (
UserAgent string
HTTPTimeout = 30 * time.Second
)
// We keep a global cache of ACME clients so that they
// can be reused. Since the number of CAs, accounts,
// and key types should be fairly limited under best
// practices, this map will hardly ever have more than
// a few entries at the most. The associated lock
// protects access to the map but also ensures that only
// one ACME client is created at a time.
// TODO: consider using storage for a distributed lock
// TODO: consider evicting clients after some time
var (
acmeClients = make(map[string]*lego.Client)
acmeClientsMu sync.Mutex
)