/
chromium.go
584 lines (490 loc) · 17.7 KB
/
chromium.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
package chromium
import (
"context"
"errors"
"fmt"
"os"
"time"
"github.com/alexliesenfeld/health"
"github.com/chromedp/cdproto/network"
flag "github.com/spf13/pflag"
"go.uber.org/zap"
"github.com/gotenberg/gotenberg/v8/pkg/gotenberg"
"github.com/gotenberg/gotenberg/v8/pkg/modules/api"
)
func init() {
gotenberg.MustRegisterModule(new(Chromium))
}
// Errors shared by every kind of conversion.
var (
	// ErrInvalidEmulatedMediaType is returned when the emulated media type is
	// neither "screen" nor "print". An empty value is allowed.
	ErrInvalidEmulatedMediaType = errors.New("invalid emulated media type")

	// ErrInvalidEvaluationExpression is returned when an evaluation
	// expression yields an exception or undefined.
	ErrInvalidEvaluationExpression = errors.New("invalid evaluation expression")

	// ErrRpccMessageTooLarge is returned when a message received by
	// ChromeDevTools is larger than 100 MB.
	ErrRpccMessageTooLarge = errors.New("rpcc message too large")

	// ErrInvalidHttpStatusCode is returned when the status code of the main
	// page matches one of the entries in [Options.FailOnHttpStatusCodes].
	ErrInvalidHttpStatusCode = errors.New("invalid HTTP status code")

	// ErrConsoleExceptions is returned when there are exceptions in the
	// Chromium console, and only if [Options.FailOnConsoleExceptions] is set
	// to true.
	ErrConsoleExceptions = errors.New("console exceptions")
)

// Errors specific to PDF conversions.
var (
	// ErrOmitBackgroundWithoutPrintBackground is returned when
	// PdfOptions.OmitBackground is set to true but PdfOptions.PrintBackground
	// is not.
	ErrOmitBackgroundWithoutPrintBackground = errors.New("omit background without print background")

	// ErrInvalidPrinterSettings is returned when the PdfOptions hold one or
	// more aberrant values.
	ErrInvalidPrinterSettings = errors.New("invalid printer settings")

	// ErrPageRangesSyntaxError is returned when the PdfOptions hold invalid
	// page ranges.
	ErrPageRangesSyntaxError = errors.New("page ranges syntax error")
)
// Chromium is a module which provides both an [Api] and routes for converting
// HTML documents to PDF and for capturing screenshots.
type Chromium struct {
// autoStart mirrors the "chromium-auto-start" flag. When true, Start launches
// the supervisor and Ready waits for the browser to report healthy.
autoStart bool
// disableRoutes mirrors the "chromium-disable-routes" flag. When true,
// Routes returns no route.
disableRoutes bool
// args are the browser arguments assembled by Provision from the parsed
// flags and the CHROMIUM_BIN_PATH environment variable.
args browserArguments
// logger is the module logger, named "browser".
logger *zap.Logger
// browser is the Chromium browser built from args; it performs the actual
// PDF conversions and screenshots.
browser browser
// supervisor wraps browser: conversions run through it, and it is the
// source of the queue size, restarts count and health status.
supervisor gotenberg.ProcessSupervisor
// engine is the PDF engine handed to the PDF conversion routes.
engine gotenberg.PdfEngine
}
// Options are the common options for all conversions. They are embedded in
// both PdfOptions and ScreenshotOptions.
type Options struct {
// SkipNetworkIdleEvent sets whether the conversion should skip waiting for
// the "networkIdle" event, which drastically improves the conversion speed.
// It may not be suitable for all HTML documents, as some may not be fully
// rendered until this event is fired.
// Optional.
SkipNetworkIdleEvent bool
// FailOnHttpStatusCodes lists the status codes that make the conversion
// fail when the status code from the main page matches one of them.
// Optional.
FailOnHttpStatusCodes []int64
// FailOnConsoleExceptions sets whether the conversion should fail if there
// are exceptions in the Chromium console.
// Optional.
FailOnConsoleExceptions bool
// WaitDelay is the duration to wait when loading an HTML document before
// converting it.
// Optional.
WaitDelay time.Duration
// WaitWindowStatus is the window.status value to wait for before
// converting an HTML document.
// Optional.
WaitWindowStatus string
// WaitForExpression is a custom JavaScript expression; the conversion of
// the HTML document waits until this expression returns true.
// Optional.
WaitForExpression string
// Cookies are the cookies to put in the Chromium cookies' jar.
// Optional.
Cookies []Cookie
// ExtraHttpHeaders are the HTTP headers sent by Chromium while loading
// the HTML document.
// Optional.
ExtraHttpHeaders map[string]string
// EmulatedMediaType is the media type to emulate, either "screen" or
// "print".
// Optional.
EmulatedMediaType string
// OmitBackground hides the default white background and allows generating
// PDFs with transparency.
// Optional.
OmitBackground bool
}
// DefaultOptions returns the default values for Options: every field is left
// at its zero value, except FailOnHttpStatusCodes which holds 499 and 599.
func DefaultOptions() Options {
	var defaults Options
	defaults.FailOnHttpStatusCodes = []int64{499, 599}
	return defaults
}
// PdfOptions are the available options for converting an HTML document to PDF.
type PdfOptions struct {
// Options are the common conversion options.
Options
// Landscape sets the paper orientation to landscape if true.
// Optional.
Landscape bool
// PrintBackground prints the background graphics if true.
// Optional.
PrintBackground bool
// Scale is the scale of the page rendering.
// Optional.
Scale float64
// SinglePage defines whether to print the entire content in one single
// page.
// Optional.
SinglePage bool
// PaperWidth is the paper width, in inches.
// Optional.
PaperWidth float64
// PaperHeight is the paper height, in inches.
// Optional.
PaperHeight float64
// MarginTop is the top margin, in inches.
// Optional.
MarginTop float64
// MarginBottom is the bottom margin, in inches.
// Optional.
MarginBottom float64
// MarginLeft is the left margin, in inches.
// Optional.
MarginLeft float64
// MarginRight is the right margin, in inches.
// Optional.
MarginRight float64
// PageRanges are the page ranges to print, e.g., '1-5, 8, 11-13'. Empty
// means all pages.
// Optional.
PageRanges string
// HeaderTemplate is the HTML template of the header. It should be valid
// HTML markup with the following classes used to inject printing values
// into them:
// - date: formatted print date
// - title: document title
// - url: document location
// - pageNumber: current page number
// - totalPages: total pages in the document
// For example, <span class=title></span> would generate a span containing
// the title.
// Optional.
HeaderTemplate string
// FooterTemplate is the HTML template of the footer. It should use the
// same format as the HeaderTemplate.
// Optional.
FooterTemplate string
// PreferCssPageSize defines whether to prefer the page size as defined by
// CSS. If false, the content will be scaled to fit the paper size.
// Optional.
PreferCssPageSize bool
}
// DefaultPdfOptions returns the default values for PdfOptions: the common
// defaults from DefaultOptions, a scale of 1.0, an 8.5 x 11 inches paper,
// 0.39 inches margins on every side, and empty header and footer templates.
// Every other field is left at its zero value.
func DefaultPdfOptions() PdfOptions {
	// The header and the footer default to the same empty HTML skeleton.
	const emptyTemplate = "<html><head></head><body></body></html>"

	defaults := PdfOptions{Options: DefaultOptions()}

	defaults.Scale = 1.0
	defaults.PaperWidth = 8.5
	defaults.PaperHeight = 11

	defaults.MarginTop = 0.39
	defaults.MarginBottom = 0.39
	defaults.MarginLeft = 0.39
	defaults.MarginRight = 0.39

	defaults.HeaderTemplate = emptyTemplate
	defaults.FooterTemplate = emptyTemplate

	return defaults
}
// ScreenshotOptions are the available options for capturing a screenshot from
// an HTML document.
type ScreenshotOptions struct {
// Options are the common conversion options.
Options
// Width is the device screen width in pixels.
// Optional.
Width int
// Height is the device screen height in pixels.
// Optional.
Height int
// Clip defines whether to clip the screenshot according to the device
// dimensions.
// Optional.
Clip bool
// Format is the image compression format, one of "png", "jpeg" or
// "webp".
// Optional.
Format string
// Quality is the compression quality, in the range [0..100] (jpeg only).
// Optional.
Quality int
// OptimizeForSpeed defines whether to optimize the image encoding for
// speed rather than for the resulting size.
// Optional.
OptimizeForSpeed bool
}
// DefaultScreenshotOptions returns the default values for ScreenshotOptions:
// the common defaults from DefaultOptions, an 800 x 600 pixels screen, and a
// "png" format with a quality of 100. Clipping and speed optimization are off.
func DefaultScreenshotOptions() ScreenshotOptions {
	defaults := ScreenshotOptions{Options: DefaultOptions()}

	defaults.Width = 800
	defaults.Height = 600
	defaults.Format = "png"
	defaults.Quality = 100

	return defaults
}
// Cookie gathers the available entries for setting a cookie in the Chromium
// cookies' jar. The JSON tags define the keys used when a cookie is
// marshaled to or unmarshaled from JSON; optional entries are omitted from the
// output when empty.
type Cookie struct {
// Name is the cookie name.
// Required.
Name string `json:"name"`
// Value is the cookie value.
// Required.
Value string `json:"value"`
// Domain is the cookie domain.
// Required.
Domain string `json:"domain"`
// Path is the cookie path.
// Optional.
Path string `json:"path,omitempty"`
// Secure marks the cookie as secure if true.
// Optional.
Secure bool `json:"secure,omitempty"`
// HttpOnly marks the cookie as HTTP-only if true.
// Optional.
HttpOnly bool `json:"httpOnly,omitempty"`
// SameSite is the cookie's 'Same-Site' status.
// Optional.
SameSite network.CookieSameSite `json:"sameSite,omitempty"`
}
// Api helps to interact with Chromium for converting HTML documents to PDF
// and for capturing screenshots of them.
type Api interface {
// Pdf converts the document at the given URL to a PDF, using the given
// options. NOTE(review): outputPath is presumably where the resulting file
// is written - confirm against the browser implementation.
Pdf(ctx context.Context, logger *zap.Logger, url, outputPath string, options PdfOptions) error
// Screenshot captures a screenshot of the document at the given URL, using
// the given options. NOTE(review): outputPath is presumably where the
// resulting image is written - confirm against the browser implementation.
Screenshot(ctx context.Context, logger *zap.Logger, url, outputPath string, options ScreenshotOptions) error
}
// Provider is a module interface which exposes a method for creating an [Api]
// for other modules.
//
//	func (m *YourModule) Provision(ctx *gotenberg.Context) error {
//		provider, _ := ctx.Module(new(chromium.Provider))
//		api, _ := provider.(chromium.Provider).Chromium()
//	}
type Provider interface {
// Chromium returns an [Api] for interacting with Chromium, or an error if
// none can be provided.
Chromium() (Api, error)
}
// Descriptor returns a [Chromium]'s module descriptor: the module ID, the set
// of "chromium-*" command-line flags with their defaults, and a constructor
// returning a fresh module instance.
func (mod *Chromium) Descriptor() gotenberg.ModuleDescriptor {
	flags := flag.NewFlagSet("chromium", flag.ExitOnError)

	// Process lifecycle.
	flags.Int64("chromium-restart-after", 0, "Number of conversions after which Chromium will automatically restart. Set to 0 to disable this feature")
	flags.Int64("chromium-max-queue-size", 0, "Maximum request queue size for Chromium. Set to 0 to disable this feature")
	flags.Bool("chromium-auto-start", false, "Automatically launch Chromium upon initialization if set to true; otherwise, Chromium will start at the time of the first conversion")
	flags.Duration("chromium-start-timeout", 20*time.Second, "Maximum duration to wait for Chromium to start or restart")

	// Browser switches.
	flags.Bool("chromium-incognito", false, "Start Chromium with incognito mode")
	flags.Bool("chromium-allow-insecure-localhost", false, "Ignore TLS/SSL errors on localhost")
	flags.Bool("chromium-ignore-certificate-errors", false, "Ignore the certificate errors")
	flags.Bool("chromium-disable-web-security", false, "Don't enforce the same-origin policy")
	flags.Bool("chromium-allow-file-access-from-files", false, "Allow file:// URIs to read other file:// URIs")
	flags.String("chromium-host-resolver-rules", "", "Set custom mappings to the host resolver")
	flags.String("chromium-proxy-server", "", "Set the outbound proxy server; this switch only affects HTTP and HTTPS requests")

	// URL filtering.
	flags.String("chromium-allow-list", "", "Set the allowed URLs for Chromium using a regular expression")
	flags.String("chromium-deny-list", `^file:(?!//\/tmp/).*`, "Set the denied URLs for Chromium using a regular expression")

	// Per-conversion behavior.
	flags.Bool("chromium-clear-cache", false, "Clear Chromium cache between each conversion")
	flags.Bool("chromium-clear-cookies", false, "Clear Chromium cookies between each conversion")
	flags.Bool("chromium-disable-javascript", false, "Disable JavaScript")
	flags.Bool("chromium-disable-routes", false, "Disable the routes")

	return gotenberg.ModuleDescriptor{
		ID:      "chromium",
		FlagSet: flags,
		New: func() gotenberg.Module {
			return new(Chromium)
		},
	}
}
// Provision sets the module properties from the parsed flags, the
// CHROMIUM_BIN_PATH environment variable, and the logger and PDF engine
// provider modules. It returns an error if the environment variable is not
// set or if one of the providers cannot be resolved.
func (mod *Chromium) Provision(ctx *gotenberg.Context) error {
	parsed := ctx.ParsedFlags()

	mod.autoStart = parsed.MustBool("chromium-auto-start")
	mod.disableRoutes = parsed.MustBool("chromium-disable-routes")

	chromiumBin, found := os.LookupEnv("CHROMIUM_BIN_PATH")
	if !found {
		return errors.New("CHROMIUM_BIN_PATH environment variable is not set")
	}

	mod.args = browserArguments{
		binPath:                  chromiumBin,
		incognito:                parsed.MustBool("chromium-incognito"),
		allowInsecureLocalhost:   parsed.MustBool("chromium-allow-insecure-localhost"),
		ignoreCertificateErrors:  parsed.MustBool("chromium-ignore-certificate-errors"),
		disableWebSecurity:       parsed.MustBool("chromium-disable-web-security"),
		allowFileAccessFromFiles: parsed.MustBool("chromium-allow-file-access-from-files"),
		hostResolverRules:        parsed.MustString("chromium-host-resolver-rules"),
		proxyServer:              parsed.MustString("chromium-proxy-server"),
		wsUrlReadTimeout:         parsed.MustDuration("chromium-start-timeout"),
		allowList:                parsed.MustRegexp("chromium-allow-list"),
		denyList:                 parsed.MustRegexp("chromium-deny-list"),
		clearCache:               parsed.MustBool("chromium-clear-cache"),
		clearCookies:             parsed.MustBool("chromium-clear-cookies"),
		disableJavaScript:        parsed.MustBool("chromium-disable-javascript"),
	}

	// Resolve the module logger and name it after the browser.
	logModule, err := ctx.Module(new(gotenberg.LoggerProvider))
	if err != nil {
		return fmt.Errorf("get logger provider: %w", err)
	}

	baseLogger, err := logModule.(gotenberg.LoggerProvider).Logger(mod)
	if err != nil {
		return fmt.Errorf("get logger: %w", err)
	}

	mod.logger = baseLogger.Named("browser")

	// Build the browser, then the supervisor in charge of its process.
	mod.browser = newChromiumBrowser(mod.args)

	restartAfter := parsed.MustInt64("chromium-restart-after")
	maxQueueSize := parsed.MustInt64("chromium-max-queue-size")
	mod.supervisor = gotenberg.NewProcessSupervisor(mod.logger, mod.browser, restartAfter, maxQueueSize)

	// Resolve the PDF engine handed to the conversion routes.
	engineModule, err := ctx.Module(new(gotenberg.PdfEngineProvider))
	if err != nil {
		return fmt.Errorf("get PDF engine provider: %w", err)
	}

	pdfEngine, err := engineModule.(gotenberg.PdfEngineProvider).PdfEngine()
	if err != nil {
		return fmt.Errorf("get PDF engine: %w", err)
	}

	mod.engine = pdfEngine

	return nil
}
// Validate validates the module properties by checking that the Chromium
// binary path can be inspected on the file system.
//
// It returns an error if the path does not exist, and also if it cannot be
// inspected for any other reason (e.g., a permission problem or a path
// component which is not a directory), so that such a misconfiguration is
// reported at validation time instead of at the first browser start.
func (mod *Chromium) Validate() error {
	_, err := os.Stat(mod.args.binPath)

	switch {
	case err == nil:
		return nil
	case errors.Is(err, os.ErrNotExist):
		return fmt.Errorf("chromium binary path does not exist: %w", err)
	default:
		// Any other failure was previously swallowed, letting an unusable
		// binary path pass validation.
		return fmt.Errorf("stat chromium binary path: %w", err)
	}
}
// Start launches the supervisor when auto-start is enabled, and does nothing
// otherwise.
func (mod *Chromium) Start() error {
	if mod.autoStart {
		if err := mod.supervisor.Launch(); err != nil {
			return fmt.Errorf("launch supervisor: %w", err)
		}
	}

	return nil
}
// StartupMessage returns a custom startup message, which depends on whether
// auto-start is enabled.
func (mod *Chromium) StartupMessage() string {
	if mod.autoStart {
		return "Chromium automatically started"
	}

	return "Chromium ready to start"
}
// Stop waits for the given context to be done, then shuts down the
// supervisor.
func (mod *Chromium) Stop(ctx context.Context) error {
	// Waiting for the context first gives the other modules a chance to stop
	// gracefully before the shutdown happens.
	mod.logger.Debug("wait for the end of grace duration")
	<-ctx.Done()

	if err := mod.supervisor.Shutdown(); err != nil {
		return fmt.Errorf("stop Chromium: %w", err)
	}

	return nil
}
// Metrics returns the module metrics: the size of the requests queue and the
// number of restarts, both read from the supervisor on demand.
func (mod *Chromium) Metrics() ([]gotenberg.Metric, error) {
	readQueueSize := func() float64 {
		return float64(mod.supervisor.ReqQueueSize())
	}
	readRestarts := func() float64 {
		return float64(mod.supervisor.RestartsCount())
	}

	metrics := []gotenberg.Metric{
		{
			Name:        "chromium_requests_queue_size",
			Description: "Current number of Chromium conversion requests waiting to be treated.",
			Read:        readQueueSize,
		},
		{
			Name:        "chromium_restarts_count",
			Description: "Current number of Chromium restarts.",
			Read:        readRestarts,
		},
	}

	return metrics, nil
}
// Checks returns a single health check, named "chromium", which fails when
// the supervisor reports that it is not healthy.
func (mod *Chromium) Checks() ([]health.CheckerOption, error) {
	chromiumCheck := health.Check{
		Name: "chromium",
		Check: func(_ context.Context) error {
			if !mod.supervisor.Healthy() {
				return errors.New("Chromium is unhealthy")
			}

			return nil
		},
	}

	return []health.CheckerOption{health.WithCheck(chromiumCheck)}, nil
}
// Ready returns no error if the module is ready. Without auto-start, the
// module is always ready. With auto-start, the browser health is polled every
// 100 milliseconds until it is healthy or until the start timeout expires, in
// which case an error is returned.
func (mod *Chromium) Ready() error {
	if !mod.autoStart {
		return nil
	}

	waitCtx, cancel := context.WithTimeout(context.Background(), mod.args.wsUrlReadTimeout)
	defer cancel()

	poll := time.NewTicker(100 * time.Millisecond)
	defer poll.Stop()

	for {
		select {
		case <-waitCtx.Done():
			return fmt.Errorf("context done while waiting for Chromium to be ready: %w", waitCtx.Err())
		case <-poll.C:
			if mod.browser.Healthy(mod.logger) {
				return nil
			}
		}
	}
}
// Chromium returns the module itself as an [Api] for interacting with
// Chromium. The returned error is always nil.
func (mod *Chromium) Chromium() (Api, error) {
	var impl Api = mod

	return impl, nil
}
// Routes returns the HTTP routes for converting and capturing screenshots of
// URLs, HTML and Markdown documents, or no route if the routes are disabled.
func (mod *Chromium) Routes() ([]api.Route, error) {
	if !mod.disableRoutes {
		routes := []api.Route{
			convertUrlRoute(mod, mod.engine),
			screenshotUrlRoute(mod),
			convertHtmlRoute(mod, mod.engine),
			screenshotHtmlRoute(mod),
			convertMarkdownRoute(mod, mod.engine),
			screenshotMarkdownRoute(mod),
		}

		return routes, nil
	}

	return nil, nil
}
// Pdf converts a URL to PDF. The conversion is handed to the supervisor,
// which runs it against the browser.
func (mod *Chromium) Pdf(ctx context.Context, logger *zap.Logger, url, outputPath string, options PdfOptions) error {
	convert := func() error {
		return mod.browser.pdf(ctx, logger, url, outputPath, options)
	}

	// Note: the error is deliberately not wrapped, as wrapping would leak
	// into the errors we want to display to the end user.
	return mod.supervisor.Run(ctx, logger, convert)
}
// Screenshot captures a screenshot of a URL. The capture is handed to the
// supervisor, which runs it against the browser.
func (mod *Chromium) Screenshot(ctx context.Context, logger *zap.Logger, url, outputPath string, options ScreenshotOptions) error {
	capture := func() error {
		return mod.browser.screenshot(ctx, logger, url, outputPath, options)
	}

	// Note: the error is deliberately not wrapped, as wrapping would leak
	// into the errors we want to display to the end user.
	return mod.supervisor.Run(ctx, logger, capture)
}
// Interface guards: compile-time assertions that *Chromium implements each of
// the interfaces below. They have no effect at runtime.
var (
_ gotenberg.Module = (*Chromium)(nil)
_ gotenberg.Provisioner = (*Chromium)(nil)
_ gotenberg.Validator = (*Chromium)(nil)
_ gotenberg.App = (*Chromium)(nil)
_ gotenberg.MetricsProvider = (*Chromium)(nil)
_ api.HealthChecker = (*Chromium)(nil)
_ api.Router = (*Chromium)(nil)
_ Api = (*Chromium)(nil)
_ Provider = (*Chromium)(nil)
)