From ddd234ae3250d76efcb77c946351b841e5537c2b Mon Sep 17 00:00:00 2001 From: Jamal Carvalho Date: Mon, 2 Nov 2020 10:17:11 -0500 Subject: [PATCH] internal/frontend: separate goldmark readme code from legacy overview code The overview file contains mostly legacy code used to construct the overview page. This change separates the goldmark code in preparation for updates that will generate a TOC for the readme and the future removal of all overview related code. For golang/go#39297 Change-Id: Ifa5d0ee3983478fd25c6c59fc1bd2c45457cc05c Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/267117 Run-TryBot: Jamal Carvalho TryBot-Result: kokoro Reviewed-by: Jonathan Amsterdam Trust: Jamal Carvalho --- internal/frontend/overview.go | 88 ++-------------------- internal/frontend/overview_test.go | 48 +----------- internal/frontend/readme.go | 113 +++++++++++++++++++++++++++++ internal/frontend/readme_test.go | 61 ++++++++++++++++ internal/frontend/unit_main.go | 24 +++--- 5 files changed, 196 insertions(+), 138 deletions(-) create mode 100644 internal/frontend/readme.go create mode 100644 internal/frontend/readme_test.go diff --git a/internal/frontend/overview.go b/internal/frontend/overview.go index a054193fc..1c464d3df 100644 --- a/internal/frontend/overview.go +++ b/internal/frontend/overview.go @@ -19,14 +19,6 @@ import ( "github.com/google/safehtml/uncheckedconversions" "github.com/microcosm-cc/bluemonday" "github.com/russross/blackfriday/v2" - "github.com/yuin/goldmark" - emoji "github.com/yuin/goldmark-emoji" - "github.com/yuin/goldmark/extension" - "github.com/yuin/goldmark/parser" - "github.com/yuin/goldmark/renderer" - goldmarkHtml "github.com/yuin/goldmark/renderer/html" - "github.com/yuin/goldmark/text" - "github.com/yuin/goldmark/util" "golang.org/x/net/html" "golang.org/x/net/html/atom" "golang.org/x/pkgsite/internal" @@ -85,7 +77,7 @@ func constructOverviewDetails(ctx context.Context, mi *internal.ModuleInfo, read } if overview.Redistributable && readme != nil { overview.ReadMeSource = fileSource(mi.ModulePath, mi.Version, readme.Filepath) - r, err := ReadmeHTML(ctx, mi, readme) + r, err := LegacyReadmeHTML(ctx, mi, readme) if err != nil { return nil, err } @@ -145,13 +137,14 @@ func blackfridayReadmeHTML(ctx context.Context, readme *internal.Readme, mi *int return sanitizeHTML(b), nil } -// ReadmeHTML sanitizes readmeContents based on bluemondy.UGCPolicy and returns +// LegacyReadmeHTML sanitizes readmeContents based on bluemondy.UGCPolicy and returns // a safehtml.HTML. If readmeFilePath indicates that this is a markdown file, // it will also render the markdown contents using blackfriday. // -// It is exported to support external testing. -func ReadmeHTML(ctx context.Context, mi *internal.ModuleInfo, readme *internal.Readme) (_ safehtml.HTML, err error) { - defer derrors.Wrap(&err, "readmeHTML(%s@%s)", mi.ModulePath, mi.Version) +// This function is exported for use in an external tool that uses this package to +// compare readme files to see how changes in processing will affect them. +func LegacyReadmeHTML(ctx context.Context, mi *internal.ModuleInfo, readme *internal.Readme) (_ safehtml.HTML, err error) { + defer derrors.Wrap(&err, "LegacyReadmeHTML(%s@%s)", mi.ModulePath, mi.Version) if readme == nil || readme.Contents == "" { return safehtml.HTML{}, nil } @@ -164,78 +157,9 @@ func ReadmeHTML(ctx context.Context, mi *internal.ModuleInfo, readme *internal.R return h, nil } - if experiment.IsActive(ctx, internal.ExperimentGoldmark) { - // Sets priority value so that we always use our custom transformer - // instead of the default ones. The default values are in: - // https://github.com/yuin/goldmark/blob/7b90f04af43131db79ec320be0bd4744079b346f/parser/parser.go#L567 - const ASTTransformerPriority = 10000 - gdMarkdown := goldmark.New( - goldmark.WithParserOptions( - // WithHeadingAttribute allows us to include other attributes in - // heading tags. This is useful for our aria-level implementation of - // increasing heading rankings. - parser.WithHeadingAttribute(), - // Generates an id in every heading tag. This is used in github in - // order to generate a link with a hash that a user would scroll to - //

goldmark

=> github.com/yuin/goldmark#goldmark - parser.WithAutoHeadingID(), - // Include custom ASTTransformer using the readme and module info to - // use translateRelativeLink and translateHTML to modify the AST - // before it is rendered. - parser.WithASTTransformers(util.Prioritized(&ASTTransformer{ - info: mi.SourceInfo, - readme: readme, - }, ASTTransformerPriority)), - ), - // These extensions lets users write HTML code in the README. This is - // fine since we process the contents using bluemonday after. - goldmark.WithRendererOptions(goldmarkHtml.WithUnsafe(), goldmarkHtml.WithXHTML()), - goldmark.WithExtensions( - extension.GFM, // Support Github Flavored Markdown. - emoji.Emoji, // Support Github markdown emoji markup. - ), - ) - gdMarkdown.Renderer().AddOptions( - renderer.WithNodeRenderers( - util.Prioritized(NewHTMLRenderer(mi.SourceInfo, readme), 100), - ), - ) - - var b bytes.Buffer - contents := []byte(readme.Contents) - gdRenderer := gdMarkdown.Renderer() - gdParser := gdMarkdown.Parser() - - reader := text.NewReader(contents) - doc := gdParser.Parse(reader) - - if err := gdRenderer.Render(&b, contents, doc); err != nil { - return safehtml.HTML{}, nil - } - - return sanitizeGoldmarkHTML(&b), nil - } return blackfridayReadmeHTML(ctx, readme, mi) } -// sanitizeGoldmarkHTML sanitizes HTML from a bytes.Buffer so that it is safe. -func sanitizeGoldmarkHTML(b *bytes.Buffer) safehtml.HTML { - p := bluemonday.UGCPolicy() - - p.AllowAttrs("width", "align").OnElements("img") - p.AllowAttrs("width", "align").OnElements("div") - p.AllowAttrs("width", "align").OnElements("p") - // Allow accessible headings (i.e
). - p.AllowAttrs("width", "align", "role", "aria-level").OnElements("div") - for _, h := range []string{"h1", "h2", "h3", "h4", "h5", "h6"} { - // Needed to preserve github styles heading font-sizes - p.AllowAttrs("class").OnElements(h) - } - - s := string(p.SanitizeBytes(b.Bytes())) - return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(s) -} - // sanitizeHTML reads HTML from r and sanitizes it to ensure it is safe. func sanitizeHTML(r io.Reader) safehtml.HTML { // bluemonday.UGCPolicy allows a broad selection of HTML elements and diff --git a/internal/frontend/overview_test.go b/internal/frontend/overview_test.go index ca754f271..7b7d8e0af 100644 --- a/internal/frontend/overview_test.go +++ b/internal/frontend/overview_test.go @@ -177,50 +177,6 @@ func TestPackageOverviewDetails(t *testing.T) { } } -func TestGoldmarkReadmeHTML(t *testing.T) { - ctx := experiment.NewContext(context.Background(), internal.ExperimentGoldmark) - mod := &internal.ModuleInfo{ - Version: sample.VersionString, - SourceInfo: source.NewGitHubInfo(sample.ModulePath, "", "v1.2.3"), - } - for _, tc := range []struct { - name string - mi *internal.ModuleInfo - readme *internal.Readme - want string - }{ - { - name: "Top level heading is h3 from ####, and following header levels become hN-1", - mi: mod, - readme: &internal.Readme{ - Filepath: sample.ReadmeFilePath, - Contents: "#### Heading Rank 4\n\n##### Heading Rank 5", - }, - want: "

Heading Rank 4

\n

Heading Rank 5

", - }, - { - name: "Github markdown emoji markup is properly rendered", - mi: mod, - readme: &internal.Readme{ - Filepath: sample.ReadmeFilePath, - Contents: "# :zap: Zap \n\n :joy:", - }, - want: "

⚡ Zap

\n

😂

", - }, - } { - t.Run(tc.name, func(t *testing.T) { - hgot, err := ReadmeHTML(ctx, tc.mi, tc.readme) - if err != nil { - t.Fatal(err) - } - got := strings.TrimSpace(hgot.String()) - if diff := cmp.Diff(tc.want, got); diff != "" { - t.Errorf("readmeHTML(%v) mismatch (-want +got):\n%s", tc.mi, diff) - } - }) - } -} - func TestBlackfridayReadmeHTML(t *testing.T) { ctx := context.Background() aModule := &internal.ModuleInfo{ @@ -411,13 +367,13 @@ func TestBlackfridayReadmeHTML(t *testing.T) { }, } checkReadme := func(ctx context.Context, t *testing.T, mi *internal.ModuleInfo, readme *internal.Readme, want string) { - hgot, err := ReadmeHTML(ctx, mi, readme) + hgot, err := LegacyReadmeHTML(ctx, mi, readme) if err != nil { t.Fatal(err) } got := strings.TrimSpace(hgot.String()) if diff := cmp.Diff(want, got); diff != "" { - t.Errorf("readmeHTML(%v) mismatch (-want +got):\n%s", mi, diff) + t.Errorf("LegacyReadmeHTML(%v) mismatch (-want +got):\n%s", mi, diff) } } for _, test := range tests { diff --git a/internal/frontend/readme.go b/internal/frontend/readme.go new file mode 100644 index 000000000..24fe17f9d --- /dev/null +++ b/internal/frontend/readme.go @@ -0,0 +1,113 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package frontend + +import ( + "bytes" + "context" + + "github.com/google/safehtml" + "github.com/google/safehtml/template" + "github.com/google/safehtml/uncheckedconversions" + "github.com/microcosm-cc/bluemonday" + "github.com/yuin/goldmark" + emoji "github.com/yuin/goldmark-emoji" + "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + goldmarkHtml "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" + "golang.org/x/pkgsite/internal" + "golang.org/x/pkgsite/internal/derrors" +) + +// ReadmeHTML sanitizes readmeContents based on bluemondy.UGCPolicy and returns +// a safehtml.HTML. If readmeFilePath indicates that this is a markdown file, +// it will also render the markdown contents using goldmark. +// +// This function is exported for use in an external tool that uses this package to +// compare readme files to see how changes in processing will affect them. +func ReadmeHTML(ctx context.Context, mi *internal.ModuleInfo, readme *internal.Readme) (_ safehtml.HTML, err error) { + defer derrors.Wrap(&err, "ReadmeHTML(%s@%s)", mi.ModulePath, mi.Version) + if readme == nil || readme.Contents == "" { + return safehtml.HTML{}, nil + } + if !isMarkdown(readme.Filepath) { + t := template.Must(template.New("").Parse(`
{{.}}
`)) + h, err := t.ExecuteToHTML(readme.Contents) + if err != nil { + return safehtml.HTML{}, err + } + return h, nil + } + + // Sets priority value so that we always use our custom transformer + // instead of the default ones. The default values are in: + // https://github.com/yuin/goldmark/blob/7b90f04af43131db79ec320be0bd4744079b346f/parser/parser.go#L567 + const ASTTransformerPriority = 10000 + gdMarkdown := goldmark.New( + goldmark.WithParserOptions( + // WithHeadingAttribute allows us to include other attributes in + // heading tags. This is useful for our aria-level implementation of + // increasing heading rankings. + parser.WithHeadingAttribute(), + // Generates an id in every heading tag. This is used in github in + // order to generate a link with a hash that a user would scroll to + //

goldmark

=> github.com/yuin/goldmark#goldmark + parser.WithAutoHeadingID(), + // Include custom ASTTransformer using the readme and module info to + // use translateRelativeLink and translateHTML to modify the AST + // before it is rendered. + parser.WithASTTransformers(util.Prioritized(&ASTTransformer{ + info: mi.SourceInfo, + readme: readme, + }, ASTTransformerPriority)), + ), + // These extensions lets users write HTML code in the README. This is + // fine since we process the contents using bluemonday after. + goldmark.WithRendererOptions(goldmarkHtml.WithUnsafe(), goldmarkHtml.WithXHTML()), + goldmark.WithExtensions( + extension.GFM, // Support Github Flavored Markdown. + emoji.Emoji, // Support Github markdown emoji markup. + ), + ) + gdMarkdown.Renderer().AddOptions( + renderer.WithNodeRenderers( + util.Prioritized(NewHTMLRenderer(mi.SourceInfo, readme), 100), + ), + ) + + var b bytes.Buffer + contents := []byte(readme.Contents) + gdRenderer := gdMarkdown.Renderer() + gdParser := gdMarkdown.Parser() + + reader := text.NewReader(contents) + doc := gdParser.Parse(reader) + + if err := gdRenderer.Render(&b, contents, doc); err != nil { + return safehtml.HTML{}, nil + } + return sanitizeGoldmarkHTML(&b), nil +} + +// sanitizeGoldmarkHTML sanitizes HTML from a bytes.Buffer so that it is safe. +func sanitizeGoldmarkHTML(b *bytes.Buffer) safehtml.HTML { + p := bluemonday.UGCPolicy() + + p.AllowAttrs("width", "align").OnElements("img") + p.AllowAttrs("width", "align").OnElements("div") + p.AllowAttrs("width", "align").OnElements("p") + // Allow accessible headings (i.e
). + p.AllowAttrs("width", "align", "role", "aria-level").OnElements("div") + for _, h := range []string{"h1", "h2", "h3", "h4", "h5", "h6"} { + // Needed to preserve github styles heading font-sizes + p.AllowAttrs("class").OnElements(h) + } + + s := string(p.SanitizeBytes(b.Bytes())) + return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(s) +} diff --git a/internal/frontend/readme_test.go b/internal/frontend/readme_test.go new file mode 100644 index 000000000..236b1ce8a --- /dev/null +++ b/internal/frontend/readme_test.go @@ -0,0 +1,61 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package frontend + +import ( + "context" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "golang.org/x/pkgsite/internal" + "golang.org/x/pkgsite/internal/experiment" + "golang.org/x/pkgsite/internal/source" + "golang.org/x/pkgsite/internal/testing/sample" +) + +func TestGoldmarkReadmeHTML(t *testing.T) { + ctx := experiment.NewContext(context.Background(), internal.ExperimentGoldmark) + mod := &internal.ModuleInfo{ + Version: sample.VersionString, + SourceInfo: source.NewGitHubInfo(sample.ModulePath, "", sample.VersionString), + } + for _, tc := range []struct { + name string + mi *internal.ModuleInfo + readme *internal.Readme + want string + }{ + { + name: "Top level heading is h3 from ####, and following header levels become hN-1", + mi: mod, + readme: &internal.Readme{ + Filepath: sample.ReadmeFilePath, + Contents: "#### Heading Rank 4\n\n##### Heading Rank 5", + }, + want: "

Heading Rank 4

\n

Heading Rank 5

", + }, + { + name: "Github markdown emoji markup is properly rendered", + mi: mod, + readme: &internal.Readme{ + Filepath: sample.ReadmeFilePath, + Contents: "# :zap: Zap \n\n :joy:", + }, + want: "

⚡ Zap

\n

😂

", + }, + } { + t.Run(tc.name, func(t *testing.T) { + hgot, err := ReadmeHTML(ctx, tc.mi, tc.readme) + if err != nil { + t.Fatal(err) + } + got := strings.TrimSpace(hgot.String()) + if diff := cmp.Diff(tc.want, got); diff != "" { + t.Errorf("ReadmeHTML(%v) mismatch (-want +got):\n%s", tc.mi, diff) + } + }) + } +} diff --git a/internal/frontend/unit_main.go b/internal/frontend/unit_main.go index 16c4065d6..73313fc04 100644 --- a/internal/frontend/unit_main.go +++ b/internal/frontend/unit_main.go @@ -170,18 +170,22 @@ func moduleInfo(um *internal.UnitMeta) *internal.ModuleInfo { } // readmeContent renders the readme to html. -func readmeContent(ctx context.Context, um *internal.UnitMeta, readme *internal.Readme) (safehtml.HTML, error) { +func readmeContent(ctx context.Context, um *internal.UnitMeta, readme *internal.Readme) (_ safehtml.HTML, err error) { defer middleware.ElapsedStat(ctx, "readmeContent")() - - if um.IsRedistributable && readme != nil { - mi := moduleInfo(um) - readme, err := ReadmeHTML(ctx, mi, readme) - if err != nil { - return safehtml.HTML{}, err - } - return readme, nil + if !um.IsRedistributable || readme == nil { + return safehtml.HTML{}, nil + } + mi := moduleInfo(um) + var readmeHTML safehtml.HTML + if experiment.IsActive(ctx, internal.ExperimentGoldmark) { + readmeHTML, err = ReadmeHTML(ctx, mi, readme) + } else { + readmeHTML, err = LegacyReadmeHTML(ctx, mi, readme) + } + if err != nil { + return safehtml.HTML{}, err } - return safehtml.HTML{}, nil + return readmeHTML, nil } func getNestedModules(ctx context.Context, ds internal.DataSource, um *internal.UnitMeta) ([]*NestedModule, error) {