Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
internal/frontend: separate goldmark readme code from legacy overview code
The overview file contains mostly legacy code used to construct the overview page. This change separates the goldmark code in preparation for updates that will generate a TOC for the readme and for the future removal of all overview-related code. For golang/go#39297 Change-Id: Ifa5d0ee3983478fd25c6c59fc1bd2c45457cc05c Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/267117 Run-TryBot: Jamal Carvalho <jamal@golang.org> TryBot-Result: kokoro <noreply+kokoro@google.com> Reviewed-by: Jonathan Amsterdam <jba@google.com> Trust: Jamal Carvalho <jamal@golang.org>
- Loading branch information
Showing
5 changed files
with
196 additions
and
138 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
// Copyright 2020 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package frontend | ||
|
||
import ( | ||
"bytes" | ||
"context" | ||
|
||
"github.com/google/safehtml" | ||
"github.com/google/safehtml/template" | ||
"github.com/google/safehtml/uncheckedconversions" | ||
"github.com/microcosm-cc/bluemonday" | ||
"github.com/yuin/goldmark" | ||
emoji "github.com/yuin/goldmark-emoji" | ||
"github.com/yuin/goldmark/extension" | ||
"github.com/yuin/goldmark/parser" | ||
"github.com/yuin/goldmark/renderer" | ||
goldmarkHtml "github.com/yuin/goldmark/renderer/html" | ||
"github.com/yuin/goldmark/text" | ||
"github.com/yuin/goldmark/util" | ||
"golang.org/x/pkgsite/internal" | ||
"golang.org/x/pkgsite/internal/derrors" | ||
) | ||
|
||
// ReadmeHTML sanitizes readmeContents based on bluemondy.UGCPolicy and returns | ||
// a safehtml.HTML. If readmeFilePath indicates that this is a markdown file, | ||
// it will also render the markdown contents using goldmark. | ||
// | ||
// This function is exported for use in an external tool that uses this package to | ||
// compare readme files to see how changes in processing will affect them. | ||
func ReadmeHTML(ctx context.Context, mi *internal.ModuleInfo, readme *internal.Readme) (_ safehtml.HTML, err error) { | ||
defer derrors.Wrap(&err, "ReadmeHTML(%s@%s)", mi.ModulePath, mi.Version) | ||
if readme == nil || readme.Contents == "" { | ||
return safehtml.HTML{}, nil | ||
} | ||
if !isMarkdown(readme.Filepath) { | ||
t := template.Must(template.New("").Parse(`<pre class="readme">{{.}}</pre>`)) | ||
h, err := t.ExecuteToHTML(readme.Contents) | ||
if err != nil { | ||
return safehtml.HTML{}, err | ||
} | ||
return h, nil | ||
} | ||
|
||
// Sets priority value so that we always use our custom transformer | ||
// instead of the default ones. The default values are in: | ||
// https://github.com/yuin/goldmark/blob/7b90f04af43131db79ec320be0bd4744079b346f/parser/parser.go#L567 | ||
const ASTTransformerPriority = 10000 | ||
gdMarkdown := goldmark.New( | ||
goldmark.WithParserOptions( | ||
// WithHeadingAttribute allows us to include other attributes in | ||
// heading tags. This is useful for our aria-level implementation of | ||
// increasing heading rankings. | ||
parser.WithHeadingAttribute(), | ||
// Generates an id in every heading tag. This is used in github in | ||
// order to generate a link with a hash that a user would scroll to | ||
// <h1 id="goldmark">goldmark</h1> => github.com/yuin/goldmark#goldmark | ||
parser.WithAutoHeadingID(), | ||
// Include custom ASTTransformer using the readme and module info to | ||
// use translateRelativeLink and translateHTML to modify the AST | ||
// before it is rendered. | ||
parser.WithASTTransformers(util.Prioritized(&ASTTransformer{ | ||
info: mi.SourceInfo, | ||
readme: readme, | ||
}, ASTTransformerPriority)), | ||
), | ||
// These extensions lets users write HTML code in the README. This is | ||
// fine since we process the contents using bluemonday after. | ||
goldmark.WithRendererOptions(goldmarkHtml.WithUnsafe(), goldmarkHtml.WithXHTML()), | ||
goldmark.WithExtensions( | ||
extension.GFM, // Support Github Flavored Markdown. | ||
emoji.Emoji, // Support Github markdown emoji markup. | ||
), | ||
) | ||
gdMarkdown.Renderer().AddOptions( | ||
renderer.WithNodeRenderers( | ||
util.Prioritized(NewHTMLRenderer(mi.SourceInfo, readme), 100), | ||
), | ||
) | ||
|
||
var b bytes.Buffer | ||
contents := []byte(readme.Contents) | ||
gdRenderer := gdMarkdown.Renderer() | ||
gdParser := gdMarkdown.Parser() | ||
|
||
reader := text.NewReader(contents) | ||
doc := gdParser.Parse(reader) | ||
|
||
if err := gdRenderer.Render(&b, contents, doc); err != nil { | ||
return safehtml.HTML{}, nil | ||
} | ||
return sanitizeGoldmarkHTML(&b), nil | ||
} | ||
|
||
// sanitizeGoldmarkHTML sanitizes HTML from a bytes.Buffer so that it is safe. | ||
func sanitizeGoldmarkHTML(b *bytes.Buffer) safehtml.HTML { | ||
p := bluemonday.UGCPolicy() | ||
|
||
p.AllowAttrs("width", "align").OnElements("img") | ||
p.AllowAttrs("width", "align").OnElements("div") | ||
p.AllowAttrs("width", "align").OnElements("p") | ||
// Allow accessible headings (i.e <div role="heading" aria-level="7">). | ||
p.AllowAttrs("width", "align", "role", "aria-level").OnElements("div") | ||
for _, h := range []string{"h1", "h2", "h3", "h4", "h5", "h6"} { | ||
// Needed to preserve github styles heading font-sizes | ||
p.AllowAttrs("class").OnElements(h) | ||
} | ||
|
||
s := string(p.SanitizeBytes(b.Bytes())) | ||
return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(s) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
// Copyright 2020 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package frontend | ||
|
||
import ( | ||
"context" | ||
"strings" | ||
"testing" | ||
|
||
"github.com/google/go-cmp/cmp" | ||
"golang.org/x/pkgsite/internal" | ||
"golang.org/x/pkgsite/internal/experiment" | ||
"golang.org/x/pkgsite/internal/source" | ||
"golang.org/x/pkgsite/internal/testing/sample" | ||
) | ||
|
||
func TestGoldmarkReadmeHTML(t *testing.T) { | ||
ctx := experiment.NewContext(context.Background(), internal.ExperimentGoldmark) | ||
mod := &internal.ModuleInfo{ | ||
Version: sample.VersionString, | ||
SourceInfo: source.NewGitHubInfo(sample.ModulePath, "", sample.VersionString), | ||
} | ||
for _, tc := range []struct { | ||
name string | ||
mi *internal.ModuleInfo | ||
readme *internal.Readme | ||
want string | ||
}{ | ||
{ | ||
name: "Top level heading is h3 from ####, and following header levels become hN-1", | ||
mi: mod, | ||
readme: &internal.Readme{ | ||
Filepath: sample.ReadmeFilePath, | ||
Contents: "#### Heading Rank 4\n\n##### Heading Rank 5", | ||
}, | ||
want: "<h3 class=\"h4\" id=\"heading-rank-4\">Heading Rank 4</h3>\n<h4 class=\"h5\" id=\"heading-rank-5\">Heading Rank 5</h4>", | ||
}, | ||
{ | ||
name: "Github markdown emoji markup is properly rendered", | ||
mi: mod, | ||
readme: &internal.Readme{ | ||
Filepath: sample.ReadmeFilePath, | ||
Contents: "# :zap: Zap \n\n :joy:", | ||
}, | ||
want: "<h3 class=\"h1\" id=\"zap-zap\">⚡ Zap</h3>\n<p>😂</p>", | ||
}, | ||
} { | ||
t.Run(tc.name, func(t *testing.T) { | ||
hgot, err := ReadmeHTML(ctx, tc.mi, tc.readme) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
got := strings.TrimSpace(hgot.String()) | ||
if diff := cmp.Diff(tc.want, got); diff != "" { | ||
t.Errorf("ReadmeHTML(%v) mismatch (-want +got):\n%s", tc.mi, diff) | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters