Skip to content

Commit

Permalink
internal/frontend: separate goldmark readme code from legacy overview…
Browse files Browse the repository at this point in the history
… code

The overview file contains mostly legacy code used
to construct the overview page. This change separates
the goldmark code in preparation for updates that will
generate a TOC for the readme and the future removal of
all overview related code.

For golang/go#39297

Change-Id: Ifa5d0ee3983478fd25c6c59fc1bd2c45457cc05c
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/267117
Run-TryBot: Jamal Carvalho <jamal@golang.org>
TryBot-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
Trust: Jamal Carvalho <jamal@golang.org>
  • Loading branch information
jamalc committed Nov 3, 2020
1 parent 8ac0e15 commit ddd234a
Show file tree
Hide file tree
Showing 5 changed files with 196 additions and 138 deletions.
88 changes: 6 additions & 82 deletions internal/frontend/overview.go
Expand Up @@ -19,14 +19,6 @@ import (
"github.com/google/safehtml/uncheckedconversions"
"github.com/microcosm-cc/bluemonday"
"github.com/russross/blackfriday/v2"
"github.com/yuin/goldmark"
emoji "github.com/yuin/goldmark-emoji"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
goldmarkHtml "github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
"golang.org/x/pkgsite/internal"
Expand Down Expand Up @@ -85,7 +77,7 @@ func constructOverviewDetails(ctx context.Context, mi *internal.ModuleInfo, read
}
if overview.Redistributable && readme != nil {
overview.ReadMeSource = fileSource(mi.ModulePath, mi.Version, readme.Filepath)
r, err := ReadmeHTML(ctx, mi, readme)
r, err := LegacyReadmeHTML(ctx, mi, readme)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -145,13 +137,14 @@ func blackfridayReadmeHTML(ctx context.Context, readme *internal.Readme, mi *int
return sanitizeHTML(b), nil
}

// ReadmeHTML sanitizes readmeContents based on bluemondy.UGCPolicy and returns
// LegacyReadmeHTML sanitizes readmeContents based on bluemondy.UGCPolicy and returns
// a safehtml.HTML. If readmeFilePath indicates that this is a markdown file,
// it will also render the markdown contents using blackfriday.
//
// It is exported to support external testing.
func ReadmeHTML(ctx context.Context, mi *internal.ModuleInfo, readme *internal.Readme) (_ safehtml.HTML, err error) {
defer derrors.Wrap(&err, "readmeHTML(%s@%s)", mi.ModulePath, mi.Version)
// This function is exported for use in an external tool that uses this package to
// compare readme files to see how changes in processing will affect them.
func LegacyReadmeHTML(ctx context.Context, mi *internal.ModuleInfo, readme *internal.Readme) (_ safehtml.HTML, err error) {
defer derrors.Wrap(&err, "LegacyReadmeHTML(%s@%s)", mi.ModulePath, mi.Version)
if readme == nil || readme.Contents == "" {
return safehtml.HTML{}, nil
}
Expand All @@ -164,78 +157,9 @@ func ReadmeHTML(ctx context.Context, mi *internal.ModuleInfo, readme *internal.R
return h, nil
}

if experiment.IsActive(ctx, internal.ExperimentGoldmark) {
// Sets priority value so that we always use our custom transformer
// instead of the default ones. The default values are in:
// https://github.com/yuin/goldmark/blob/7b90f04af43131db79ec320be0bd4744079b346f/parser/parser.go#L567
const ASTTransformerPriority = 10000
gdMarkdown := goldmark.New(
goldmark.WithParserOptions(
// WithHeadingAttribute allows us to include other attributes in
// heading tags. This is useful for our aria-level implementation of
// increasing heading rankings.
parser.WithHeadingAttribute(),
// Generates an id in every heading tag. This is used in github in
// order to generate a link with a hash that a user would scroll to
// <h1 id="goldmark">goldmark</h1> => github.com/yuin/goldmark#goldmark
parser.WithAutoHeadingID(),
// Include custom ASTTransformer using the readme and module info to
// use translateRelativeLink and translateHTML to modify the AST
// before it is rendered.
parser.WithASTTransformers(util.Prioritized(&ASTTransformer{
info: mi.SourceInfo,
readme: readme,
}, ASTTransformerPriority)),
),
// These extensions lets users write HTML code in the README. This is
// fine since we process the contents using bluemonday after.
goldmark.WithRendererOptions(goldmarkHtml.WithUnsafe(), goldmarkHtml.WithXHTML()),
goldmark.WithExtensions(
extension.GFM, // Support Github Flavored Markdown.
emoji.Emoji, // Support Github markdown emoji markup.
),
)
gdMarkdown.Renderer().AddOptions(
renderer.WithNodeRenderers(
util.Prioritized(NewHTMLRenderer(mi.SourceInfo, readme), 100),
),
)

var b bytes.Buffer
contents := []byte(readme.Contents)
gdRenderer := gdMarkdown.Renderer()
gdParser := gdMarkdown.Parser()

reader := text.NewReader(contents)
doc := gdParser.Parse(reader)

if err := gdRenderer.Render(&b, contents, doc); err != nil {
return safehtml.HTML{}, nil
}

return sanitizeGoldmarkHTML(&b), nil
}
return blackfridayReadmeHTML(ctx, readme, mi)
}

// sanitizeGoldmarkHTML sanitizes HTML from a bytes.Buffer so that it is safe.
func sanitizeGoldmarkHTML(b *bytes.Buffer) safehtml.HTML {
p := bluemonday.UGCPolicy()

p.AllowAttrs("width", "align").OnElements("img")
p.AllowAttrs("width", "align").OnElements("div")
p.AllowAttrs("width", "align").OnElements("p")
// Allow accessible headings (i.e <div role="heading" aria-level="7">).
p.AllowAttrs("width", "align", "role", "aria-level").OnElements("div")
for _, h := range []string{"h1", "h2", "h3", "h4", "h5", "h6"} {
// Needed to preserve github styles heading font-sizes
p.AllowAttrs("class").OnElements(h)
}

s := string(p.SanitizeBytes(b.Bytes()))
return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(s)
}

// sanitizeHTML reads HTML from r and sanitizes it to ensure it is safe.
func sanitizeHTML(r io.Reader) safehtml.HTML {
// bluemonday.UGCPolicy allows a broad selection of HTML elements and
Expand Down
48 changes: 2 additions & 46 deletions internal/frontend/overview_test.go
Expand Up @@ -177,50 +177,6 @@ func TestPackageOverviewDetails(t *testing.T) {
}
}

func TestGoldmarkReadmeHTML(t *testing.T) {
ctx := experiment.NewContext(context.Background(), internal.ExperimentGoldmark)
mod := &internal.ModuleInfo{
Version: sample.VersionString,
SourceInfo: source.NewGitHubInfo(sample.ModulePath, "", "v1.2.3"),
}
for _, tc := range []struct {
name string
mi *internal.ModuleInfo
readme *internal.Readme
want string
}{
{
name: "Top level heading is h3 from ####, and following header levels become hN-1",
mi: mod,
readme: &internal.Readme{
Filepath: sample.ReadmeFilePath,
Contents: "#### Heading Rank 4\n\n##### Heading Rank 5",
},
want: "<h3 class=\"h4\" id=\"heading-rank-4\">Heading Rank 4</h3>\n<h4 class=\"h5\" id=\"heading-rank-5\">Heading Rank 5</h4>",
},
{
name: "Github markdown emoji markup is properly rendered",
mi: mod,
readme: &internal.Readme{
Filepath: sample.ReadmeFilePath,
Contents: "# :zap: Zap \n\n :joy:",
},
want: "<h3 class=\"h1\" id=\"zap-zap\">⚡ Zap</h3>\n<p>😂</p>",
},
} {
t.Run(tc.name, func(t *testing.T) {
hgot, err := ReadmeHTML(ctx, tc.mi, tc.readme)
if err != nil {
t.Fatal(err)
}
got := strings.TrimSpace(hgot.String())
if diff := cmp.Diff(tc.want, got); diff != "" {
t.Errorf("readmeHTML(%v) mismatch (-want +got):\n%s", tc.mi, diff)
}
})
}
}

func TestBlackfridayReadmeHTML(t *testing.T) {
ctx := context.Background()
aModule := &internal.ModuleInfo{
Expand Down Expand Up @@ -411,13 +367,13 @@ func TestBlackfridayReadmeHTML(t *testing.T) {
},
}
checkReadme := func(ctx context.Context, t *testing.T, mi *internal.ModuleInfo, readme *internal.Readme, want string) {
hgot, err := ReadmeHTML(ctx, mi, readme)
hgot, err := LegacyReadmeHTML(ctx, mi, readme)
if err != nil {
t.Fatal(err)
}
got := strings.TrimSpace(hgot.String())
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("readmeHTML(%v) mismatch (-want +got):\n%s", mi, diff)
t.Errorf("LegacyReadmeHTML(%v) mismatch (-want +got):\n%s", mi, diff)
}
}
for _, test := range tests {
Expand Down
113 changes: 113 additions & 0 deletions internal/frontend/readme.go
@@ -0,0 +1,113 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package frontend

import (
"bytes"
"context"

"github.com/google/safehtml"
"github.com/google/safehtml/template"
"github.com/google/safehtml/uncheckedconversions"
"github.com/microcosm-cc/bluemonday"
"github.com/yuin/goldmark"
emoji "github.com/yuin/goldmark-emoji"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
goldmarkHtml "github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/derrors"
)

// ReadmeHTML sanitizes readmeContents based on bluemondy.UGCPolicy and returns
// a safehtml.HTML. If readmeFilePath indicates that this is a markdown file,
// it will also render the markdown contents using goldmark.
//
// This function is exported for use in an external tool that uses this package to
// compare readme files to see how changes in processing will affect them.
func ReadmeHTML(ctx context.Context, mi *internal.ModuleInfo, readme *internal.Readme) (_ safehtml.HTML, err error) {
defer derrors.Wrap(&err, "ReadmeHTML(%s@%s)", mi.ModulePath, mi.Version)
if readme == nil || readme.Contents == "" {
return safehtml.HTML{}, nil
}
if !isMarkdown(readme.Filepath) {
t := template.Must(template.New("").Parse(`<pre class="readme">{{.}}</pre>`))
h, err := t.ExecuteToHTML(readme.Contents)
if err != nil {
return safehtml.HTML{}, err
}
return h, nil
}

// Sets priority value so that we always use our custom transformer
// instead of the default ones. The default values are in:
// https://github.com/yuin/goldmark/blob/7b90f04af43131db79ec320be0bd4744079b346f/parser/parser.go#L567
const ASTTransformerPriority = 10000
gdMarkdown := goldmark.New(
goldmark.WithParserOptions(
// WithHeadingAttribute allows us to include other attributes in
// heading tags. This is useful for our aria-level implementation of
// increasing heading rankings.
parser.WithHeadingAttribute(),
// Generates an id in every heading tag. This is used in github in
// order to generate a link with a hash that a user would scroll to
// <h1 id="goldmark">goldmark</h1> => github.com/yuin/goldmark#goldmark
parser.WithAutoHeadingID(),
// Include custom ASTTransformer using the readme and module info to
// use translateRelativeLink and translateHTML to modify the AST
// before it is rendered.
parser.WithASTTransformers(util.Prioritized(&ASTTransformer{
info: mi.SourceInfo,
readme: readme,
}, ASTTransformerPriority)),
),
// These extensions lets users write HTML code in the README. This is
// fine since we process the contents using bluemonday after.
goldmark.WithRendererOptions(goldmarkHtml.WithUnsafe(), goldmarkHtml.WithXHTML()),
goldmark.WithExtensions(
extension.GFM, // Support Github Flavored Markdown.
emoji.Emoji, // Support Github markdown emoji markup.
),
)
gdMarkdown.Renderer().AddOptions(
renderer.WithNodeRenderers(
util.Prioritized(NewHTMLRenderer(mi.SourceInfo, readme), 100),
),
)

var b bytes.Buffer
contents := []byte(readme.Contents)
gdRenderer := gdMarkdown.Renderer()
gdParser := gdMarkdown.Parser()

reader := text.NewReader(contents)
doc := gdParser.Parse(reader)

if err := gdRenderer.Render(&b, contents, doc); err != nil {
return safehtml.HTML{}, nil
}
return sanitizeGoldmarkHTML(&b), nil
}

// sanitizeGoldmarkHTML sanitizes HTML from a bytes.Buffer so that it is safe.
func sanitizeGoldmarkHTML(b *bytes.Buffer) safehtml.HTML {
p := bluemonday.UGCPolicy()

p.AllowAttrs("width", "align").OnElements("img")
p.AllowAttrs("width", "align").OnElements("div")
p.AllowAttrs("width", "align").OnElements("p")
// Allow accessible headings (i.e <div role="heading" aria-level="7">).
p.AllowAttrs("width", "align", "role", "aria-level").OnElements("div")
for _, h := range []string{"h1", "h2", "h3", "h4", "h5", "h6"} {
// Needed to preserve github styles heading font-sizes
p.AllowAttrs("class").OnElements(h)
}

s := string(p.SanitizeBytes(b.Bytes()))
return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(s)
}
61 changes: 61 additions & 0 deletions internal/frontend/readme_test.go
@@ -0,0 +1,61 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package frontend

import (
"context"
"strings"
"testing"

"github.com/google/go-cmp/cmp"
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/experiment"
"golang.org/x/pkgsite/internal/source"
"golang.org/x/pkgsite/internal/testing/sample"
)

func TestGoldmarkReadmeHTML(t *testing.T) {
ctx := experiment.NewContext(context.Background(), internal.ExperimentGoldmark)
mod := &internal.ModuleInfo{
Version: sample.VersionString,
SourceInfo: source.NewGitHubInfo(sample.ModulePath, "", sample.VersionString),
}
for _, tc := range []struct {
name string
mi *internal.ModuleInfo
readme *internal.Readme
want string
}{
{
name: "Top level heading is h3 from ####, and following header levels become hN-1",
mi: mod,
readme: &internal.Readme{
Filepath: sample.ReadmeFilePath,
Contents: "#### Heading Rank 4\n\n##### Heading Rank 5",
},
want: "<h3 class=\"h4\" id=\"heading-rank-4\">Heading Rank 4</h3>\n<h4 class=\"h5\" id=\"heading-rank-5\">Heading Rank 5</h4>",
},
{
name: "Github markdown emoji markup is properly rendered",
mi: mod,
readme: &internal.Readme{
Filepath: sample.ReadmeFilePath,
Contents: "# :zap: Zap \n\n :joy:",
},
want: "<h3 class=\"h1\" id=\"zap-zap\">⚡ Zap</h3>\n<p>😂</p>",
},
} {
t.Run(tc.name, func(t *testing.T) {
hgot, err := ReadmeHTML(ctx, tc.mi, tc.readme)
if err != nil {
t.Fatal(err)
}
got := strings.TrimSpace(hgot.String())
if diff := cmp.Diff(tc.want, got); diff != "" {
t.Errorf("ReadmeHTML(%v) mismatch (-want +got):\n%s", tc.mi, diff)
}
})
}
}
24 changes: 14 additions & 10 deletions internal/frontend/unit_main.go
Expand Up @@ -170,18 +170,22 @@ func moduleInfo(um *internal.UnitMeta) *internal.ModuleInfo {
}

// readmeContent renders the readme to html.
func readmeContent(ctx context.Context, um *internal.UnitMeta, readme *internal.Readme) (safehtml.HTML, error) {
func readmeContent(ctx context.Context, um *internal.UnitMeta, readme *internal.Readme) (_ safehtml.HTML, err error) {
defer middleware.ElapsedStat(ctx, "readmeContent")()

if um.IsRedistributable && readme != nil {
mi := moduleInfo(um)
readme, err := ReadmeHTML(ctx, mi, readme)
if err != nil {
return safehtml.HTML{}, err
}
return readme, nil
if !um.IsRedistributable || readme == nil {
return safehtml.HTML{}, nil
}
mi := moduleInfo(um)
var readmeHTML safehtml.HTML
if experiment.IsActive(ctx, internal.ExperimentGoldmark) {
readmeHTML, err = ReadmeHTML(ctx, mi, readme)
} else {
readmeHTML, err = LegacyReadmeHTML(ctx, mi, readme)
}
if err != nil {
return safehtml.HTML{}, err
}
return safehtml.HTML{}, nil
return readmeHTML, nil
}

func getNestedModules(ctx context.Context, ds internal.DataSource, um *internal.UnitMeta) ([]*NestedModule, error) {
Expand Down

0 comments on commit ddd234a

Please sign in to comment.