Skip to content

Commit

Permalink
cmd/go: avoid compiling most regexes at init
Browse files Browse the repository at this point in the history
These regexes are all related to commands like get and build, so they're
unnecessary for simpler commands like env. In particular, we need env to
be fast, since libraries like go/packages call it early and often. Some
external Go tools are interactive, so milliseconds matter.

lazyregexp eagerly compiles the patterns when running from within a test
binary, so there's no longer any need to do that as part of non-test
binaries.

Picking up the low-hanging fruit spotted by 'perf record' shaves off
well over a full millisecond off the benchmark on my laptop:

name         old time/op    new time/op    delta
ExecGoEnv-8    4.92ms ± 1%    3.81ms ± 0%  -22.52%  (p=0.004 n=6+5)

This CL required adding a few more methods to the lazy regexp wrapper.

Updates #29382.

Change-Id: I22417ab6258f7437a2feea0d25ceb2bb4d735a15
Reviewed-on: https://go-review.googlesource.com/c/155540
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Bryan C. Mills <bcmills@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
  • Loading branch information
mvdan committed Feb 27, 2019
1 parent 694ee61 commit df557fe
Show file tree
Hide file tree
Showing 10 changed files with 67 additions and 51 deletions.
50 changes: 22 additions & 28 deletions src/cmd/go/internal/get/vcs.go
Expand Up @@ -8,6 +8,7 @@ import (
"encoding/json"
"errors"
"fmt"
"internal/lazyregexp"
"internal/singleflight"
"log"
"net/url"
Expand Down Expand Up @@ -170,7 +171,7 @@ var vcsGit = &vcsCmd{

// scpSyntaxRe matches the SCP-like addresses used by Git to access
// repositories by SSH.
var scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`)
var scpSyntaxRe = lazyregexp.New(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`)

func gitRemoteRepo(vcsGit *vcsCmd, rootDir string) (remoteRepo string, err error) {
cmd := "config remote.origin.url"
Expand Down Expand Up @@ -525,13 +526,11 @@ func (v *vcsCmd) tagSync(dir, tag string) error {
// version control system and repository name.
type vcsPath struct {
prefix string // prefix this description applies to
re string // pattern for import path
regexp *lazyregexp.Regexp // compiled pattern for import path
repo string // repository to use (expand with match of re)
vcs string // version control system to use (expand with match of re)
check func(match map[string]string) error // additional checks
ping bool // ping for scheme to use to download repo

regexp *regexp.Regexp // cached compiled form of re
}

// vcsFromDir inspects dir and its parents to determine the
Expand Down Expand Up @@ -632,7 +631,14 @@ type RepoRoot struct {
vcs *vcsCmd // internal: vcs command access
}

var httpPrefixRE = regexp.MustCompile(`^https?:`)
func httpPrefix(s string) string {
for _, prefix := range [...]string{"http:", "https:"} {
if strings.HasPrefix(s, prefix) {
return prefix
}
}
return ""
}

// ModuleMode specifies whether to prefer modules when looking up code sources.
type ModuleMode int
Expand Down Expand Up @@ -677,10 +683,10 @@ var errUnknownSite = errors.New("dynamic lookup required to find mapping")
func repoRootFromVCSPaths(importPath, scheme string, security web.SecurityMode, vcsPaths []*vcsPath) (*RepoRoot, error) {
// A common error is to use https://packagepath because that's what
// hg and git require. Diagnose this helpfully.
if loc := httpPrefixRE.FindStringIndex(importPath); loc != nil {
if prefix := httpPrefix(importPath); prefix != "" {
// The importPath has been cleaned, so has only one slash. The pattern
// ignores the slashes; the error message puts them back on the RHS at least.
return nil, fmt.Errorf("%q not allowed in import path", importPath[loc[0]:loc[1]]+"//")
return nil, fmt.Errorf("%q not allowed in import path", prefix+"//")
}
for _, srv := range vcsPaths {
if !strings.HasPrefix(importPath, srv.prefix) {
Expand Down Expand Up @@ -975,7 +981,7 @@ var vcsPaths = []*vcsPath{
// Github
{
prefix: "github.com/",
re: `^(?P<root>github\.com/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[\p{L}0-9_.\-]+)*$`,
regexp: lazyregexp.New(`^(?P<root>github\.com/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[\p{L}0-9_.\-]+)*$`),
vcs: "git",
repo: "https://{root}",
check: noVCSSuffix,
Expand All @@ -984,15 +990,15 @@ var vcsPaths = []*vcsPath{
// Bitbucket
{
prefix: "bitbucket.org/",
re: `^(?P<root>bitbucket\.org/(?P<bitname>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`,
regexp: lazyregexp.New(`^(?P<root>bitbucket\.org/(?P<bitname>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`),
repo: "https://{root}",
check: bitbucketVCS,
},

// IBM DevOps Services (JazzHub)
{
prefix: "hub.jazz.net/git/",
re: `^(?P<root>hub\.jazz\.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`,
regexp: lazyregexp.New(`^(?P<root>hub\.jazz\.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`),
vcs: "git",
repo: "https://{root}",
check: noVCSSuffix,
Expand All @@ -1001,32 +1007,32 @@ var vcsPaths = []*vcsPath{
// Git at Apache
{
prefix: "git.apache.org/",
re: `^(?P<root>git\.apache\.org/[a-z0-9_.\-]+\.git)(/[A-Za-z0-9_.\-]+)*$`,
regexp: lazyregexp.New(`^(?P<root>git\.apache\.org/[a-z0-9_.\-]+\.git)(/[A-Za-z0-9_.\-]+)*$`),
vcs: "git",
repo: "https://{root}",
},

// Git at OpenStack
{
prefix: "git.openstack.org/",
re: `^(?P<root>git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(\.git)?(/[A-Za-z0-9_.\-]+)*$`,
regexp: lazyregexp.New(`^(?P<root>git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(\.git)?(/[A-Za-z0-9_.\-]+)*$`),
vcs: "git",
repo: "https://{root}",
},

// chiselapp.com for fossil
{
prefix: "chiselapp.com/",
re: `^(?P<root>chiselapp\.com/user/[A-Za-z0-9]+/repository/[A-Za-z0-9_.\-]+)$`,
regexp: lazyregexp.New(`^(?P<root>chiselapp\.com/user/[A-Za-z0-9]+/repository/[A-Za-z0-9_.\-]+)$`),
vcs: "fossil",
repo: "https://{root}",
},

// General syntax for any server.
// Must be last.
{
re: `^(?P<root>(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?(/~?[A-Za-z0-9_.\-]+)+?)\.(?P<vcs>bzr|fossil|git|hg|svn))(/~?[A-Za-z0-9_.\-]+)*$`,
ping: true,
regexp: lazyregexp.New(`(?P<root>(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?(/~?[A-Za-z0-9_.\-]+)+?)\.(?P<vcs>bzr|fossil|git|hg|svn))(/~?[A-Za-z0-9_.\-]+)*$`),
ping: true,
},
}

Expand All @@ -1038,25 +1044,13 @@ var vcsPathsAfterDynamic = []*vcsPath{
// Launchpad. See golang.org/issue/11436.
{
prefix: "launchpad.net/",
re: `^(?P<root>launchpad\.net/((?P<project>[A-Za-z0-9_.\-]+)(?P<series>/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`,
regexp: lazyregexp.New(`^(?P<root>launchpad\.net/((?P<project>[A-Za-z0-9_.\-]+)(?P<series>/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`),
vcs: "bzr",
repo: "https://{root}",
check: launchpadVCS,
},
}

func init() {
// fill in cached regexps.
// Doing this eagerly discovers invalid regexp syntax
// without having to run a command that needs that regexp.
for _, srv := range vcsPaths {
srv.regexp = regexp.MustCompile(srv.re)
}
for _, srv := range vcsPathsAfterDynamic {
srv.regexp = regexp.MustCompile(srv.re)
}
}

// noVCSSuffix checks that the repository name does not
// end in .foo for any version control system foo.
// The usual culprit is ".git".
Expand Down
10 changes: 5 additions & 5 deletions src/cmd/go/internal/modfetch/codehost/vcs.go
Expand Up @@ -7,11 +7,11 @@ package codehost
import (
"encoding/xml"
"fmt"
"internal/lazyregexp"
"io"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -124,10 +124,10 @@ type vcsCmd struct {
vcs string // vcs name "hg"
init func(remote string) []string // cmd to init repo to track remote
tags func(remote string) []string // cmd to list local tags
tagRE *regexp.Regexp // regexp to extract tag names from output of tags cmd
tagRE *lazyregexp.Regexp // regexp to extract tag names from output of tags cmd
branches func(remote string) []string // cmd to list local branches
branchRE *regexp.Regexp // regexp to extract branch names from output of tags cmd
badLocalRevRE *regexp.Regexp // regexp of names that must not be served out of local cache without doing fetch first
branchRE *lazyregexp.Regexp // regexp to extract branch names from output of tags cmd
badLocalRevRE *lazyregexp.Regexp // regexp of names that must not be served out of local cache without doing fetch first
statLocal func(rev, remote string) []string // cmd to stat local rev
parseStat func(rev, out string) (*RevInfo, error) // cmd to parse output of statLocal
fetch []string // cmd to fetch everything from remote
Expand All @@ -136,7 +136,7 @@ type vcsCmd struct {
readZip func(rev, subdir, remote, target string) []string // cmd to read rev's subdir as zip file
}

var re = regexp.MustCompile
var re = lazyregexp.New

var vcsCmds = map[string]*vcsCmd{
"hg": {
Expand Down
4 changes: 2 additions & 2 deletions src/cmd/go/internal/modfetch/pseudo.go
Expand Up @@ -37,7 +37,7 @@ package modfetch
import (
"cmd/go/internal/semver"
"fmt"
"regexp"
"internal/lazyregexp"
"strings"
"time"
)
Expand Down Expand Up @@ -86,7 +86,7 @@ func PseudoVersion(major, older string, t time.Time, rev string) string {
return v + patch + "-0." + segment + build
}

var pseudoVersionRE = regexp.MustCompile(`^v[0-9]+\.(0\.0-|\d+\.\d+-([^+]*\.)?0\.)\d{14}-[A-Za-z0-9]+(\+incompatible)?$`)
var pseudoVersionRE = lazyregexp.New(`^v[0-9]+\.(0\.0-|\d+\.\d+-([^+]*\.)?0\.)\d{14}-[A-Za-z0-9]+(\+incompatible)?$`)

// IsPseudoVersion reports whether v is a pseudo-version.
func IsPseudoVersion(v string) bool {
Expand Down
4 changes: 2 additions & 2 deletions src/cmd/go/internal/modfile/rule.go
Expand Up @@ -8,8 +8,8 @@ import (
"bytes"
"errors"
"fmt"
"internal/lazyregexp"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -154,7 +154,7 @@ func parseToFile(file string, data []byte, fix VersionFixer, strict bool) (*File
return f, nil
}

var GoVersionRE = regexp.MustCompile(`([1-9][0-9]*)\.(0|[1-9][0-9]*)`)
var GoVersionRE = lazyregexp.New(`([1-9][0-9]*)\.(0|[1-9][0-9]*)`)

func (f *File) add(errs *bytes.Buffer, line *Line, verb string, args []string, fix VersionFixer, strict bool) {
// If strict is false, this module is a dependency.
Expand Down
6 changes: 3 additions & 3 deletions src/cmd/go/internal/modload/init.go
Expand Up @@ -21,11 +21,11 @@ import (
"encoding/json"
"fmt"
"go/build"
"internal/lazyregexp"
"io/ioutil"
"os"
"path"
"path/filepath"
"regexp"
"runtime/debug"
"strconv"
"strings"
Expand Down Expand Up @@ -569,8 +569,8 @@ func findModulePath(dir string) (string, error) {
}

var (
gitOriginRE = regexp.MustCompile(`(?m)^\[remote "origin"\]\r?\n\turl = (?:https://github.com/|git@github.com:|gh:)([^/]+/[^/]+?)(\.git)?\r?\n`)
importCommentRE = regexp.MustCompile(`(?m)^package[ \t]+[^ \t\r\n/]+[ \t]+//[ \t]+import[ \t]+(\"[^"]+\")[ \t]*\r?\n`)
gitOriginRE = lazyregexp.New(`(?m)^\[remote "origin"\]\r?\n\turl = (?:https://github.com/|git@github.com:|gh:)([^/]+/[^/]+?)(\.git)?\r?\n`)
importCommentRE = lazyregexp.New(`(?m)^package[ \t]+[^ \t\r\n/]+[ \t]+//[ \t]+import[ \t]+(\"[^"]+\")[ \t]*\r?\n`)
)

func findImportComment(file string) string {
Expand Down
7 changes: 4 additions & 3 deletions src/cmd/go/internal/work/exec.go
Expand Up @@ -11,6 +11,7 @@ import (
"encoding/json"
"errors"
"fmt"
"internal/lazyregexp"
"io"
"io/ioutil"
"log"
Expand Down Expand Up @@ -1838,8 +1839,8 @@ func (b *Builder) showOutput(a *Action, dir, desc, out string) {
// print this error.
var errPrintedOutput = errors.New("already printed output - no need to show error")

var cgoLine = regexp.MustCompile(`\[[^\[\]]+\.(cgo1|cover)\.go:[0-9]+(:[0-9]+)?\]`)
var cgoTypeSigRe = regexp.MustCompile(`\b_C2?(type|func|var|macro)_\B`)
var cgoLine = lazyregexp.New(`\[[^\[\]]+\.(cgo1|cover)\.go:[0-9]+(:[0-9]+)?\]`)
var cgoTypeSigRe = lazyregexp.New(`\b_C2?(type|func|var|macro)_\B`)

// run runs the command given by cmdline in the directory dir.
// If the command fails, run prints information about the failure
Expand Down Expand Up @@ -2412,7 +2413,7 @@ func buildFlags(name, defaults string, fromPackage []string, check func(string,
return str.StringList(envList("CGO_"+name, defaults), fromPackage), nil
}

var cgoRe = regexp.MustCompile(`[/\\:]`)
var cgoRe = lazyregexp.New(`[/\\:]`)

func (b *Builder) cgo(a *Action, cgoExe, objdir string, pcCFLAGS, pcLDFLAGS, cgofiles, gccfiles, gxxfiles, mfiles, ffiles []string) (outGo, outObj []string, err error) {
p := a.Package
Expand Down
9 changes: 5 additions & 4 deletions src/cmd/go/internal/work/security.go
Expand Up @@ -32,14 +32,15 @@ package work
import (
"cmd/go/internal/load"
"fmt"
"internal/lazyregexp"
"os"
"regexp"
"strings"
)

var re = regexp.MustCompile
var re = lazyregexp.New

var validCompilerFlags = []*regexp.Regexp{
var validCompilerFlags = []*lazyregexp.Regexp{
re(`-D([A-Za-z_].*)`),
re(`-F([^@\-].*)`),
re(`-I([^@\-].*)`),
Expand Down Expand Up @@ -130,7 +131,7 @@ var validCompilerFlagsWithNextArg = []string{
"-x",
}

var validLinkerFlags = []*regexp.Regexp{
var validLinkerFlags = []*lazyregexp.Regexp{
re(`-F([^@\-].*)`),
re(`-l([^@\-].*)`),
re(`-L([^@\-].*)`),
Expand Down Expand Up @@ -217,7 +218,7 @@ func checkLinkerFlags(name, source string, list []string) error {
return checkFlags(name, source, list, validLinkerFlags, validLinkerFlagsWithNextArg)
}

func checkFlags(name, source string, list []string, valid []*regexp.Regexp, validNext []string) error {
func checkFlags(name, source string, list []string, valid []*lazyregexp.Regexp, validNext []string) error {
// Let users override rules with $CGO_CFLAGS_ALLOW, $CGO_CFLAGS_DISALLOW, etc.
var (
allow *regexp.Regexp
Expand Down
4 changes: 2 additions & 2 deletions src/go/doc/example.go
Expand Up @@ -9,8 +9,8 @@ package doc
import (
"go/ast"
"go/token"
"internal/lazyregexp"
"path"
"regexp"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -104,7 +104,7 @@ func Examples(files ...*ast.File) []*Example {
return list
}

var outputPrefix = regexp.MustCompile(`(?i)^[[:space:]]*(unordered )?output:`)
var outputPrefix = lazyregexp.New(`(?i)^[[:space:]]*(unordered )?output:`)

// Extracts the expected output and whether there was a valid output comment
func exampleOutput(b *ast.BlockStmt, comments []*ast.CommentGroup) (output string, unordered, ok bool) {
Expand Down
4 changes: 2 additions & 2 deletions src/go/doc/headscan.go
Expand Up @@ -22,9 +22,9 @@ import (
"go/doc"
"go/parser"
"go/token"
"internal/lazyregexp"
"os"
"path/filepath"
"regexp"
"runtime"
"strings"
)
Expand All @@ -35,7 +35,7 @@ var (
)

// ToHTML in comment.go assigns a (possibly blank) ID to each heading
var html_h = regexp.MustCompile(`<h3 id="[^"]*">`)
var html_h = lazyregexp.New(`<h3 id="[^"]*">`)

const html_endh = "</h3>\n"

Expand Down
20 changes: 20 additions & 0 deletions src/internal/lazyregexp/lazyre.go
Expand Up @@ -27,6 +27,14 @@ func (r *Regexp) build() {
r.str = ""
}

func (r *Regexp) FindSubmatch(s []byte) [][]byte {
return r.re().FindSubmatch(s)
}

func (r *Regexp) FindStringSubmatch(s string) []string {
return r.re().FindStringSubmatch(s)
}

func (r *Regexp) FindStringSubmatchIndex(s string) []int {
return r.re().FindStringSubmatchIndex(s)
}
Expand All @@ -35,10 +43,22 @@ func (r *Regexp) ReplaceAllString(src, repl string) string {
return r.re().ReplaceAllString(src, repl)
}

func (r *Regexp) FindString(s string) string {
return r.re().FindString(s)
}

func (r *Regexp) FindAllString(s string, n int) []string {
return r.re().FindAllString(s, n)
}

func (r *Regexp) MatchString(s string) bool {
return r.re().MatchString(s)
}

func (r *Regexp) SubexpNames() []string {
return r.re().SubexpNames()
}

var inTest = len(os.Args) > 0 && strings.HasSuffix(strings.TrimSuffix(os.Args[0], ".exe"), ".test")

func New(str string) *Regexp {
Expand Down

0 comments on commit df557fe

Please sign in to comment.