diff --git a/go/gcexportdata/gcexportdata.go b/go/gcexportdata/gcexportdata.go index ddc276cfbcb28..2ed25a750248e 100644 --- a/go/gcexportdata/gcexportdata.go +++ b/go/gcexportdata/gcexportdata.go @@ -116,13 +116,29 @@ func Read(in io.Reader, fset *token.FileSet, imports map[string]*types.Package, // The indexed export format starts with an 'i'; the older // binary export format starts with a 'c', 'd', or 'v' // (from "version"). Select appropriate importer. - if len(data) > 0 && data[0] == 'i' { - _, pkg, err := gcimporter.IImportData(fset, imports, data[1:], path) - return pkg, err - } + if len(data) > 0 { + switch data[0] { + case 'i': + _, pkg, err := gcimporter.IImportData(fset, imports, data[1:], path) + return pkg, err + + case 'v', 'c', 'd': + _, pkg, err := gcimporter.BImportData(fset, imports, data, path) + return pkg, err - _, pkg, err := gcimporter.BImportData(fset, imports, data, path) - return pkg, err + case 'u': + _, pkg, err := gcimporter.UImportData(fset, imports, data[1:], path) + return pkg, err + + default: + l := len(data) + if l > 10 { + l = 10 + } + return nil, fmt.Errorf("unexpected export data with prefix %q for path %s", string(data[:l]), path) + } + } + return nil, fmt.Errorf("empty export data for %s", path) } // Write writes encoded type information for the specified package to out. diff --git a/go/internal/gcimporter/gcimporter.go b/go/internal/gcimporter/gcimporter.go index 493bfa03b0f62..e96c39600d16b 100644 --- a/go/internal/gcimporter/gcimporter.go +++ b/go/internal/gcimporter/gcimporter.go @@ -181,8 +181,9 @@ func Import(packages map[string]*types.Package, path, srcDir string, lookup func defer rc.Close() var hdr string + var size int64 buf := bufio.NewReader(rc) - if hdr, _, err = FindExportData(buf); err != nil { + if hdr, size, err = FindExportData(buf); err != nil { return } @@ -210,10 +211,27 @@ func Import(packages map[string]*types.Package, path, srcDir string, lookup func // The indexed export format starts with an 'i'; the older // binary export format starts with a 'c', 'd', or 'v' // (from "version"). Select appropriate importer. - if len(data) > 0 && data[0] == 'i' { - _, pkg, err = IImportData(fset, packages, data[1:], id) - } else { - _, pkg, err = BImportData(fset, packages, data, id) + if len(data) > 0 { + switch data[0] { + case 'i': + _, pkg, err := IImportData(fset, packages, data[1:], id) + return pkg, err + + case 'v', 'c', 'd': + _, pkg, err := BImportData(fset, packages, data, id) + return pkg, err + + case 'u': + _, pkg, err := UImportData(fset, packages, data[1:size], id) + return pkg, err + + default: + l := len(data) + if l > 10 { + l = 10 + } + return nil, fmt.Errorf("unexpected export data with prefix %q for path %s", string(data[:l]), id) + } } default: diff --git a/go/internal/gcimporter/gcimporter_test.go b/go/internal/gcimporter/gcimporter_test.go index 4e992af76b3f7..66c09269d88f6 100644 --- a/go/internal/gcimporter/gcimporter_test.go +++ b/go/internal/gcimporter/gcimporter_test.go @@ -45,6 +45,10 @@ func needsCompiler(t *testing.T, compiler string) { // compile runs the compiler on filename, with dirname as the working directory, // and writes the output file to outdirname. func compile(t *testing.T, dirname, filename, outdirname string) string { + return compilePkg(t, dirname, filename, outdirname, "p") +} + +func compilePkg(t *testing.T, dirname, filename, outdirname, pkg string) string { testenv.NeedsGoBuild(t) // filename must end with ".go" @@ -53,12 +57,12 @@ func compile(t *testing.T, dirname, filename, outdirname string) string { } basename := filepath.Base(filename) outname := filepath.Join(outdirname, basename[:len(basename)-2]+"o") - cmd := exec.Command("go", "tool", "compile", "-p=p", "-o", outname, filename) + cmd := exec.Command("go", "tool", "compile", "-p="+pkg, "-o", outname, filename) cmd.Dir = dirname out, err := cmd.CombinedOutput() if err != nil { t.Logf("%s", out) - t.Fatalf("go tool compile %s failed: %s", filename, err) + t.Fatalf("(cd %v && %v) failed: %s", cmd.Dir, cmd, err) } return outname } @@ -140,7 +144,11 @@ func TestImportTestdata(t *testing.T) { // For now, we just test the presence of a few packages // that we know are there for sure. got := fmt.Sprint(pkg.Imports()) - for _, want := range []string{"go/ast", "go/token"} { + wants := []string{"go/ast", "go/token"} + if unifiedIR { + wants = []string{"go/ast"} + } + for _, want := range wants { if !strings.Contains(got, want) { t.Errorf(`Package("exports").Imports() = %s, does not contain %s`, got, want) } @@ -364,6 +372,14 @@ func verifyInterfaceMethodRecvs(t *testing.T, named *types.Named, level int) { return // not an interface } + // The unified IR importer always sets interface method receiver + // parameters to point to the Interface type, rather than the Named. + // See #49906. + var want types.Type = named + if unifiedIR { + want = iface + } + // check explicitly declared methods for i := 0; i < iface.NumExplicitMethods(); i++ { m := iface.ExplicitMethod(i) @@ -372,8 +388,8 @@ func verifyInterfaceMethodRecvs(t *testing.T, named *types.Named, level int) { t.Errorf("%s: missing receiver type", m) continue } - if recv.Type() != named { - t.Errorf("%s: got recv type %s; want %s", m, recv.Type(), named) + if recv.Type() != want { + t.Errorf("%s: got recv type %s; want %s", m, recv.Type(), want) } } @@ -451,7 +467,7 @@ func TestIssue13566(t *testing.T) { if err != nil { t.Fatal(err) } - compile(t, "testdata", "a.go", testoutdir) + compilePkg(t, "testdata", "a.go", testoutdir, apkg(testoutdir)) compile(t, testoutdir, bpath, testoutdir) // import must succeed (test for issue at hand) @@ -611,13 +627,22 @@ func TestIssue51836(t *testing.T) { if err != nil { t.Fatal(err) } - compile(t, dir, "a.go", testoutdir) + compilePkg(t, dir, "a.go", testoutdir, apkg(testoutdir)) compile(t, testoutdir, bpath, testoutdir) // import must succeed (test for issue at hand) _ = importPkg(t, "./testdata/aa", tmpdir) } +// apkg returns the package "a" prefixed by (as a package) testoutdir +func apkg(testoutdir string) string { + apkg := testoutdir + "/a" + if os.PathSeparator != '/' { + apkg = strings.ReplaceAll(apkg, string(os.PathSeparator), "/") + } + return apkg +} + func importPkg(t *testing.T, path, srcDir string) *types.Package { pkg, err := Import(make(map[string]*types.Package), path, srcDir, nil) if err != nil { diff --git a/go/internal/gcimporter/unified_no.go b/go/internal/gcimporter/unified_no.go new file mode 100644 index 0000000000000..286bf445483d8 --- /dev/null +++ b/go/internal/gcimporter/unified_no.go @@ -0,0 +1,10 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !(go1.18 && goexperiment.unified) +// +build !go1.18 !goexperiment.unified + +package gcimporter + +const unifiedIR = false diff --git a/go/internal/gcimporter/unified_yes.go b/go/internal/gcimporter/unified_yes.go new file mode 100644 index 0000000000000..b5d69ffbe682d --- /dev/null +++ b/go/internal/gcimporter/unified_yes.go @@ -0,0 +1,10 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.18 && goexperiment.unified +// +build go1.18,goexperiment.unified + +package gcimporter + +const unifiedIR = true diff --git a/go/internal/gcimporter/ureader_no.go b/go/internal/gcimporter/ureader_no.go new file mode 100644 index 0000000000000..8eb20729c2ad5 --- /dev/null +++ b/go/internal/gcimporter/ureader_no.go @@ -0,0 +1,19 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !go1.18 +// +build !go1.18 + +package gcimporter + +import ( + "fmt" + "go/token" + "go/types" +) + +func UImportData(fset *token.FileSet, imports map[string]*types.Package, data []byte, path string) (_ int, pkg *types.Package, err error) { + err = fmt.Errorf("go/tools compiled with a Go version earlier than 1.18 cannot read unified IR export data") + return +} diff --git a/go/internal/gcimporter/ureader_yes.go b/go/internal/gcimporter/ureader_yes.go new file mode 100644 index 0000000000000..3c1a4375435a7 --- /dev/null +++ b/go/internal/gcimporter/ureader_yes.go @@ -0,0 +1,612 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Derived from go/internal/gcimporter/ureader.go + +//go:build go1.18 +// +build go1.18 + +package gcimporter + +import ( + "go/token" + "go/types" + "strings" + + "golang.org/x/tools/go/internal/pkgbits" +) + +// A pkgReader holds the shared state for reading a unified IR package +// description. +type pkgReader struct { + pkgbits.PkgDecoder + + fake fakeFileSet + + ctxt *types.Context + imports map[string]*types.Package // previously imported packages, indexed by path + + // lazily initialized arrays corresponding to the unified IR + // PosBase, Pkg, and Type sections, respectively. + posBases []string // position bases (i.e., file names) + pkgs []*types.Package + typs []types.Type + + // laterFns holds functions that need to be invoked at the end of + // import reading. + laterFns []func() +} + +// later adds a function to be invoked at the end of import reading. +func (pr *pkgReader) later(fn func()) { + pr.laterFns = append(pr.laterFns, fn) +} + +// See cmd/compile/internal/noder.derivedInfo. +type derivedInfo struct { + idx pkgbits.Index + needed bool +} + +// See cmd/compile/internal/noder.typeInfo. +type typeInfo struct { + idx pkgbits.Index + derived bool +} + +func UImportData(fset *token.FileSet, imports map[string]*types.Package, data []byte, path string) (_ int, pkg *types.Package, err error) { + s := string(data) + s = s[:strings.LastIndex(s, "\n$$\n")] + input := pkgbits.NewPkgDecoder(path, s) + pkg = readUnifiedPackage(fset, nil, imports, input) + return +} + +// readUnifiedPackage reads a package description from the given +// unified IR export data decoder. +func readUnifiedPackage(fset *token.FileSet, ctxt *types.Context, imports map[string]*types.Package, input pkgbits.PkgDecoder) *types.Package { + pr := pkgReader{ + PkgDecoder: input, + + fake: fakeFileSet{ + fset: fset, + files: make(map[string]*fileInfo), + }, + + ctxt: ctxt, + imports: imports, + + posBases: make([]string, input.NumElems(pkgbits.RelocPosBase)), + pkgs: make([]*types.Package, input.NumElems(pkgbits.RelocPkg)), + typs: make([]types.Type, input.NumElems(pkgbits.RelocType)), + } + defer pr.fake.setLines() + + r := pr.newReader(pkgbits.RelocMeta, pkgbits.PublicRootIdx, pkgbits.SyncPublic) + pkg := r.pkg() + r.Bool() // has init + + for i, n := 0, r.Len(); i < n; i++ { + // As if r.obj(), but avoiding the Scope.Lookup call, + // to avoid eager loading of imports. + r.Sync(pkgbits.SyncObject) + assert(!r.Bool()) + r.p.objIdx(r.Reloc(pkgbits.RelocObj)) + assert(r.Len() == 0) + } + + r.Sync(pkgbits.SyncEOF) + + for _, fn := range pr.laterFns { + fn() + } + + pkg.MarkComplete() + return pkg +} + +// A reader holds the state for reading a single unified IR element +// within a package. +type reader struct { + pkgbits.Decoder + + p *pkgReader + + dict *readerDict +} + +// A readerDict holds the state for type parameters that parameterize +// the current unified IR element. +type readerDict struct { + // bounds is a slice of typeInfos corresponding to the underlying + // bounds of the element's type parameters. + bounds []typeInfo + + // tparams is a slice of the constructed TypeParams for the element. + tparams []*types.TypeParam + + // devived is a slice of types derived from tparams, which may be + // instantiated while reading the current element. + derived []derivedInfo + derivedTypes []types.Type // lazily instantiated from derived +} + +func (pr *pkgReader) newReader(k pkgbits.RelocKind, idx pkgbits.Index, marker pkgbits.SyncMarker) *reader { + return &reader{ + Decoder: pr.NewDecoder(k, idx, marker), + p: pr, + } +} + +// @@@ Positions + +func (r *reader) pos() token.Pos { + r.Sync(pkgbits.SyncPos) + if !r.Bool() { + return token.NoPos + } + + // TODO(mdempsky): Delta encoding. + posBase := r.posBase() + line := r.Uint() + col := r.Uint() + return r.p.fake.pos(posBase, int(line), int(col)) +} + +func (r *reader) posBase() string { + return r.p.posBaseIdx(r.Reloc(pkgbits.RelocPosBase)) +} + +func (pr *pkgReader) posBaseIdx(idx pkgbits.Index) string { + if b := pr.posBases[idx]; b != "" { + return b + } + + r := pr.newReader(pkgbits.RelocPosBase, idx, pkgbits.SyncPosBase) + + // Within types2, position bases have a lot more details (e.g., + // keeping track of where //line directives appeared exactly). + // + // For go/types, we just track the file name. + + filename := r.String() + + if r.Bool() { // file base + // Was: "b = token.NewTrimmedFileBase(filename, true)" + } else { // line base + pos := r.pos() + line := r.Uint() + col := r.Uint() + + // Was: "b = token.NewLineBase(pos, filename, true, line, col)" + _, _, _ = pos, line, col + } + + b := filename + pr.posBases[idx] = b + return b +} + +// @@@ Packages + +func (r *reader) pkg() *types.Package { + r.Sync(pkgbits.SyncPkg) + return r.p.pkgIdx(r.Reloc(pkgbits.RelocPkg)) +} + +func (pr *pkgReader) pkgIdx(idx pkgbits.Index) *types.Package { + // TODO(mdempsky): Consider using some non-nil pointer to indicate + // the universe scope, so we don't need to keep re-reading it. + if pkg := pr.pkgs[idx]; pkg != nil { + return pkg + } + + pkg := pr.newReader(pkgbits.RelocPkg, idx, pkgbits.SyncPkgDef).doPkg() + pr.pkgs[idx] = pkg + return pkg +} + +func (r *reader) doPkg() *types.Package { + path := r.String() + switch path { + case "": + path = r.p.PkgPath() + case "builtin": + return nil // universe + case "unsafe": + return types.Unsafe + } + + if pkg := r.p.imports[path]; pkg != nil { + return pkg + } + + name := r.String() + + pkg := types.NewPackage(path, name) + r.p.imports[path] = pkg + + imports := make([]*types.Package, r.Len()) + for i := range imports { + imports[i] = r.pkg() + } + pkg.SetImports(imports) + + return pkg +} + +// @@@ Types + +func (r *reader) typ() types.Type { + return r.p.typIdx(r.typInfo(), r.dict) +} + +func (r *reader) typInfo() typeInfo { + r.Sync(pkgbits.SyncType) + if r.Bool() { + return typeInfo{idx: pkgbits.Index(r.Len()), derived: true} + } + return typeInfo{idx: r.Reloc(pkgbits.RelocType), derived: false} +} + +func (pr *pkgReader) typIdx(info typeInfo, dict *readerDict) types.Type { + idx := info.idx + var where *types.Type + if info.derived { + where = &dict.derivedTypes[idx] + idx = dict.derived[idx].idx + } else { + where = &pr.typs[idx] + } + + if typ := *where; typ != nil { + return typ + } + + r := pr.newReader(pkgbits.RelocType, idx, pkgbits.SyncTypeIdx) + r.dict = dict + + typ := r.doTyp() + assert(typ != nil) + + // See comment in pkgReader.typIdx explaining how this happens. + if prev := *where; prev != nil { + return prev + } + + *where = typ + return typ +} + +func (r *reader) doTyp() (res types.Type) { + switch tag := pkgbits.CodeType(r.Code(pkgbits.SyncType)); tag { + default: + errorf("unhandled type tag: %v", tag) + panic("unreachable") + + case pkgbits.TypeBasic: + return types.Typ[r.Len()] + + case pkgbits.TypeNamed: + obj, targs := r.obj() + name := obj.(*types.TypeName) + if len(targs) != 0 { + t, _ := types.Instantiate(r.p.ctxt, name.Type(), targs, false) + return t + } + return name.Type() + + case pkgbits.TypeTypeParam: + return r.dict.tparams[r.Len()] + + case pkgbits.TypeArray: + len := int64(r.Uint64()) + return types.NewArray(r.typ(), len) + case pkgbits.TypeChan: + dir := types.ChanDir(r.Len()) + return types.NewChan(dir, r.typ()) + case pkgbits.TypeMap: + return types.NewMap(r.typ(), r.typ()) + case pkgbits.TypePointer: + return types.NewPointer(r.typ()) + case pkgbits.TypeSignature: + return r.signature(nil, nil, nil) + case pkgbits.TypeSlice: + return types.NewSlice(r.typ()) + case pkgbits.TypeStruct: + return r.structType() + case pkgbits.TypeInterface: + return r.interfaceType() + case pkgbits.TypeUnion: + return r.unionType() + } +} + +func (r *reader) structType() *types.Struct { + fields := make([]*types.Var, r.Len()) + var tags []string + for i := range fields { + pos := r.pos() + pkg, name := r.selector() + ftyp := r.typ() + tag := r.String() + embedded := r.Bool() + + fields[i] = types.NewField(pos, pkg, name, ftyp, embedded) + if tag != "" { + for len(tags) < i { + tags = append(tags, "") + } + tags = append(tags, tag) + } + } + return types.NewStruct(fields, tags) +} + +func (r *reader) unionType() *types.Union { + terms := make([]*types.Term, r.Len()) + for i := range terms { + terms[i] = types.NewTerm(r.Bool(), r.typ()) + } + return types.NewUnion(terms) +} + +func (r *reader) interfaceType() *types.Interface { + methods := make([]*types.Func, r.Len()) + embeddeds := make([]types.Type, r.Len()) + implicit := len(methods) == 0 && len(embeddeds) == 1 && r.Bool() + + for i := range methods { + pos := r.pos() + pkg, name := r.selector() + mtyp := r.signature(nil, nil, nil) + methods[i] = types.NewFunc(pos, pkg, name, mtyp) + } + + for i := range embeddeds { + embeddeds[i] = r.typ() + } + + iface := types.NewInterfaceType(methods, embeddeds) + if implicit { + iface.MarkImplicit() + } + return iface +} + +func (r *reader) signature(recv *types.Var, rtparams, tparams []*types.TypeParam) *types.Signature { + r.Sync(pkgbits.SyncSignature) + + params := r.params() + results := r.params() + variadic := r.Bool() + + return types.NewSignatureType(recv, rtparams, tparams, params, results, variadic) +} + +func (r *reader) params() *types.Tuple { + r.Sync(pkgbits.SyncParams) + + params := make([]*types.Var, r.Len()) + for i := range params { + params[i] = r.param() + } + + return types.NewTuple(params...) +} + +func (r *reader) param() *types.Var { + r.Sync(pkgbits.SyncParam) + + pos := r.pos() + pkg, name := r.localIdent() + typ := r.typ() + + return types.NewParam(pos, pkg, name, typ) +} + +// @@@ Objects + +func (r *reader) obj() (types.Object, []types.Type) { + r.Sync(pkgbits.SyncObject) + + assert(!r.Bool()) + + pkg, name := r.p.objIdx(r.Reloc(pkgbits.RelocObj)) + obj := pkgScope(pkg).Lookup(name) + + targs := make([]types.Type, r.Len()) + for i := range targs { + targs[i] = r.typ() + } + + return obj, targs +} + +func (pr *pkgReader) objIdx(idx pkgbits.Index) (*types.Package, string) { + rname := pr.newReader(pkgbits.RelocName, idx, pkgbits.SyncObject1) + + objPkg, objName := rname.qualifiedIdent() + assert(objName != "") + + tag := pkgbits.CodeObj(rname.Code(pkgbits.SyncCodeObj)) + + if tag == pkgbits.ObjStub { + assert(objPkg == nil || objPkg == types.Unsafe) + return objPkg, objName + } + + if objPkg.Scope().Lookup(objName) == nil { + dict := pr.objDictIdx(idx) + + r := pr.newReader(pkgbits.RelocObj, idx, pkgbits.SyncObject1) + r.dict = dict + + declare := func(obj types.Object) { + objPkg.Scope().Insert(obj) + } + + switch tag { + default: + panic("weird") + + case pkgbits.ObjAlias: + pos := r.pos() + typ := r.typ() + declare(types.NewTypeName(pos, objPkg, objName, typ)) + + case pkgbits.ObjConst: + pos := r.pos() + typ := r.typ() + val := r.Value() + declare(types.NewConst(pos, objPkg, objName, typ, val)) + + case pkgbits.ObjFunc: + pos := r.pos() + tparams := r.typeParamNames() + sig := r.signature(nil, nil, tparams) + declare(types.NewFunc(pos, objPkg, objName, sig)) + + case pkgbits.ObjType: + pos := r.pos() + + obj := types.NewTypeName(pos, objPkg, objName, nil) + named := types.NewNamed(obj, nil, nil) + declare(obj) + + named.SetTypeParams(r.typeParamNames()) + + // TODO(mdempsky): Rewrite receiver types to underlying is an + // Interface? The go/types importer does this (I think because + // unit tests expected that), but cmd/compile doesn't care + // about it, so maybe we can avoid worrying about that here. + rhs := r.typ() + r.p.later(func() { + underlying := rhs.Underlying() + named.SetUnderlying(underlying) + }) + + for i, n := 0, r.Len(); i < n; i++ { + named.AddMethod(r.method()) + } + + case pkgbits.ObjVar: + pos := r.pos() + typ := r.typ() + declare(types.NewVar(pos, objPkg, objName, typ)) + } + } + + return objPkg, objName +} + +func (pr *pkgReader) objDictIdx(idx pkgbits.Index) *readerDict { + r := pr.newReader(pkgbits.RelocObjDict, idx, pkgbits.SyncObject1) + + var dict readerDict + + if implicits := r.Len(); implicits != 0 { + errorf("unexpected object with %v implicit type parameter(s)", implicits) + } + + dict.bounds = make([]typeInfo, r.Len()) + for i := range dict.bounds { + dict.bounds[i] = r.typInfo() + } + + dict.derived = make([]derivedInfo, r.Len()) + dict.derivedTypes = make([]types.Type, len(dict.derived)) + for i := range dict.derived { + dict.derived[i] = derivedInfo{r.Reloc(pkgbits.RelocType), r.Bool()} + } + + // function references follow, but reader doesn't need those + + return &dict +} + +func (r *reader) typeParamNames() []*types.TypeParam { + r.Sync(pkgbits.SyncTypeParamNames) + + // Note: This code assumes it only processes objects without + // implement type parameters. This is currently fine, because + // reader is only used to read in exported declarations, which are + // always package scoped. + + if len(r.dict.bounds) == 0 { + return nil + } + + // Careful: Type parameter lists may have cycles. To allow for this, + // we construct the type parameter list in two passes: first we + // create all the TypeNames and TypeParams, then we construct and + // set the bound type. + + r.dict.tparams = make([]*types.TypeParam, len(r.dict.bounds)) + for i := range r.dict.bounds { + pos := r.pos() + pkg, name := r.localIdent() + + tname := types.NewTypeName(pos, pkg, name, nil) + r.dict.tparams[i] = types.NewTypeParam(tname, nil) + } + + typs := make([]types.Type, len(r.dict.bounds)) + for i, bound := range r.dict.bounds { + typs[i] = r.p.typIdx(bound, r.dict) + } + + // TODO(mdempsky): This is subtle, elaborate further. + // + // We have to save tparams outside of the closure, because + // typeParamNames() can be called multiple times with the same + // dictionary instance. + // + // Also, this needs to happen later to make sure SetUnderlying has + // been called. + // + // TODO(mdempsky): Is it safe to have a single "later" slice or do + // we need to have multiple passes? See comments on CL 386002 and + // go.dev/issue/52104. + tparams := r.dict.tparams + r.p.later(func() { + for i, typ := range typs { + tparams[i].SetConstraint(typ) + } + }) + + return r.dict.tparams +} + +func (r *reader) method() *types.Func { + r.Sync(pkgbits.SyncMethod) + pos := r.pos() + pkg, name := r.selector() + + rparams := r.typeParamNames() + sig := r.signature(r.param(), rparams, nil) + + _ = r.pos() // TODO(mdempsky): Remove; this is a hacker for linker.go. + return types.NewFunc(pos, pkg, name, sig) +} + +func (r *reader) qualifiedIdent() (*types.Package, string) { return r.ident(pkgbits.SyncSym) } +func (r *reader) localIdent() (*types.Package, string) { return r.ident(pkgbits.SyncLocalIdent) } +func (r *reader) selector() (*types.Package, string) { return r.ident(pkgbits.SyncSelector) } + +func (r *reader) ident(marker pkgbits.SyncMarker) (*types.Package, string) { + r.Sync(marker) + return r.pkg(), r.String() +} + +// pkgScope returns pkg.Scope(). +// If pkg is nil, it returns types.Universe instead. +// +// TODO(mdempsky): Remove after x/tools can depend on Go 1.19. +func pkgScope(pkg *types.Package) *types.Scope { + if pkg != nil { + return pkg.Scope() + } + return types.Universe +} diff --git a/go/internal/pkgbits/codes.go b/go/internal/pkgbits/codes.go new file mode 100644 index 0000000000000..f0cabde96eba9 --- /dev/null +++ b/go/internal/pkgbits/codes.go @@ -0,0 +1,77 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +// A Code is an enum value that can be encoded into bitstreams. +// +// Code types are preferable for enum types, because they allow +// Decoder to detect desyncs. +type Code interface { + // Marker returns the SyncMarker for the Code's dynamic type. + Marker() SyncMarker + + // Value returns the Code's ordinal value. + Value() int +} + +// A CodeVal distinguishes among go/constant.Value encodings. +type CodeVal int + +func (c CodeVal) Marker() SyncMarker { return SyncVal } +func (c CodeVal) Value() int { return int(c) } + +// Note: These values are public and cannot be changed without +// updating the go/types importers. + +const ( + ValBool CodeVal = iota + ValString + ValInt64 + ValBigInt + ValBigRat + ValBigFloat +) + +// A CodeType distinguishes among go/types.Type encodings. +type CodeType int + +func (c CodeType) Marker() SyncMarker { return SyncType } +func (c CodeType) Value() int { return int(c) } + +// Note: These values are public and cannot be changed without +// updating the go/types importers. + +const ( + TypeBasic CodeType = iota + TypeNamed + TypePointer + TypeSlice + TypeArray + TypeChan + TypeMap + TypeSignature + TypeStruct + TypeInterface + TypeUnion + TypeTypeParam +) + +// A CodeObj distinguishes among go/types.Object encodings. +type CodeObj int + +func (c CodeObj) Marker() SyncMarker { return SyncCodeObj } +func (c CodeObj) Value() int { return int(c) } + +// Note: These values are public and cannot be changed without +// updating the go/types importers. + +const ( + ObjAlias CodeObj = iota + ObjConst + ObjType + ObjFunc + ObjVar + ObjStub +) diff --git a/go/internal/pkgbits/decoder.go b/go/internal/pkgbits/decoder.go new file mode 100644 index 0000000000000..2bc793668ec90 --- /dev/null +++ b/go/internal/pkgbits/decoder.go @@ -0,0 +1,433 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +import ( + "encoding/binary" + "fmt" + "go/constant" + "go/token" + "math/big" + "os" + "runtime" + "strings" +) + +// A PkgDecoder provides methods for decoding a package's Unified IR +// export data. +type PkgDecoder struct { + // version is the file format version. + version uint32 + + // sync indicates whether the file uses sync markers. + sync bool + + // pkgPath is the package path for the package to be decoded. + // + // TODO(mdempsky): Remove; unneeded since CL 391014. + pkgPath string + + // elemData is the full data payload of the encoded package. + // Elements are densely and contiguously packed together. + // + // The last 8 bytes of elemData are the package fingerprint. + elemData string + + // elemEnds stores the byte-offset end positions of element + // bitstreams within elemData. + // + // For example, element I's bitstream data starts at elemEnds[I-1] + // (or 0, if I==0) and ends at elemEnds[I]. + // + // Note: elemEnds is indexed by absolute indices, not + // section-relative indices. + elemEnds []uint32 + + // elemEndsEnds stores the index-offset end positions of relocation + // sections within elemEnds. + // + // For example, section K's end positions start at elemEndsEnds[K-1] + // (or 0, if K==0) and end at elemEndsEnds[K]. + elemEndsEnds [numRelocs]uint32 +} + +// PkgPath returns the package path for the package +// +// TODO(mdempsky): Remove; unneeded since CL 391014. +func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath } + +// SyncMarkers reports whether pr uses sync markers. +func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync } + +// NewPkgDecoder returns a PkgDecoder initialized to read the Unified +// IR export data from input. pkgPath is the package path for the +// compilation unit that produced the export data. +// +// TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014. +func NewPkgDecoder(pkgPath, input string) PkgDecoder { + pr := PkgDecoder{ + pkgPath: pkgPath, + } + + // TODO(mdempsky): Implement direct indexing of input string to + // avoid copying the position information. + + r := strings.NewReader(input) + + assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil) + + switch pr.version { + default: + panic(fmt.Errorf("unsupported version: %v", pr.version)) + case 0: + // no flags + case 1: + var flags uint32 + assert(binary.Read(r, binary.LittleEndian, &flags) == nil) + pr.sync = flags&flagSyncMarkers != 0 + } + + assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil) + + pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1]) + assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil) + + pos, err := r.Seek(0, os.SEEK_CUR) + assert(err == nil) + + pr.elemData = input[pos:] + assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1])) + + return pr +} + +// NumElems returns the number of elements in section k. +func (pr *PkgDecoder) NumElems(k RelocKind) int { + count := int(pr.elemEndsEnds[k]) + if k > 0 { + count -= int(pr.elemEndsEnds[k-1]) + } + return count +} + +// TotalElems returns the total number of elements across all sections. +func (pr *PkgDecoder) TotalElems() int { + return len(pr.elemEnds) +} + +// Fingerprint returns the package fingerprint. +func (pr *PkgDecoder) Fingerprint() [8]byte { + var fp [8]byte + copy(fp[:], pr.elemData[len(pr.elemData)-8:]) + return fp +} + +// AbsIdx returns the absolute index for the given (section, index) +// pair. +func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int { + absIdx := int(idx) + if k > 0 { + absIdx += int(pr.elemEndsEnds[k-1]) + } + if absIdx >= int(pr.elemEndsEnds[k]) { + errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds) + } + return absIdx +} + +// DataIdx returns the raw element bitstream for the given (section, +// index) pair. +func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string { + absIdx := pr.AbsIdx(k, idx) + + var start uint32 + if absIdx > 0 { + start = pr.elemEnds[absIdx-1] + } + end := pr.elemEnds[absIdx] + + return pr.elemData[start:end] +} + +// StringIdx returns the string value for the given string index. +func (pr *PkgDecoder) StringIdx(idx Index) string { + return pr.DataIdx(RelocString, idx) +} + +// NewDecoder returns a Decoder for the given (section, index) pair, +// and decodes the given SyncMarker from the element bitstream. +func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder { + r := pr.NewDecoderRaw(k, idx) + r.Sync(marker) + return r +} + +// NewDecoderRaw returns a Decoder for the given (section, index) pair. +// +// Most callers should use NewDecoder instead. +func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder { + r := Decoder{ + common: pr, + k: k, + Idx: idx, + } + + // TODO(mdempsky) r.data.Reset(...) after #44505 is resolved. + r.Data = *strings.NewReader(pr.DataIdx(k, idx)) + + r.Sync(SyncRelocs) + r.Relocs = make([]RelocEnt, r.Len()) + for i := range r.Relocs { + r.Sync(SyncReloc) + r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())} + } + + return r +} + +// A Decoder provides methods for decoding an individual element's +// bitstream data. +type Decoder struct { + common *PkgDecoder + + Relocs []RelocEnt + Data strings.Reader + + k RelocKind + Idx Index +} + +func (r *Decoder) checkErr(err error) { + if err != nil { + errorf("unexpected decoding error: %w", err) + } +} + +func (r *Decoder) rawUvarint() uint64 { + x, err := binary.ReadUvarint(&r.Data) + r.checkErr(err) + return x +} + +func (r *Decoder) rawVarint() int64 { + ux := r.rawUvarint() + + // Zig-zag decode. + x := int64(ux >> 1) + if ux&1 != 0 { + x = ^x + } + return x +} + +func (r *Decoder) rawReloc(k RelocKind, idx int) Index { + e := r.Relocs[idx] + assert(e.Kind == k) + return e.Idx +} + +// Sync decodes a sync marker from the element bitstream and asserts +// that it matches the expected marker. +// +// If r.common.sync is false, then Sync is a no-op. +func (r *Decoder) Sync(mWant SyncMarker) { + if !r.common.sync { + return + } + + pos, _ := r.Data.Seek(0, os.SEEK_CUR) // TODO(mdempsky): io.SeekCurrent after #44505 is resolved + mHave := SyncMarker(r.rawUvarint()) + writerPCs := make([]int, r.rawUvarint()) + for i := range writerPCs { + writerPCs[i] = int(r.rawUvarint()) + } + + if mHave == mWant { + return + } + + // There's some tension here between printing: + // + // (1) full file paths that tools can recognize (e.g., so emacs + // hyperlinks the "file:line" text for easy navigation), or + // + // (2) short file paths that are easier for humans to read (e.g., by + // omitting redundant or irrelevant details, so it's easier to + // focus on the useful bits that remain). + // + // The current formatting favors the former, as it seems more + // helpful in practice. But perhaps the formatting could be improved + // to better address both concerns. For example, use relative file + // paths if they would be shorter, or rewrite file paths to contain + // "$GOROOT" (like objabi.AbsFile does) if tools can be taught how + // to reliably expand that again. + + fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos) + + fmt.Printf("\nfound %v, written at:\n", mHave) + if len(writerPCs) == 0 { + fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath) + } + for _, pc := range writerPCs { + fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc))) + } + + fmt.Printf("\nexpected %v, reading at:\n", mWant) + var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size? + n := runtime.Callers(2, readerPCs[:]) + for _, pc := range fmtFrames(readerPCs[:n]...) { + fmt.Printf("\t%s\n", pc) + } + + // We already printed a stack trace for the reader, so now we can + // simply exit. Printing a second one with panic or base.Fatalf + // would just be noise. + os.Exit(1) +} + +// Bool decodes and returns a bool value from the element bitstream. +func (r *Decoder) Bool() bool { + r.Sync(SyncBool) + x, err := r.Data.ReadByte() + r.checkErr(err) + assert(x < 2) + return x != 0 +} + +// Int64 decodes and returns an int64 value from the element bitstream. +func (r *Decoder) Int64() int64 { + r.Sync(SyncInt64) + return r.rawVarint() +} + +// Int64 decodes and returns a uint64 value from the element bitstream. +func (r *Decoder) Uint64() uint64 { + r.Sync(SyncUint64) + return r.rawUvarint() +} + +// Len decodes and returns a non-negative int value from the element bitstream. +func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v } + +// Int decodes and returns an int value from the element bitstream. +func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v } + +// Uint decodes and returns a uint value from the element bitstream. +func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v } + +// Code decodes a Code value from the element bitstream and returns +// its ordinal value. It's the caller's responsibility to convert the +// result to an appropriate Code type. +// +// TODO(mdempsky): Ideally this method would have signature "Code[T +// Code] T" instead, but we don't allow generic methods and the +// compiler can't depend on generics yet anyway. +func (r *Decoder) Code(mark SyncMarker) int { + r.Sync(mark) + return r.Len() +} + +// Reloc decodes a relocation of expected section k from the element +// bitstream and returns an index to the referenced element. +func (r *Decoder) Reloc(k RelocKind) Index { + r.Sync(SyncUseReloc) + return r.rawReloc(k, r.Len()) +} + +// String decodes and returns a string value from the element +// bitstream. +func (r *Decoder) String() string { + r.Sync(SyncString) + return r.common.StringIdx(r.Reloc(RelocString)) +} + +// Strings decodes and returns a variable-length slice of strings from +// the element bitstream. +func (r *Decoder) Strings() []string { + res := make([]string, r.Len()) + for i := range res { + res[i] = r.String() + } + return res +} + +// Value decodes and returns a constant.Value from the element +// bitstream. +func (r *Decoder) Value() constant.Value { + r.Sync(SyncValue) + isComplex := r.Bool() + val := r.scalar() + if isComplex { + val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar())) + } + return val +} + +func (r *Decoder) scalar() constant.Value { + switch tag := CodeVal(r.Code(SyncVal)); tag { + default: + panic(fmt.Errorf("unexpected scalar tag: %v", tag)) + + case ValBool: + return constant.MakeBool(r.Bool()) + case ValString: + return constant.MakeString(r.String()) + case ValInt64: + return constant.MakeInt64(r.Int64()) + case ValBigInt: + return constant.Make(r.bigInt()) + case ValBigRat: + num := r.bigInt() + denom := r.bigInt() + return constant.Make(new(big.Rat).SetFrac(num, denom)) + case ValBigFloat: + return constant.Make(r.bigFloat()) + } +} + +func (r *Decoder) bigInt() *big.Int { + v := new(big.Int).SetBytes([]byte(r.String())) + if r.Bool() { + v.Neg(v) + } + return v +} + +func (r *Decoder) bigFloat() *big.Float { + v := new(big.Float).SetPrec(512) + assert(v.UnmarshalText([]byte(r.String())) == nil) + return v +} + +// @@@ Helpers + +// TODO(mdempsky): These should probably be removed. I think they're a +// smell that the export data format is not yet quite right. + +// PeekPkgPath returns the package path for the specified package +// index. +func (pr *PkgDecoder) PeekPkgPath(idx Index) string { + r := pr.NewDecoder(RelocPkg, idx, SyncPkgDef) + path := r.String() + if path == "" { + path = pr.pkgPath + } + return path +} + +// PeekObj returns the package path, object name, and CodeObj for the +// specified object index. +func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) { + r := pr.NewDecoder(RelocName, idx, SyncObject1) + r.Sync(SyncSym) + r.Sync(SyncPkg) + path := pr.PeekPkgPath(r.Reloc(RelocPkg)) + name := r.String() + assert(name != "") + + tag := CodeObj(r.Code(SyncCodeObj)) + + return path, name, tag +} diff --git a/go/internal/pkgbits/doc.go b/go/internal/pkgbits/doc.go new file mode 100644 index 0000000000000..c8a2796b5e4cb --- /dev/null +++ b/go/internal/pkgbits/doc.go @@ -0,0 +1,32 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package pkgbits implements low-level coding abstractions for +// Unified IR's export data format. +// +// At a low-level, a package is a collection of bitstream elements. +// Each element has a "kind" and a dense, non-negative index. +// Elements can be randomly accessed given their kind and index. +// +// Individual elements are sequences of variable-length values (e.g., +// integers, booleans, strings, go/constant values, cross-references +// to other elements). Package pkgbits provides APIs for encoding and +// decoding these low-level values, but the details of mapping +// higher-level Go constructs into elements is left to higher-level +// abstractions. +// +// Elements may cross-reference each other with "relocations." For +// example, an element representing a pointer type has a relocation +// referring to the element type. +// +// Go constructs may be composed as a constellation of multiple +// elements. For example, a declared function may have one element to +// describe the object (e.g., its name, type, position), and a +// separate element to describe its function body. This allows readers +// some flexibility in efficiently seeking or re-reading data (e.g., +// inlining requires re-reading the function body for each inlined +// call, without needing to re-read the object-level details). +// +// This is a copy of internal/pkgbits in the Go implementation. +package pkgbits diff --git a/go/internal/pkgbits/encoder.go b/go/internal/pkgbits/encoder.go new file mode 100644 index 0000000000000..c50c838caaecd --- /dev/null +++ b/go/internal/pkgbits/encoder.go @@ -0,0 +1,379 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +import ( + "bytes" + "crypto/md5" + "encoding/binary" + "go/constant" + "io" + "math/big" + "runtime" +) + +// currentVersion is the current version number. +// +// - v0: initial prototype +// +// - v1: adds the flags uint32 word +const currentVersion uint32 = 1 + +// A PkgEncoder provides methods for encoding a package's Unified IR +// export data. +type PkgEncoder struct { + // elems holds the bitstream for previously encoded elements. + elems [numRelocs][]string + + // stringsIdx maps previously encoded strings to their index within + // the RelocString section, to allow deduplication. That is, + // elems[RelocString][stringsIdx[s]] == s (if present). + stringsIdx map[string]Index + + // syncFrames is the number of frames to write at each sync + // marker. A negative value means sync markers are omitted. + syncFrames int +} + +// SyncMarkers reports whether pw uses sync markers. +func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 } + +// NewPkgEncoder returns an initialized PkgEncoder. +// +// syncFrames is the number of caller frames that should be serialized +// at Sync points. Serializing additional frames results in larger +// export data files, but can help diagnosing desync errors in +// higher-level Unified IR reader/writer code. If syncFrames is +// negative, then sync markers are omitted entirely. +func NewPkgEncoder(syncFrames int) PkgEncoder { + return PkgEncoder{ + stringsIdx: make(map[string]Index), + syncFrames: syncFrames, + } +} + +// DumpTo writes the package's encoded data to out0 and returns the +// package fingerprint. +func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) { + h := md5.New() + out := io.MultiWriter(out0, h) + + writeUint32 := func(x uint32) { + assert(binary.Write(out, binary.LittleEndian, x) == nil) + } + + writeUint32(currentVersion) + + var flags uint32 + if pw.SyncMarkers() { + flags |= flagSyncMarkers + } + writeUint32(flags) + + // Write elemEndsEnds. + var sum uint32 + for _, elems := range &pw.elems { + sum += uint32(len(elems)) + writeUint32(sum) + } + + // Write elemEnds. + sum = 0 + for _, elems := range &pw.elems { + for _, elem := range elems { + sum += uint32(len(elem)) + writeUint32(sum) + } + } + + // Write elemData. + for _, elems := range &pw.elems { + for _, elem := range elems { + _, err := io.WriteString(out, elem) + assert(err == nil) + } + } + + // Write fingerprint. + copy(fingerprint[:], h.Sum(nil)) + _, err := out0.Write(fingerprint[:]) + assert(err == nil) + + return +} + +// StringIdx adds a string value to the strings section, if not +// already present, and returns its index. +func (pw *PkgEncoder) StringIdx(s string) Index { + if idx, ok := pw.stringsIdx[s]; ok { + assert(pw.elems[RelocString][idx] == s) + return idx + } + + idx := Index(len(pw.elems[RelocString])) + pw.elems[RelocString] = append(pw.elems[RelocString], s) + pw.stringsIdx[s] = idx + return idx +} + +// NewEncoder returns an Encoder for a new element within the given +// section, and encodes the given SyncMarker as the start of the +// element bitstream. +func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder { + e := pw.NewEncoderRaw(k) + e.Sync(marker) + return e +} + +// NewEncoderRaw returns an Encoder for a new element within the given +// section. +// +// Most callers should use NewEncoder instead. +func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder { + idx := Index(len(pw.elems[k])) + pw.elems[k] = append(pw.elems[k], "") // placeholder + + return Encoder{ + p: pw, + k: k, + Idx: idx, + } +} + +// An Encoder provides methods for encoding an individual element's +// bitstream data. +type Encoder struct { + p *PkgEncoder + + Relocs []RelocEnt + Data bytes.Buffer // accumulated element bitstream data + + encodingRelocHeader bool + + k RelocKind + Idx Index // index within relocation section +} + +// Flush finalizes the element's bitstream and returns its Index. +func (w *Encoder) Flush() Index { + var sb bytes.Buffer // TODO(mdempsky): strings.Builder after #44505 is resolved + + // Backup the data so we write the relocations at the front. + var tmp bytes.Buffer + io.Copy(&tmp, &w.Data) + + // TODO(mdempsky): Consider writing these out separately so they're + // easier to strip, along with function bodies, so that we can prune + // down to just the data that's relevant to go/types. + if w.encodingRelocHeader { + panic("encodingRelocHeader already true; recursive flush?") + } + w.encodingRelocHeader = true + w.Sync(SyncRelocs) + w.Len(len(w.Relocs)) + for _, rEnt := range w.Relocs { + w.Sync(SyncReloc) + w.Len(int(rEnt.Kind)) + w.Len(int(rEnt.Idx)) + } + + io.Copy(&sb, &w.Data) + io.Copy(&sb, &tmp) + w.p.elems[w.k][w.Idx] = sb.String() + + return w.Idx +} + +func (w *Encoder) checkErr(err error) { + if err != nil { + errorf("unexpected encoding error: %v", err) + } +} + +func (w *Encoder) rawUvarint(x uint64) { + var buf [binary.MaxVarintLen64]byte + n := binary.PutUvarint(buf[:], x) + _, err := w.Data.Write(buf[:n]) + w.checkErr(err) +} + +func (w *Encoder) rawVarint(x int64) { + // Zig-zag encode. + ux := uint64(x) << 1 + if x < 0 { + ux = ^ux + } + + w.rawUvarint(ux) +} + +func (w *Encoder) rawReloc(r RelocKind, idx Index) int { + // TODO(mdempsky): Use map for lookup; this takes quadratic time. + for i, rEnt := range w.Relocs { + if rEnt.Kind == r && rEnt.Idx == idx { + return i + } + } + + i := len(w.Relocs) + w.Relocs = append(w.Relocs, RelocEnt{r, idx}) + return i +} + +func (w *Encoder) Sync(m SyncMarker) { + if !w.p.SyncMarkers() { + return + } + + // Writing out stack frame string references requires working + // relocations, but writing out the relocations themselves involves + // sync markers. To prevent infinite recursion, we simply trim the + // stack frame for sync markers within the relocation header. + var frames []string + if !w.encodingRelocHeader && w.p.syncFrames > 0 { + pcs := make([]uintptr, w.p.syncFrames) + n := runtime.Callers(2, pcs) + frames = fmtFrames(pcs[:n]...) + } + + // TODO(mdempsky): Save space by writing out stack frames as a + // linked list so we can share common stack frames. + w.rawUvarint(uint64(m)) + w.rawUvarint(uint64(len(frames))) + for _, frame := range frames { + w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame)))) + } +} + +// Bool encodes and writes a bool value into the element bitstream, +// and then returns the bool value. +// +// For simple, 2-alternative encodings, the idiomatic way to call Bool +// is something like: +// +// if w.Bool(x != 0) { +// // alternative #1 +// } else { +// // alternative #2 +// } +// +// For multi-alternative encodings, use Code instead. +func (w *Encoder) Bool(b bool) bool { + w.Sync(SyncBool) + var x byte + if b { + x = 1 + } + err := w.Data.WriteByte(x) + w.checkErr(err) + return b +} + +// Int64 encodes and writes an int64 value into the element bitstream. +func (w *Encoder) Int64(x int64) { + w.Sync(SyncInt64) + w.rawVarint(x) +} + +// Uint64 encodes and writes a uint64 value into the element bitstream. +func (w *Encoder) Uint64(x uint64) { + w.Sync(SyncUint64) + w.rawUvarint(x) +} + +// Len encodes and writes a non-negative int value into the element bitstream. +func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) } + +// Int encodes and writes an int value into the element bitstream. +func (w *Encoder) Int(x int) { w.Int64(int64(x)) } + +// Len encodes and writes a uint value into the element bitstream. +func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) } + +// Reloc encodes and writes a relocation for the given (section, +// index) pair into the element bitstream. +// +// Note: Only the index is formally written into the element +// bitstream, so bitstream decoders must know from context which +// section an encoded relocation refers to. +func (w *Encoder) Reloc(r RelocKind, idx Index) { + w.Sync(SyncUseReloc) + w.Len(w.rawReloc(r, idx)) +} + +// Code encodes and writes a Code value into the element bitstream. +func (w *Encoder) Code(c Code) { + w.Sync(c.Marker()) + w.Len(c.Value()) +} + +// String encodes and writes a string value into the element +// bitstream. +// +// Internally, strings are deduplicated by adding them to the strings +// section (if not already present), and then writing a relocation +// into the element bitstream. +func (w *Encoder) String(s string) { + w.Sync(SyncString) + w.Reloc(RelocString, w.p.StringIdx(s)) +} + +// Strings encodes and writes a variable-length slice of strings into +// the element bitstream. +func (w *Encoder) Strings(ss []string) { + w.Len(len(ss)) + for _, s := range ss { + w.String(s) + } +} + +// Value encodes and writes a constant.Value into the element +// bitstream. +func (w *Encoder) Value(val constant.Value) { + w.Sync(SyncValue) + if w.Bool(val.Kind() == constant.Complex) { + w.scalar(constant.Real(val)) + w.scalar(constant.Imag(val)) + } else { + w.scalar(val) + } +} + +func (w *Encoder) scalar(val constant.Value) { + switch v := constant.Val(val).(type) { + default: + errorf("unhandled %v (%v)", val, val.Kind()) + case bool: + w.Code(ValBool) + w.Bool(v) + case string: + w.Code(ValString) + w.String(v) + case int64: + w.Code(ValInt64) + w.Int64(v) + case *big.Int: + w.Code(ValBigInt) + w.bigInt(v) + case *big.Rat: + w.Code(ValBigRat) + w.bigInt(v.Num()) + w.bigInt(v.Denom()) + case *big.Float: + w.Code(ValBigFloat) + w.bigFloat(v) + } +} + +func (w *Encoder) bigInt(v *big.Int) { + b := v.Bytes() + w.String(string(b)) // TODO: More efficient encoding. + w.Bool(v.Sign() < 0) +} + +func (w *Encoder) bigFloat(v *big.Float) { + b := v.Append(nil, 'p', -1) + w.String(string(b)) // TODO: More efficient encoding. +} diff --git a/go/internal/pkgbits/flags.go b/go/internal/pkgbits/flags.go new file mode 100644 index 0000000000000..654222745facd --- /dev/null +++ b/go/internal/pkgbits/flags.go @@ -0,0 +1,9 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +const ( + flagSyncMarkers = 1 << iota // file format contains sync markers +) diff --git a/go/internal/pkgbits/frames_go1.go b/go/internal/pkgbits/frames_go1.go new file mode 100644 index 0000000000000..5294f6a63edd7 --- /dev/null +++ b/go/internal/pkgbits/frames_go1.go @@ -0,0 +1,21 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !go1.7 +// +build !go1.7 + +// TODO(mdempsky): Remove after #44505 is resolved + +package pkgbits + +import "runtime" + +func walkFrames(pcs []uintptr, visit frameVisitor) { + for _, pc := range pcs { + fn := runtime.FuncForPC(pc) + file, line := fn.FileLine(pc) + + visit(file, line, fn.Name(), pc-fn.Entry()) + } +} diff --git a/go/internal/pkgbits/frames_go17.go b/go/internal/pkgbits/frames_go17.go new file mode 100644 index 0000000000000..2324ae7adfe20 --- /dev/null +++ b/go/internal/pkgbits/frames_go17.go @@ -0,0 +1,28 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.7 +// +build go1.7 + +package pkgbits + +import "runtime" + +// walkFrames calls visit for each call frame represented by pcs. +// +// pcs should be a slice of PCs, as returned by runtime.Callers. +func walkFrames(pcs []uintptr, visit frameVisitor) { + if len(pcs) == 0 { + return + } + + frames := runtime.CallersFrames(pcs) + for { + frame, more := frames.Next() + visit(frame.File, frame.Line, frame.Function, frame.PC-frame.Entry) + if !more { + return + } + } +} diff --git a/go/internal/pkgbits/reloc.go b/go/internal/pkgbits/reloc.go new file mode 100644 index 0000000000000..7a8f04ab3fc66 --- /dev/null +++ b/go/internal/pkgbits/reloc.go @@ -0,0 +1,42 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +// A RelocKind indicates a particular section within a unified IR export. +type RelocKind int + +// An Index represents a bitstream element index within a particular +// section. +type Index int + +// A relocEnt (relocation entry) is an entry in an element's local +// reference table. +// +// TODO(mdempsky): Rename this too. +type RelocEnt struct { + Kind RelocKind + Idx Index +} + +// Reserved indices within the meta relocation section. +const ( + PublicRootIdx Index = 0 + PrivateRootIdx Index = 1 +) + +const ( + RelocString RelocKind = iota + RelocMeta + RelocPosBase + RelocPkg + RelocName + RelocType + RelocObj + RelocObjExt + RelocObjDict + RelocBody + + numRelocs = iota +) diff --git a/go/internal/pkgbits/support.go b/go/internal/pkgbits/support.go new file mode 100644 index 0000000000000..ad26d3b28cae1 --- /dev/null +++ b/go/internal/pkgbits/support.go @@ -0,0 +1,17 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +import "fmt" + +func assert(b bool) { + if !b { + panic("assertion failed") + } +} + +func errorf(format string, args ...interface{}) { + panic(fmt.Errorf(format, args...)) +} diff --git a/go/internal/pkgbits/sync.go b/go/internal/pkgbits/sync.go new file mode 100644 index 0000000000000..5bd51ef717007 --- /dev/null +++ b/go/internal/pkgbits/sync.go @@ -0,0 +1,113 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +import ( + "fmt" + "strings" +) + +// fmtFrames formats a backtrace for reporting reader/writer desyncs. +func fmtFrames(pcs ...uintptr) []string { + res := make([]string, 0, len(pcs)) + walkFrames(pcs, func(file string, line int, name string, offset uintptr) { + // Trim package from function name. It's just redundant noise. + name = strings.TrimPrefix(name, "cmd/compile/internal/noder.") + + res = append(res, fmt.Sprintf("%s:%v: %s +0x%v", file, line, name, offset)) + }) + return res +} + +type frameVisitor func(file string, line int, name string, offset uintptr) + +// SyncMarker is an enum type that represents markers that may be +// written to export data to ensure the reader and writer stay +// synchronized. +type SyncMarker int + +//go:generate stringer -type=SyncMarker -trimprefix=Sync + +const ( + _ SyncMarker = iota + + // Public markers (known to go/types importers). + + // Low-level coding markers. + SyncEOF + SyncBool + SyncInt64 + SyncUint64 + SyncString + SyncValue + SyncVal + SyncRelocs + SyncReloc + SyncUseReloc + + // Higher-level object and type markers. + SyncPublic + SyncPos + SyncPosBase + SyncObject + SyncObject1 + SyncPkg + SyncPkgDef + SyncMethod + SyncType + SyncTypeIdx + SyncTypeParamNames + SyncSignature + SyncParams + SyncParam + SyncCodeObj + SyncSym + SyncLocalIdent + SyncSelector + + // Private markers (only known to cmd/compile). + SyncPrivate + + SyncFuncExt + SyncVarExt + SyncTypeExt + SyncPragma + + SyncExprList + SyncExprs + SyncExpr + SyncExprType + SyncAssign + SyncOp + SyncFuncLit + SyncCompLit + + SyncDecl + SyncFuncBody + SyncOpenScope + SyncCloseScope + SyncCloseAnotherScope + SyncDeclNames + SyncDeclName + + SyncStmts + SyncBlockStmt + SyncIfStmt + SyncForStmt + SyncSwitchStmt + SyncRangeStmt + SyncCaseClause + SyncCommClause + SyncSelectStmt + SyncDecls + SyncLabeledStmt + SyncUseObjLocal + SyncAddLocal + SyncLinkname + SyncStmt1 + SyncStmtsEnd + SyncLabel + SyncOptLabel +) diff --git a/go/internal/pkgbits/syncmarker_string.go b/go/internal/pkgbits/syncmarker_string.go new file mode 100644 index 0000000000000..4a5b0ca5f2ffc --- /dev/null +++ b/go/internal/pkgbits/syncmarker_string.go @@ -0,0 +1,89 @@ +// Code generated by "stringer -type=SyncMarker -trimprefix=Sync"; DO NOT EDIT. + +package pkgbits + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[SyncEOF-1] + _ = x[SyncBool-2] + _ = x[SyncInt64-3] + _ = x[SyncUint64-4] + _ = x[SyncString-5] + _ = x[SyncValue-6] + _ = x[SyncVal-7] + _ = x[SyncRelocs-8] + _ = x[SyncReloc-9] + _ = x[SyncUseReloc-10] + _ = x[SyncPublic-11] + _ = x[SyncPos-12] + _ = x[SyncPosBase-13] + _ = x[SyncObject-14] + _ = x[SyncObject1-15] + _ = x[SyncPkg-16] + _ = x[SyncPkgDef-17] + _ = x[SyncMethod-18] + _ = x[SyncType-19] + _ = x[SyncTypeIdx-20] + _ = x[SyncTypeParamNames-21] + _ = x[SyncSignature-22] + _ = x[SyncParams-23] + _ = x[SyncParam-24] + _ = x[SyncCodeObj-25] + _ = x[SyncSym-26] + _ = x[SyncLocalIdent-27] + _ = x[SyncSelector-28] + _ = x[SyncPrivate-29] + _ = x[SyncFuncExt-30] + _ = x[SyncVarExt-31] + _ = x[SyncTypeExt-32] + _ = x[SyncPragma-33] + _ = x[SyncExprList-34] + _ = x[SyncExprs-35] + _ = x[SyncExpr-36] + _ = x[SyncExprType-37] + _ = x[SyncAssign-38] + _ = x[SyncOp-39] + _ = x[SyncFuncLit-40] + _ = x[SyncCompLit-41] + _ = x[SyncDecl-42] + _ = x[SyncFuncBody-43] + _ = x[SyncOpenScope-44] + _ = x[SyncCloseScope-45] + _ = x[SyncCloseAnotherScope-46] + _ = x[SyncDeclNames-47] + _ = x[SyncDeclName-48] + _ = x[SyncStmts-49] + _ = x[SyncBlockStmt-50] + _ = x[SyncIfStmt-51] + _ = x[SyncForStmt-52] + _ = x[SyncSwitchStmt-53] + _ = x[SyncRangeStmt-54] + _ = x[SyncCaseClause-55] + _ = x[SyncCommClause-56] + _ = x[SyncSelectStmt-57] + _ = x[SyncDecls-58] + _ = x[SyncLabeledStmt-59] + _ = x[SyncUseObjLocal-60] + _ = x[SyncAddLocal-61] + _ = x[SyncLinkname-62] + _ = x[SyncStmt1-63] + _ = x[SyncStmtsEnd-64] + _ = x[SyncLabel-65] + _ = x[SyncOptLabel-66] +} + +const _SyncMarker_name = "EOFBoolInt64Uint64StringValueValRelocsRelocUseRelocPublicPosPosBaseObjectObject1PkgPkgDefMethodTypeTypeIdxTypeParamNamesSignatureParamsParamCodeObjSymLocalIdentSelectorPrivateFuncExtVarExtTypeExtPragmaExprListExprsExprExprTypeAssignOpFuncLitCompLitDeclFuncBodyOpenScopeCloseScopeCloseAnotherScopeDeclNamesDeclNameStmtsBlockStmtIfStmtForStmtSwitchStmtRangeStmtCaseClauseCommClauseSelectStmtDeclsLabeledStmtUseObjLocalAddLocalLinknameStmt1StmtsEndLabelOptLabel" + +var _SyncMarker_index = [...]uint16{0, 3, 7, 12, 18, 24, 29, 32, 38, 43, 51, 57, 60, 67, 73, 80, 83, 89, 95, 99, 106, 120, 129, 135, 140, 147, 150, 160, 168, 175, 182, 188, 195, 201, 209, 214, 218, 226, 232, 234, 241, 248, 252, 260, 269, 279, 296, 305, 313, 318, 327, 333, 340, 350, 359, 369, 379, 389, 394, 405, 416, 424, 432, 437, 445, 450, 458} + +func (i SyncMarker) String() string { + i -= 1 + if i < 0 || i >= SyncMarker(len(_SyncMarker_index)-1) { + return "SyncMarker(" + strconv.FormatInt(int64(i+1), 10) + ")" + } + return _SyncMarker_name[_SyncMarker_index[i]:_SyncMarker_index[i+1]] +}