From 7da8e2810896575c055a56f4facbd610ae1112cb Mon Sep 17 00:00:00 2001 From: Siddhartha Bagaria Date: Wed, 29 Nov 2023 06:27:59 +0000 Subject: [PATCH 1/6] fix(gazelle): __init__.py in per-file targets As per Python spec, `__init__.py` files are depended upon by every file in the package, so let's make sure that our generated targets also understand this implicit dependency. Note that because Python module dependencies are not a DAG, we can not depend on the Bazel target for `__init__.py` files (to avoid cycles in Bazel), and hence a non-empty `__init__.py` file is added to the `srcs` attribute of every `py_library` target. The language spec also says that each package depends on the parent package, but that is a less commonly used feature, and can make things more complex. --- gazelle/python/__main__.py | 5 ++- gazelle/python/generate.go | 39 ++++++++++++++++--- gazelle/python/resolve.go | 14 +++++-- .../per_file_non_empty_init/BUILD.out | 5 ++- .../testdata/per_file_subdirs/BUILD.out | 2 +- .../testdata/per_file_subdirs/bar/BUILD.out | 7 +++- 6 files changed, 56 insertions(+), 16 deletions(-) diff --git a/gazelle/python/__main__.py b/gazelle/python/__main__.py index 2f5a4a16ca..bf6400f887 100644 --- a/gazelle/python/__main__.py +++ b/gazelle/python/__main__.py @@ -16,10 +16,11 @@ # STDIN receives parse requests, one per line. It outputs the parsed modules and # comments from all the files from each request. -import parse -import std_modules import sys +import python.parse as parse +import python.std_modules as std_modules + if __name__ == "__main__": if len(sys.argv) < 2: sys.exit("Please provide subcommand, either print or std_modules") diff --git a/gazelle/python/generate.go b/gazelle/python/generate.go index 25fb194370..12f3c07d00 100644 --- a/gazelle/python/generate.go +++ b/gazelle/python/generate.go @@ -19,6 +19,7 @@ import ( "io/fs" "log" "os" + pathpkg "path" "path/filepath" "strings" @@ -252,18 +253,31 @@ func (py *Python) GenerateRules(args language.GenerateArgs) language.GenerateRes result.Imports = append(result.Imports, pyLibrary.PrivateAttr(config.GazelleImportsKey)) } if cfg.PerFileGeneration() { + hasInit, nonEmptyInit := hasLibraryEntrypointFile(args.Dir) pyLibraryFilenames.Each(func(index int, filename interface{}) { + var pyLibraryTargetName string if filename == pyLibraryEntrypointFilename { - stat, err := os.Stat(filepath.Join(args.Dir, filename.(string))) - if err != nil { - log.Fatalf("ERROR: %v\n", err) + if !nonEmptyInit { + return // ignore empty __init__.py. } - if stat.Size() == 0 { - return // ignore empty __init__.py + if args.File.Pkg == "" { + // As per Python spec, an __init__.py file does not make sense without + // a package name, but someone can technically configure the Bazel + // workspace as the Python package (i.e. parent of the Bazel workspace + // is part of PYTHONPATH), in which case this should be the workspace + // name, but there is no mechanism to obtain that here. So let's just + // call it "__init__". + pyLibraryTargetName = "__init__" + } else { + pyLibraryTargetName = pathpkg.Base(args.File.Pkg) } + } else { + pyLibraryTargetName = strings.TrimSuffix(filepath.Base(filename.(string)), ".py") } srcs := treeset.NewWith(godsutils.StringComparator, filename) - pyLibraryTargetName := strings.TrimSuffix(filepath.Base(filename.(string)), ".py") + if hasInit && nonEmptyInit { + srcs.Add(pyLibraryEntrypointFilename) + } appendPyLibrary(srcs, pyLibraryTargetName) }) } else if !pyLibraryFilenames.Empty() { @@ -463,6 +477,19 @@ func hasEntrypointFile(dir string) bool { return false } +// hasLibraryEntrypointFile returns if the given directory has the library +// entrypoint file, and if it is non-empty. +func hasLibraryEntrypointFile(dir string) (bool, bool) { + stat, err := os.Stat(filepath.Join(dir, pyLibraryEntrypointFilename)) + if os.IsNotExist(err) { + return false, false + } + if err != nil { + log.Fatalf("ERROR: %v\n", err) + } + return true, stat.Size() != 0 +} + // isEntrypointFile returns whether the given path is an entrypoint file. The // given path can be absolute or relative. func isEntrypointFile(path string) bool { diff --git a/gazelle/python/resolve.go b/gazelle/python/resolve.go index 1ddd63d3c2..f019a64c1a 100644 --- a/gazelle/python/resolve.go +++ b/gazelle/python/resolve.go @@ -61,11 +61,17 @@ func (py *Resolver) Imports(c *config.Config, r *rule.Rule, f *rule.File) []reso provides := make([]resolve.ImportSpec, 0, len(srcs)+1) for _, src := range srcs { ext := filepath.Ext(src) - if ext == ".py" { - pythonProjectRoot := cfg.PythonProjectRoot() - provide := importSpecFromSrc(pythonProjectRoot, f.Pkg, src) - provides = append(provides, provide) + if ext != ".py" { + continue } + if cfg.PerFileGeneration() && len(srcs) > 1 && src == pyLibraryEntrypointFilename { + // Do not provide import spec from __init__.py when it is being included as + // part of another module. + continue + } + pythonProjectRoot := cfg.PythonProjectRoot() + provide := importSpecFromSrc(pythonProjectRoot, f.Pkg, src) + provides = append(provides, provide) } if len(provides) == 0 { return nil diff --git a/gazelle/python/testdata/per_file_non_empty_init/BUILD.out b/gazelle/python/testdata/per_file_non_empty_init/BUILD.out index 8733dbd971..8ccbabc9f9 100644 --- a/gazelle/python/testdata/per_file_non_empty_init/BUILD.out +++ b/gazelle/python/testdata/per_file_non_empty_init/BUILD.out @@ -11,6 +11,9 @@ py_library( py_library( name = "foo", - srcs = ["foo.py"], + srcs = [ + "__init__.py", + "foo.py", + ], visibility = ["//:__subpackages__"], ) diff --git a/gazelle/python/testdata/per_file_subdirs/BUILD.out b/gazelle/python/testdata/per_file_subdirs/BUILD.out index 69c42e01a9..5c41373ed6 100644 --- a/gazelle/python/testdata/per_file_subdirs/BUILD.out +++ b/gazelle/python/testdata/per_file_subdirs/BUILD.out @@ -6,5 +6,5 @@ py_library( name = "foo", srcs = ["foo.py"], visibility = ["//:__subpackages__"], - deps = ["//bar:__init__"], + deps = ["//bar"], ) diff --git a/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out b/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out index 4da8d9c8b7..2448c3dde3 100644 --- a/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out +++ b/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out @@ -1,14 +1,17 @@ load("@rules_python//python:defs.bzl", "py_library", "py_test") py_library( - name = "__init__", + name = "bar", srcs = ["__init__.py"], visibility = ["//:__subpackages__"], ) py_library( name = "foo", - srcs = ["foo.py"], + srcs = [ + "__init__.py", + "foo.py", + ], visibility = ["//:__subpackages__"], ) From 3706aa31e02f955c3bb8078f76275fe74e38e889 Mon Sep 17 00:00:00 2001 From: Siddhartha Bagaria Date: Mon, 4 Dec 2023 15:24:54 -0800 Subject: [PATCH 2/6] Restore package target name as __init__ --- gazelle/python/generate.go | 22 +++---------------- .../testdata/per_file_subdirs/BUILD.out | 2 +- .../testdata/per_file_subdirs/bar/BUILD.out | 11 +++++++++- .../testdata/per_file_subdirs/bar/bar.py | 0 4 files changed, 14 insertions(+), 21 deletions(-) create mode 100644 gazelle/python/testdata/per_file_subdirs/bar/bar.py diff --git a/gazelle/python/generate.go b/gazelle/python/generate.go index 12f3c07d00..2162aaa47f 100644 --- a/gazelle/python/generate.go +++ b/gazelle/python/generate.go @@ -19,7 +19,6 @@ import ( "io/fs" "log" "os" - pathpkg "path" "path/filepath" "strings" @@ -255,24 +254,9 @@ func (py *Python) GenerateRules(args language.GenerateArgs) language.GenerateRes if cfg.PerFileGeneration() { hasInit, nonEmptyInit := hasLibraryEntrypointFile(args.Dir) pyLibraryFilenames.Each(func(index int, filename interface{}) { - var pyLibraryTargetName string - if filename == pyLibraryEntrypointFilename { - if !nonEmptyInit { - return // ignore empty __init__.py. - } - if args.File.Pkg == "" { - // As per Python spec, an __init__.py file does not make sense without - // a package name, but someone can technically configure the Bazel - // workspace as the Python package (i.e. parent of the Bazel workspace - // is part of PYTHONPATH), in which case this should be the workspace - // name, but there is no mechanism to obtain that here. So let's just - // call it "__init__". - pyLibraryTargetName = "__init__" - } else { - pyLibraryTargetName = pathpkg.Base(args.File.Pkg) - } - } else { - pyLibraryTargetName = strings.TrimSuffix(filepath.Base(filename.(string)), ".py") + pyLibraryTargetName := strings.TrimSuffix(filepath.Base(filename.(string)), ".py") + if filename == pyLibraryEntrypointFilename && !nonEmptyInit { + return // ignore empty __init__.py. } srcs := treeset.NewWith(godsutils.StringComparator, filename) if hasInit && nonEmptyInit { diff --git a/gazelle/python/testdata/per_file_subdirs/BUILD.out b/gazelle/python/testdata/per_file_subdirs/BUILD.out index 5c41373ed6..69c42e01a9 100644 --- a/gazelle/python/testdata/per_file_subdirs/BUILD.out +++ b/gazelle/python/testdata/per_file_subdirs/BUILD.out @@ -6,5 +6,5 @@ py_library( name = "foo", srcs = ["foo.py"], visibility = ["//:__subpackages__"], - deps = ["//bar"], + deps = ["//bar:__init__"], ) diff --git a/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out b/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out index 2448c3dde3..9f0002b47b 100644 --- a/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out +++ b/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out @@ -1,11 +1,20 @@ load("@rules_python//python:defs.bzl", "py_library", "py_test") py_library( - name = "bar", + name = "__init__", srcs = ["__init__.py"], visibility = ["//:__subpackages__"], ) +py_library( + name = "bar", + srcs = [ + "__init__.py", + "bar.py", + ], + visibility = ["//:__subpackages__"], +) + py_library( name = "foo", srcs = [ diff --git a/gazelle/python/testdata/per_file_subdirs/bar/bar.py b/gazelle/python/testdata/per_file_subdirs/bar/bar.py new file mode 100644 index 0000000000..e69de29bb2 From 87d0000269ea3a13e65c0bdf3ef3a9c380798979 Mon Sep 17 00:00:00 2001 From: Siddhartha Bagaria Date: Fri, 8 Dec 2023 09:38:57 -0800 Subject: [PATCH 3/6] Guard behind a directive --- gazelle/python/configure.go | 7 ++ gazelle/python/generate.go | 2 +- .../testdata/per_file_non_empty_init/BUILD.in | 1 + .../per_file_non_empty_init/BUILD.out | 1 + .../testdata/per_file_subdirs/bar/BUILD.in | 1 + .../testdata/per_file_subdirs/bar/BUILD.out | 2 + gazelle/pythonconfig/pythonconfig.go | 87 +++++++++++-------- 7 files changed, 66 insertions(+), 35 deletions(-) diff --git a/gazelle/python/configure.go b/gazelle/python/configure.go index 2d3880571c..69d276266e 100644 --- a/gazelle/python/configure.go +++ b/gazelle/python/configure.go @@ -59,6 +59,7 @@ func (py *Configurer) KnownDirectives() []string { pythonconfig.IgnoreDependenciesDirective, pythonconfig.ValidateImportStatementsDirective, pythonconfig.GenerationMode, + pythonconfig.GenerationModePerFileIncludeInit, pythonconfig.LibraryNamingConvention, pythonconfig.BinaryNamingConvention, pythonconfig.TestNamingConvention, @@ -149,6 +150,12 @@ func (py *Configurer) Configure(c *config.Config, rel string, f *rule.File) { pythonconfig.GenerationMode, d.Value) log.Fatal(err) } + case pythonconfig.GenerationModePerFileIncludeInit: + v, err := strconv.ParseBool(strings.TrimSpace(d.Value)) + if err != nil { + log.Fatal(err) + } + config.SetPerFileGenerationIncludeInit(v) case pythonconfig.LibraryNamingConvention: config.SetLibraryNamingConvention(strings.TrimSpace(d.Value)) case pythonconfig.BinaryNamingConvention: diff --git a/gazelle/python/generate.go b/gazelle/python/generate.go index 2162aaa47f..b28dc8000a 100644 --- a/gazelle/python/generate.go +++ b/gazelle/python/generate.go @@ -259,7 +259,7 @@ func (py *Python) GenerateRules(args language.GenerateArgs) language.GenerateRes return // ignore empty __init__.py. } srcs := treeset.NewWith(godsutils.StringComparator, filename) - if hasInit && nonEmptyInit { + if cfg.PerFileGenerationIncludeInit() && hasInit && nonEmptyInit { srcs.Add(pyLibraryEntrypointFilename) } appendPyLibrary(srcs, pyLibraryTargetName) diff --git a/gazelle/python/testdata/per_file_non_empty_init/BUILD.in b/gazelle/python/testdata/per_file_non_empty_init/BUILD.in index a5853f6c5c..f76a3d0b49 100644 --- a/gazelle/python/testdata/per_file_non_empty_init/BUILD.in +++ b/gazelle/python/testdata/per_file_non_empty_init/BUILD.in @@ -1,3 +1,4 @@ load("@rules_python//python:defs.bzl", "py_library") # gazelle:python_generation_mode file +# gazelle:python_generation_mode_per_file_include_init true diff --git a/gazelle/python/testdata/per_file_non_empty_init/BUILD.out b/gazelle/python/testdata/per_file_non_empty_init/BUILD.out index 8ccbabc9f9..ee4a417966 100644 --- a/gazelle/python/testdata/per_file_non_empty_init/BUILD.out +++ b/gazelle/python/testdata/per_file_non_empty_init/BUILD.out @@ -1,6 +1,7 @@ load("@rules_python//python:defs.bzl", "py_library") # gazelle:python_generation_mode file +# gazelle:python_generation_mode_per_file_include_init true py_library( name = "__init__", diff --git a/gazelle/python/testdata/per_file_subdirs/bar/BUILD.in b/gazelle/python/testdata/per_file_subdirs/bar/BUILD.in index e69de29bb2..4fc674a69a 100644 --- a/gazelle/python/testdata/per_file_subdirs/bar/BUILD.in +++ b/gazelle/python/testdata/per_file_subdirs/bar/BUILD.in @@ -0,0 +1 @@ +# gazelle:python_generation_mode_per_file_include_init true diff --git a/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out b/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out index 9f0002b47b..8835fb2ad7 100644 --- a/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out +++ b/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out @@ -1,5 +1,7 @@ load("@rules_python//python:defs.bzl", "py_library", "py_test") +# gazelle:python_generation_mode_per_file_include_init true + py_library( name = "__init__", srcs = ["__init__.py"], diff --git a/gazelle/pythonconfig/pythonconfig.go b/gazelle/pythonconfig/pythonconfig.go index 636d6a4cfc..09d308ad35 100644 --- a/gazelle/pythonconfig/pythonconfig.go +++ b/gazelle/pythonconfig/pythonconfig.go @@ -50,6 +50,10 @@ const ( // GenerationMode represents the directive that controls the target generation // mode. See below for the GenerationModeType constants. GenerationMode = "python_generation_mode" + // GenerationModePerFileIncludeInit represents the directive that augments + // the "per_file" GenerationMode by including the package's __init__.py file. + // This is a boolean directive. + GenerationModePerFileIncludeInit = "python_generation_mode_per_file_include_init" // LibraryNamingConvention represents the directive that controls the // py_library naming convention. It interpolates $package_name$ with the // Bazel package name. E.g. if the Bazel package name is `foo`, setting this @@ -122,15 +126,16 @@ type Config struct { pythonProjectRoot string gazelleManifest *manifest.Manifest - excludedPatterns *singlylinkedlist.List - ignoreFiles map[string]struct{} - ignoreDependencies map[string]struct{} - validateImportStatements bool - coarseGrainedGeneration bool - perFileGeneration bool - libraryNamingConvention string - binaryNamingConvention string - testNamingConvention string + excludedPatterns *singlylinkedlist.List + ignoreFiles map[string]struct{} + ignoreDependencies map[string]struct{} + validateImportStatements bool + coarseGrainedGeneration bool + perFileGeneration bool + perFileGenerationIncludeInit bool + libraryNamingConvention string + binaryNamingConvention string + testNamingConvention string } // New creates a new Config. @@ -139,18 +144,19 @@ func New( pythonProjectRoot string, ) *Config { return &Config{ - extensionEnabled: true, - repoRoot: repoRoot, - pythonProjectRoot: pythonProjectRoot, - excludedPatterns: singlylinkedlist.New(), - ignoreFiles: make(map[string]struct{}), - ignoreDependencies: make(map[string]struct{}), - validateImportStatements: true, - coarseGrainedGeneration: false, - perFileGeneration: false, - libraryNamingConvention: packageNameNamingConventionSubstitution, - binaryNamingConvention: fmt.Sprintf("%s_bin", packageNameNamingConventionSubstitution), - testNamingConvention: fmt.Sprintf("%s_test", packageNameNamingConventionSubstitution), + extensionEnabled: true, + repoRoot: repoRoot, + pythonProjectRoot: pythonProjectRoot, + excludedPatterns: singlylinkedlist.New(), + ignoreFiles: make(map[string]struct{}), + ignoreDependencies: make(map[string]struct{}), + validateImportStatements: true, + coarseGrainedGeneration: false, + perFileGeneration: false, + perFileGenerationIncludeInit: false, + libraryNamingConvention: packageNameNamingConventionSubstitution, + binaryNamingConvention: fmt.Sprintf("%s_bin", packageNameNamingConventionSubstitution), + testNamingConvention: fmt.Sprintf("%s_test", packageNameNamingConventionSubstitution), } } @@ -163,19 +169,20 @@ func (c *Config) Parent() *Config { // current Config and sets itself as the parent to the child. func (c *Config) NewChild() *Config { return &Config{ - parent: c, - extensionEnabled: c.extensionEnabled, - repoRoot: c.repoRoot, - pythonProjectRoot: c.pythonProjectRoot, - excludedPatterns: c.excludedPatterns, - ignoreFiles: make(map[string]struct{}), - ignoreDependencies: make(map[string]struct{}), - validateImportStatements: c.validateImportStatements, - coarseGrainedGeneration: c.coarseGrainedGeneration, - perFileGeneration: c.perFileGeneration, - libraryNamingConvention: c.libraryNamingConvention, - binaryNamingConvention: c.binaryNamingConvention, - testNamingConvention: c.testNamingConvention, + parent: c, + extensionEnabled: c.extensionEnabled, + repoRoot: c.repoRoot, + pythonProjectRoot: c.pythonProjectRoot, + excludedPatterns: c.excludedPatterns, + ignoreFiles: make(map[string]struct{}), + ignoreDependencies: make(map[string]struct{}), + validateImportStatements: c.validateImportStatements, + coarseGrainedGeneration: c.coarseGrainedGeneration, + perFileGeneration: c.perFileGeneration, + perFileGenerationIncludeInit: c.perFileGenerationIncludeInit, + libraryNamingConvention: c.libraryNamingConvention, + binaryNamingConvention: c.binaryNamingConvention, + testNamingConvention: c.testNamingConvention, } } @@ -344,6 +351,18 @@ func (c *Config) PerFileGeneration() bool { return c.perFileGeneration } +// SetPerFileGenerationIncludeInit sets whether py_library targets should +// include __init__.py files when PerFileGeneration() is true. +func (c *Config) SetPerFileGenerationIncludeInit(includeInit bool) { + c.perFileGenerationIncludeInit = includeInit +} + +// PerFileGenerationIncludeInit returns whether py_library targets should +// include __init__.py files when PerFileGeneration() is true. +func (c *Config) PerFileGenerationIncludeInit() bool { + return c.perFileGenerationIncludeInit +} + // SetLibraryNamingConvention sets the py_library target naming convention. func (c *Config) SetLibraryNamingConvention(libraryNamingConvention string) { c.libraryNamingConvention = libraryNamingConvention From 91455ad3419e63149923141228a79568f8b30a43 Mon Sep 17 00:00:00 2001 From: Siddhartha Bagaria Date: Tue, 12 Dec 2023 08:53:28 -0800 Subject: [PATCH 4/6] Mention directive in README --- gazelle/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gazelle/README.md b/gazelle/README.md index 208e841586..8493ee06d9 100644 --- a/gazelle/README.md +++ b/gazelle/README.md @@ -184,6 +184,8 @@ Python-specific directives are as follows: | Controls whether the Python import statements should be validated. Can be "true" or "false" | | | `# gazelle:python_generation_mode`| `package` | | Controls the target generation mode. Can be "file", "package", or "project" | | +| `# gazelle:python_generation_mode_per_file_include_init`| `package` | +| Controls whether `__init__.py` files are included as srcs in each generated target when target generation mode is "file". Can be "true", or "false" | | | `# gazelle:python_library_naming_convention`| `$package_name$` | | Controls the `py_library` naming convention. It interpolates \$package_name\$ with the Bazel package name. E.g. if the Bazel package name is `foo`, setting this to `$package_name$_my_lib` would result in a generated target named `foo_my_lib`. | | | `# gazelle:python_binary_naming_convention` | `$package_name$_bin` | From ee3818af7765a9c9190e589d7c095acd3773c9d1 Mon Sep 17 00:00:00 2001 From: Siddhartha Bagaria Date: Tue, 19 Dec 2023 20:28:14 -0800 Subject: [PATCH 5/6] Add to changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b032f4e427..2b2a9450be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,9 @@ A brief description of the categories of changes: ### Added * (docs) bzlmod extensions are now documented on rules-python.readthedocs.io +- (gazelle) `file` generation mode can now also add `__init__.py` to the srcs + attribute for every target in the package. This is enabled through a separate + directive `python_generation_mode_per_file_include_init`. [0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0 From ea5663ee423c595ba330c25a305bee480633a625 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Wed, 20 Dec 2023 19:01:22 +0900 Subject: [PATCH 6/6] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b2a9450be..b524536b8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,7 +50,7 @@ A brief description of the categories of changes: ### Added * (docs) bzlmod extensions are now documented on rules-python.readthedocs.io -- (gazelle) `file` generation mode can now also add `__init__.py` to the srcs +* (gazelle) `file` generation mode can now also add `__init__.py` to the srcs attribute for every target in the package. This is enabled through a separate directive `python_generation_mode_per_file_include_init`.