Skip to content

Commit

Permalink
Disable pip cache on the workers to reduce prebuilt container image s…
Browse files Browse the repository at this point in the history
…ize (apache#27035)

Co-authored-by: tvalentyn <tvalentyn@users.noreply.github.com>
  • Loading branch information
2 people authored and bullet03 committed Aug 11, 2023
1 parent 0760de0 commit b419b50
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
4 changes: 2 additions & 2 deletions CHANGES.md
Expand Up @@ -62,8 +62,8 @@

## New Features / Improvements

* Allow prebuilding large images when using `--prebuild_sdk_container_engine=cloud_build`, like images depending on `tensorflow` or `torch` ([#27023](https://github.com/apache/beam/pull/27023))
* X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)).
* Allow prebuilding large images when using `--prebuild_sdk_container_engine=cloud_build`, like images depending on `tensorflow` or `torch` ([#27023](https://github.com/apache/beam/pull/27023)).
* Disabled `pip` cache when installing packages on the workers. This reduces the size of prebuilt Python container images ([#27035](https://github.com/apache/beam/pull/27035)).

## Breaking Changes

Expand Down
10 changes: 5 additions & 5 deletions sdks/python/container/piputil.go
Expand Up @@ -37,14 +37,14 @@ func pipInstallRequirements(files []string, dir, name string) error {
// as possible PyPI downloads. In the first round the --find-links
// option will make sure that only things staged in the worker will be
// used without following their dependencies.
args := []string{"-m", "pip", "install", "-r", filepath.Join(dir, name), "--disable-pip-version-check", "--no-index", "--no-deps", "--find-links", dir}
args := []string{"-m", "pip", "install", "-r", filepath.Join(dir, name), "--no-cache-dir", "--disable-pip-version-check", "--no-index", "--no-deps", "--find-links", dir}
if err := execx.Execute("python", args...); err != nil {
fmt.Println("Some packages could not be installed solely from the requirements cache. Installing packages from PyPI.")
}
// The second install round opens up the search for packages on PyPI and
// also installs dependencies. The key is that if all the packages have
// been installed in the first round then this command will be a no-op.
args = []string{"-m", "pip", "install", "-r", filepath.Join(dir, name), "--disable-pip-version-check", "--find-links", dir}
args = []string{"-m", "pip", "install", "-r", filepath.Join(dir, name), "--no-cache-dir", "--disable-pip-version-check", "--find-links", dir}
return execx.Execute("python", args...)
}
}
Expand Down Expand Up @@ -76,18 +76,18 @@ func pipInstallPackage(files []string, dir, name string, force, optional bool, e
// installed version will match the package specified, the package itself
// will not be reinstalled, but its dependencies will now be resolved and
// installed if necessary. This achieves our goal outlined above.
args := []string{"-m", "pip", "install", "--disable-pip-version-check", "--upgrade", "--force-reinstall", "--no-deps",
args := []string{"-m", "pip", "install", "--no-cache-dir", "--disable-pip-version-check", "--upgrade", "--force-reinstall", "--no-deps",
filepath.Join(dir, packageSpec)}
err := execx.Execute("python", args...)
if err != nil {
return err
}
args = []string{"-m", "pip", "install", "--disable-pip-version-check", filepath.Join(dir, packageSpec)}
args = []string{"-m", "pip", "install", "--no-cache-dir", "--disable-pip-version-check", filepath.Join(dir, packageSpec)}
return execx.Execute("python", args...)
}

// Case when we do not perform a forced reinstall.
args := []string{"-m", "pip", "install", "--disable-pip-version-check", filepath.Join(dir, packageSpec)}
args := []string{"-m", "pip", "install", "--no-cache-dir", "--disable-pip-version-check", filepath.Join(dir, packageSpec)}
return execx.Execute("python", args...)
}
}
Expand Down

0 comments on commit b419b50

Please sign in to comment.