Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cachi2 rubygems / bundler design #565

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,7 @@ settings.py

# idea files
.idea/

# cachi2 default files
cachi2-output/
cachi2.env
22 changes: 20 additions & 2 deletions cachi2/core/models/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def show_error(error: "ErrorDict") -> str:


# Supported package managers
PackageManagerType = Literal["gomod", "npm", "pip", "rpm", "yarn"]
PackageManagerType = Literal["gomod", "npm", "pip", "rpm", "rubygems", "yarn"]

Flag = Literal[
"cgo-disable", "dev-package-managers", "force-gomod-tidy", "gomod-vendor", "gomod-vendor-check"
Expand Down Expand Up @@ -179,8 +179,21 @@ class YarnPackageInput(_PackageInputBase):
type: Literal["yarn"]


class RubygemsPackageInput(_PackageInputBase):
    """Accepted input for a Rubygems package."""

    # Fixed tag value marking this input as a "rubygems" package.
    type: Literal["rubygems"]


PackageInput = Annotated[
Union[GomodPackageInput, NpmPackageInput, PipPackageInput, RpmPackageInput, YarnPackageInput],
Union[
GomodPackageInput,
NpmPackageInput,
PipPackageInput,
RpmPackageInput,
RubygemsPackageInput,
YarnPackageInput,
],
# https://pydantic-docs.helpmanual.io/usage/types/#discriminated-unions-aka-tagged-unions
pydantic.Field(discriminator="type"),
]
Expand Down Expand Up @@ -246,6 +259,11 @@ def rpm_packages(self) -> list[RpmPackageInput]:
"""Get the rpm packages specified for this request."""
return self._packages_by_type(RpmPackageInput)

@property
def rubygems_packages(self) -> list[RubygemsPackageInput]:
    """List the Rubygems packages specified for this request."""
    rubygems_inputs = self._packages_by_type(RubygemsPackageInput)
    return rubygems_inputs

@property
def yarn_packages(self) -> list[YarnPackageInput]:
"""Get the yarn packages specified for this request."""
Expand Down
264 changes: 264 additions & 0 deletions cachi2/core/package_managers/rubygems.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@
import logging
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't put everything into the same module — that is how we used to implement package managers, and it led to a single big, messy module. We should split the logic into a few self-contained modules wrapped in a package.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The file will be deleted anyway, so I did not bother with splitting it.

import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Optional

from gemlock_parser.gemfile_lock import Gem, GemfileLockParser # type: ignore
from packageurl import PackageURL

from cachi2.core.errors import FetchError
from cachi2.core.models.input import Request, RubygemsPackageInput
from cachi2.core.models.output import Component, EnvironmentVariable, ProjectFile, RequestOutput
from cachi2.core.package_managers.general import download_binary_file, extract_git_info
from cachi2.core.rooted_path import RootedPath
from cachi2.core.scm import clone_as_tarball

# File name of the Bundler lockfile looked up inside each package directory.
GEMFILE_LOCK = "Gemfile.lock"

# A git ref is accepted only when it is exactly 40 hexadecimal characters.
GIT_REF_FORMAT = re.compile(r"^[a-fA-F0-9]{40}$")
# Matches a "PLATFORMS" header line directly followed by an indented "ruby" line
# and then an empty line.
PLATFORMS_RUBY = re.compile(r"^PLATFORMS\n {2}ruby\n\n", re.MULTILINE)

# Module-level logger.
log = logging.getLogger(__name__)


def fetch_rubygems_source(request: Request) -> RequestOutput:
    """Prefetch the dependencies of every Rubygems package named in the request.

    A ``deps/rubygems`` directory is created under the request's output directory
    and each package is resolved against it. The output lists one component for
    each main package followed by one component per dependency of that package.

    :param request: the request whose ``rubygems_packages`` should be processed
    :return: components plus the Bundler environment variables for the build
    """
    # NOTE(review): in review, only BUNDLE_CACHE_PATH was reported as necessary for
    # building; BUNDLE_CACHE_ALL is slated for removal and whether
    # BUNDLE_FORCE_RUBY_PLATFORM is needed at build time is still an open question.
    bundler_settings = (
        ("BUNDLE_CACHE_ALL", "true"),
        ("BUNDLE_CACHE_PATH", "${output_dir}/deps/rubygems"),
        ("BUNDLE_FORCE_RUBY_PLATFORM", "true"),
    )
    env_vars = [EnvironmentVariable(name=key, value=val) for key, val in bundler_settings]
    no_project_files: list[ProjectFile] = []

    gems_dir = request.output_dir.join_within_root("deps", "rubygems")
    gems_dir.path.mkdir(parents=True, exist_ok=True)

    collected = []
    for pkg in request.rubygems_packages:
        resolved = _resolve_rubygems(request.source_dir, gems_dir, pkg)
        # The main package comes first, then each of its dependencies.
        collected.append(Component.from_package_dict(resolved["package"]))
        collected.extend(
            Component(name=dep["name"], version=dep["version"], purl=dep["purl"])
            for dep in resolved["dependencies"]
        )

    return RequestOutput.from_obj_list(
        collected,
        environment_variables=env_vars,
        project_files=no_project_files,
    )


def _resolve_rubygems(
    source_dir: RootedPath,
    output_dir: RootedPath,
    package: RubygemsPackageInput,
) -> dict[str, Any]:
    """Resolve one Rubygems package and download the gems locked in its Gemfile.lock.

    :param source_dir: root of the source tree the package path is joined to
    :param output_dir: directory the downloaded dependencies are written to
    :param package: the package input; its ``path`` selects the package directory
    :return: dict with a ``"package"`` entry describing the main package and a
        ``"dependencies"`` entry holding the list of downloaded dependencies
    """
    name, version = _get_metadata()
    main_purl = PackageURL(type="rubygems", name=name, version=version).to_string()

    pkg_dir = source_dir.join_within_root(package.path)
    lockfile = pkg_dir.join_within_root(GEMFILE_LOCK)

    locked_gems = _parse_gemlock(pkg_dir, lockfile)
    fetched = _download_dependencies(output_dir, locked_gems, pkg_dir, set())

    main_package = {
        "name": name,
        "version": version,
        "type": "rubygems",
        "path": pkg_dir,
        "purl": main_purl,
    }
    return {"package": main_package, "dependencies": fetched}


def _get_metadata() -> tuple[str, str]:
    """Return the ``(name, version)`` pair used for the main package.

    This is a stub: it always yields the same hard-coded values.
    """
    placeholder_name = "foo"
    placeholder_version = "0.1.0"
    return placeholder_name, placeholder_version


@dataclass
class GemMetadata:
    """One dependency entry collected from a Gemfile.lock."""

    # Name of the gem.
    name: str
    # Locked version; for GIT gems this value is validated as a 40-character hex ref.
    version: str
    # Kind of source: the module handles "GEM", "GIT" and "PATH".
    type: str
    # Remote URL for GEM/GIT gems, or the gem's path for PATH gems.
    source: str
    # Branch reported by the lockfile parser, if any.
    branch: Optional[str] = None


def _parse_gemlock(
    source_dir: RootedPath,
    gemlock_path: RootedPath,
) -> list[GemMetadata]:
    """Parse a Gemfile.lock and collect metadata for every gem that has a version.

    Gems without a version are skipped. Every remaining gem is validated with
    ``_validate_gem_metadata`` before it is added to the result; any error raised
    by that validation propagates to the caller.

    :param source_dir: directory that PATH dependencies must be located in; it is
        forwarded unchanged to the per-gem validation
    :param gemlock_path: location of the Gemfile.lock to parse
    :return: one ``GemMetadata`` per gem that has a version
    :raises FetchError: if the PLATFORMS check of the lockfile fails
    """
    _validate_gemlock_platforms(gemlock_path)

    # Validate PATH gems against the directory that holds Gemfile.lock, which is the
    # directory `_get_path_package_info` later joins them to. Passing
    # `gemlock_path.root` here only works when the lockfile sits directly in the root.
    # NOTE(review): assumes RootedPath.path is the concrete pathlib.Path (as it is used
    # elsewhere in this module) and RootedPath.root is the confinement root - confirm.
    gemlock_dir = gemlock_path.path.parent

    parser = GemfileLockParser(str(gemlock_path))
    log.info("Bundled with version %s", parser.bundled_with)

    dependencies = []
    for gem in parser.all_gems.values():
        if gem.version is None:
            log.debug(
                "Skipping RubyGem %s, because of a missing version. "
                "This means gem is not used in a platform for which Gemfile.lock was generated.",
                gem.name,
            )
            continue

        _validate_gem_metadata(gem, source_dir, gemlock_dir)
        # PATH gems carry a local path instead of a remote URL.
        source = gem.remote if gem.type != "PATH" else gem.path
        dependencies.append(GemMetadata(gem.name, gem.version, gem.type, source, gem.branch))

    return dependencies


def _validate_gemlock_platforms(gemlock_path: RootedPath) -> None:
    """Check that the lockfile text matches the ``PLATFORMS_RUBY`` pattern.

    :param gemlock_path: the Gemfile.lock to read
    :raises FetchError: if the pattern is not found anywhere in the file
    """
    with open(gemlock_path) as lockfile:
        lock_text = lockfile.read()

    if PLATFORMS_RUBY.search(lock_text) is None:
        raise FetchError(
            "PLATFORMS section of Gemfile.lock has to contain one and only platform - ruby."
        )


def _validate_gem_metadata(gem: Gem, source_dir: RootedPath, gemlock_dir: Path) -> None:
    """Reject gems whose source cannot be handled by this module.

    - GEM: the remote must be exactly ``https://rubygems.org/``.
    - GIT: the remote must start with ``https://`` and the version must match
      ``GIT_REF_FORMAT``.
    - PATH: the path is checked by ``_validate_path_dependency_dir``.

    :param gem: gem entry produced by the lockfile parser
    :param source_dir: directory PATH dependencies must be located in
    :param gemlock_dir: directory PATH dependency paths are joined to
    :raises FetchError: for an unsupported remote, a non-HTTPS git remote, a malformed
        git ref or an unknown dependency type
    :raises FileNotFoundError, RuntimeError, ValueError: propagated from the PATH check
    """
    if gem.type == "GEM":
        if gem.remote != "https://rubygems.org/":
            raise FetchError(
                "Cachi2 supports only https://rubygems.org/ as a remote for Ruby GEM dependencies."
            )

    elif gem.type == "GIT":
        if not gem.remote.startswith("https://"):
            raise FetchError("All Ruby GIT dependencies have to use HTTPS protocol.")
        if not GIT_REF_FORMAT.match(gem.version):
            msg = (
                f"No git ref for gem: {gem.name} (expected 40 hexadecimal characters, "
                f"got: {gem.version})."
            )
            raise FetchError(msg)

    elif gem.type == "PATH":
        _validate_path_dependency_dir(gem, source_dir, gemlock_dir)

    else:
        raise FetchError("Gemfile.lock contains unsupported dependency type.")


def _validate_path_dependency_dir(gem: Gem, source_dir: RootedPath, gemlock_dir: Path) -> None:
    """Ensure a PATH gem points to an existing location inside ``source_dir``.

    The gem's path is joined to ``gemlock_dir``, resolved strictly (so it must exist)
    and then required to be relative to ``source_dir``.

    :param gem: gem entry providing ``name`` and ``path``
    :param source_dir: directory the resolved path must be located in
    :param gemlock_dir: directory the gem's path is joined to
    :raises FileNotFoundError: if the joined path does not exist
    :raises RuntimeError: if resolving the path reports an infinite loop
    :raises ValueError: if the resolved path is not below ``source_dir``
    """
    dependency_dir = gemlock_dir.joinpath(gem.path)
    try:
        dependency_dir = dependency_dir.resolve(strict=True)
        dependency_dir.relative_to(source_dir)
    except FileNotFoundError as err:
        # resolve() failed, so dependency_dir still holds the unresolved path here.
        raise FileNotFoundError(
            f"PATH dependency {gem.name} references a non-existing path: {dependency_dir}."
        ) from err
    except RuntimeError as err:
        raise RuntimeError(
            f"Path of PATH dependency {gem.name} contains an infinite loop: {dependency_dir}."
        ) from err
    except ValueError as err:
        raise ValueError(f"{dependency_dir} is not a subpath of {source_dir}") from err


def _download_dependencies(
    output_dir: RootedPath,
    dependencies: list[GemMetadata],
    package_root: RootedPath,
    allowed_path_deps: set[str],
) -> list[dict[str, Any]]:
    """Fetch every dependency and describe the result of each one.

    GEM and GIT dependencies are downloaded into ``output_dir``; PATH dependencies are
    only looked up relative to ``package_root``. Each returned dict is extended with
    ``"kind"`` (the dependency type), ``"type"`` (always ``"rubygems"``) and ``"purl"``.

    :param output_dir: directory downloads are written to
    :param dependencies: gems to process
    :param package_root: directory PATH dependencies are joined to
    :param allowed_path_deps: not used by this function
    :return: one info dict per dependency, in input order
    :raises RuntimeError: for a dependency type other than GEM, GIT or PATH
    """
    results = []

    for gem in dependencies:
        log.info("Downloading %s (%s)", gem.name, gem.version)

        kind = gem.type
        if kind == "PATH":
            # Nothing is downloaded for PATH gems, so no success message is logged.
            entry = _get_path_package_info(gem, package_root)
        else:
            if kind == "GEM":
                entry = _download_rubygems_package(gem, output_dir)
            elif kind == "GIT":
                entry = _download_git_package(gem, output_dir)
            else:
                # Should not happen
                raise RuntimeError(f"Unexpected dependency type: {kind!r}")

            log.info(
                "Successfully downloaded gem %s (%s) to %s",
                gem.name,
                gem.version,
                entry["path"],
            )

        entry.update(
            {
                "kind": kind,
                "type": "rubygems",
                "purl": PackageURL(
                    type="rubygems", name=gem.name, version=gem.version
                ).to_string(),
            }
        )
        results.append(entry)

    return results


def _download_rubygems_package(gem: GemMetadata, deps_dir: RootedPath) -> dict[str, Any]:
    """Download a ``.gem`` file from rubygems.org into ``deps_dir``.

    :param gem: gem whose name and version form the file name and the URL
    :param deps_dir: directory the ``<name>-<version>.gem`` file is written to
    :return: dict with the gem's ``name``, ``version`` and the download ``path``
    """
    target = deps_dir.join_within_root(f"{gem.name}-{gem.version}.gem")
    download_binary_file(f"https://rubygems.org/gems/{gem.name}-{gem.version}.gem", target.path)

    return dict(name=gem.name, version=gem.version, path=target)


def _download_git_package(gem: GemMetadata, deps_dir: RootedPath) -> dict[str, Any]:
    """Clone a GIT gem as ``source.tar.gz`` under ``deps_dir/<host>/<namespace>/<repo>``.

    :param gem: gem whose ``source`` and ``version`` are combined as ``<source>@<version>``
        and handed to ``extract_git_info``
    :param deps_dir: directory below which the repository directory is created
    :return: dict with the gem's ``name`` and ``version`` and the repository directory as
        ``path``, merged with every key of the extracted git info (git info wins on
        clashes)
    """
    repo = extract_git_info(f"{gem.source}@{gem.version}")

    clone_dir = deps_dir.join_within_root(repo["host"], repo["namespace"], repo["repo"])
    clone_dir.path.mkdir(parents=True, exist_ok=True)

    tarball = clone_dir.join_within_root("source.tar.gz").path
    clone_as_tarball(repo["url"], repo["ref"], to_path=tarball)

    result: dict[str, Any] = {"name": gem.name, "version": gem.version, "path": clone_dir}
    result.update(repo)
    return result


def _get_path_package_info(dep: GemMetadata, package_root: RootedPath) -> dict[str, Any]:
    """Describe a PATH dependency without downloading anything.

    :param dep: the PATH gem; its ``source`` is joined to ``package_root``
    :param package_root: directory the gem's path is joined to
    :return: dict with the gem's ``name``, ``version`` and, as ``path``, the joined
        location's ``subpath_from_root``
    """
    located = package_root.join_within_root(dep.source)
    relative_path = located.subpath_from_root
    return dict(name=dep.name, version=dep.version, path=relative_path)
3 changes: 2 additions & 1 deletion cachi2/core/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from cachi2.core.errors import UnsupportedFeature
from cachi2.core.models.input import PackageManagerType, Request
from cachi2.core.models.output import RequestOutput
from cachi2.core.package_managers import gomod, npm, pip, rpm, yarn
from cachi2.core.package_managers import gomod, npm, pip, rpm, rubygems, yarn
from cachi2.core.rooted_path import RootedPath
from cachi2.core.utils import copy_directory

Expand All @@ -17,6 +17,7 @@
"npm": npm.fetch_npm_source,
"pip": pip.fetch_pip_source,
"yarn": yarn.fetch_yarn_source,
"rubygems": rubygems.fetch_rubygems_source,
}

# This is where we put package managers currently under development in order to
Expand Down
Loading
Loading