Skip to content

Commit

Permalink
As a user, I can perform a content-only mirror of a repo.
Browse files Browse the repository at this point in the history
  • Loading branch information
dralley committed Oct 13, 2021
1 parent 26c0e5a commit 57a4733
Show file tree
Hide file tree
Showing 10 changed files with 162 additions and 52 deletions.
1 change: 1 addition & 0 deletions CHANGES/9316.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added a `sync_policy` parameter to the `/sync/` endpoint which replaces the `mirror` parameter and provides options for how the sync should be carried out.
1 change: 0 additions & 1 deletion CHANGES/9448.bugfix
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
Use checksum type of a package for publication if it's not configured.

10 changes: 7 additions & 3 deletions docs/workflows/create_sync_publish.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,13 @@ Sync repository ``foo`` using remote ``bar``
.. literalinclude:: ../_scripts/sync.sh
:language: bash

You can specify ``mirror=True`` for a mirror mode. It means Pulp won't update
repository using previous repository version but create a new copy of remote
repository as a new repository version.
There are 3 sync modes to choose from, using the ``sync_policy`` option.

- ``additive`` (the default) will retain the existing contents of the Pulp repository and add the contents of the remote repository being synced.
- ``mirror_content_only`` will synchronize the Pulp repository to contain the same content as the one remote repository being synced - removing any existing content that isn't present in the remote repo.
- ``mirror_complete`` will automatically create a publication that will be an *exact* bit-for-bit copy of the remote repository being synced. This will keep metadata checksums intact, but is not possible for all repositories, as some use features which are incompatible with local clones.

The ``mirror`` option is deprecated; ``sync_policy`` should be used instead. If the ``mirror`` option is used, a value of ``true`` will change the default ``sync_policy`` to ``mirror_complete``, while a value of ``false`` will not change the default ``sync_policy``.

Optionally, you can skip ``SRPM`` packages by using ``skip_types:="[\"srpm\"]"``
option.
Expand Down
12 changes: 12 additions & 0 deletions pulp_rpm/app/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,18 @@
ALLOWED_CHECKSUM_ERROR_MSG = """Checksum must be one of the allowed checksum types.
You can adjust these with the 'ALLOWED_CONTENT_CHECKSUMS' setting."""

# Names of the supported sync modes, exposed as attributes so callers can
# compare against e.g. ``SYNC_POLICIES.MIRROR_COMPLETE`` instead of raw strings.
SYNC_POLICIES = SimpleNamespace(
    ADDITIVE="additive",
    MIRROR_COMPLETE="mirror_complete",
    MIRROR_CONTENT_ONLY="mirror_content_only",
)

# (value, label) pairs suitable for a choice field; every policy string serves
# as its own label.
SYNC_POLICY_CHOICES = tuple(
    (policy, policy)
    for policy in (
        SYNC_POLICIES.ADDITIVE,
        SYNC_POLICIES.MIRROR_COMPLETE,
        SYNC_POLICIES.MIRROR_CONTENT_ONLY,
    )
)

CR_PACKAGE_ATTRS = SimpleNamespace(
ARCH="arch",
CHANGELOGS="changelogs",
Expand Down
42 changes: 42 additions & 0 deletions pulp_rpm/app/serializers/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
ALLOWED_CHECKSUM_ERROR_MSG,
CHECKSUM_CHOICES,
SKIP_TYPES,
SYNC_POLICY_CHOICES,
)
from pulp_rpm.app.models import (
RpmDistribution,
Expand Down Expand Up @@ -308,6 +309,29 @@ class RpmRepositorySyncURLSerializer(RepositorySyncURLSerializer):
Serializer for RPM Sync.
"""

mirror = serializers.BooleanField(
required=False,
allow_null=True,
help_text=_(
"DEPRECATED: If ``True``, ``sync_policy`` will default to 'mirror_complete' "
"instead of 'additive'."
),
)
sync_policy = serializers.ChoiceField(
help_text=_(
"Options: 'additive', 'mirror_complete', 'mirror_content_only'. Default: 'additive' "
"Modifies how the sync is performed. 'mirror_complete' will clone the original "
"metadata and create an automatic publication from it, but comes with some "
"limitations and does not work for certain repositories. 'mirror_content_only' will "
"change the repository contents to match the remote but the metadata will be "
"regenerated and will not be bit-for-bit identical. 'additive' will retain the "
"existing contents of the repository and add the contents of the repository being "
"synced."
),
choices=SYNC_POLICY_CHOICES,
required=False,
allow_null=True,
)
skip_types = serializers.ListField(
help_text=_("List of content types to skip during sync."),
required=False,
Expand All @@ -318,6 +342,24 @@ class RpmRepositorySyncURLSerializer(RepositorySyncURLSerializer):
help_text=_("Whether or not to optimize sync."), required=False, default=True
)

def validate(self, data):
    """
    Validate sync parameters.

    Runs the parent serializer's validation first, then rejects input in which
    both a truthy 'mirror' value and a 'sync_policy' value are present.

    Args:
        data (dict): The sync parameters to validate.

    Returns:
        dict: The data returned by the parent serializer's validation.

    Raises:
        serializers.ValidationError: If 'mirror' and 'sync_policy' are both set.
    """
    data = super().validate(data)

    mirror_flag = data.get("mirror", None)
    chosen_policy = data.get("sync_policy", None)

    # Only a truthy 'mirror' conflicts with an explicit policy; an absent or
    # false 'mirror' leaves 'sync_policy' free to be used on its own.
    if not (mirror_flag and chosen_policy):
        return data

    raise serializers.ValidationError(
        _(
            "Cannot use 'mirror' and 'sync_policy' options simultaneously. The 'mirror' "
            "option is deprecated, please use 'sync_policy' only."
        )
    )


class CopySerializer(serializers.Serializer):
"""
Expand Down
48 changes: 28 additions & 20 deletions pulp_rpm/app/tasks/synchronizing.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
PACKAGE_REPODATA,
PULP_MODULE_ATTR,
PULP_MODULEDEFAULTS_ATTR,
SYNC_POLICIES,
UPDATE_REPODATA,
)
from pulp_rpm.app.models import (
Expand Down Expand Up @@ -341,24 +342,26 @@ def is_optimized_sync(repository, remote, url):
return is_optimized


def synchronize(remote_pk, repository_pk, mirror, skip_types, optimize):
def synchronize(remote_pk, repository_pk, sync_policy, skip_types, optimize):
"""
Sync content from the remote repository.
Create a new version of the repository that is synchronized with the remote.
If mirror=True, a publication will be created with a copy of the original metadata.
In this event, SRPMs and other types listed in "skip_types" will *not* be skipped.
If sync_policy=mirror_complete, a publication will be created with a copy of the original
metadata. This comes with some limitations, namely:
If mirror=True and the repository uses the xml:base / location_base feature, then
the sync will fail. This feature is incompatible with the intentions of most Pulp
users, as it will tell clients to look for metadata / packages from a source outside
of the repository.
* SRPMs and other types listed in "skip_types" will *not* be skipped.
* If the repository uses the xml:base / location_base feature, then the sync will fail.
This feature is incompatible with the intentions of most Pulp users, because the metadata
will tell clients to look for files at some source outside of the Pulp-hosted repo.
* If the repository uses Delta RPMs, the sync will fail, because Pulp does not support them,
and cannot change the repository metadata to remove them.
Args:
remote_pk (str): The remote PK.
repository_pk (str): The repository PK.
mirror (bool): Mirror mode.
sync_policy (str): How to perform the sync.
skip_types (list): List of content to skip.
optimize(bool): Optimize mode.
Expand Down Expand Up @@ -431,6 +434,9 @@ def get_treeinfo_data(remote, remote_url):

sub_repos = []

mirror = sync_policy.startswith("mirror")
mirror_metadata = sync_policy == SYNC_POLICIES.MIRROR_COMPLETE

if treeinfo:
treeinfo["repositories"] = {}
for repodata in set(treeinfo["download"]["repodatas"]):
Expand Down Expand Up @@ -460,12 +466,12 @@ def get_treeinfo_data(remote, remote_url):
remote,
sub_repo,
deferred_download,
mirror,
mirror_metadata,
skip_types=skip_types,
new_url=new_url,
namespace=directory,
)
dv = RpmDeclarativeVersion(first_stage=stage, repository=sub_repo)
dv = RpmDeclarativeVersion(first_stage=stage, repository=sub_repo, mirror=mirror)
subrepo_version = dv.create()
if subrepo_version:
sub_repo.last_sync_remote = remote
Expand All @@ -477,7 +483,7 @@ def get_treeinfo_data(remote, remote_url):
remote,
repository,
deferred_download,
mirror,
mirror_metadata,
skip_types=skip_types,
treeinfo=treeinfo,
new_url=remote_url,
Expand All @@ -488,7 +494,7 @@ def get_treeinfo_data(remote, remote_url):
repository.last_sync_remote = remote
repository.last_sync_repo_version = version.number
repository.save()
if mirror:
if mirror_metadata:
with RpmPublication.create(version, pass_through=False) as publication:
add_metadata_to_publication(publication, version)
for (name, subrepo_version) in sub_repos:
Expand Down Expand Up @@ -542,7 +548,7 @@ def __init__(
remote,
repository,
deferred_download,
mirror,
mirror_metadata,
skip_types=None,
new_url=None,
treeinfo=None,
Expand All @@ -554,8 +560,10 @@ def __init__(
Args:
remote (RpmRemote or UlnRemote): The remote data to be used when syncing
repository (RpmRepository): The repository to be compared when optimizing sync
deferred_download (bool): if True the downloading will not happen now. If False, it will
deferred_download (bool): If True the downloading will not happen now. If False, it will
happen immediately.
mirror_metadata (bool): Influences which metadata files are downloaded and what
is done with them.
Keyword Args:
skip_types (list): List of content to skip
Expand All @@ -569,7 +577,7 @@ def __init__(
self.remote = remote
self.repository = repository
self.deferred_download = deferred_download
self.mirror = mirror
self.mirror_metadata = mirror_metadata

# How many directories deep this repo is nested within another repo (if at all).
# Backwards relative paths that are shallower than this depth are permitted (in mirror
Expand Down Expand Up @@ -651,14 +659,14 @@ async def run_repomdrecord_download(name, location_href, downloader):
checksum_types[record.type] = record_checksum_type
record.checksum_type = record_checksum_type

if self.mirror:
if self.mirror_metadata:
uses_base_url = record.location_base
illegal_relative_path = self.is_illegal_relative_path(record.location_href)

if uses_base_url or illegal_relative_path or record.type == "prestodelta":
raise ValueError(MIRROR_INCOMPATIBLE_REPO_ERR_MSG)

if not self.mirror and record.type not in types_to_download:
if not self.mirror_metadata and record.type not in types_to_download:
continue

base_url = record.location_base or self.remote_url
Expand All @@ -684,7 +692,7 @@ async def run_repomdrecord_download(name, location_href, downloader):
except FileNotFoundError:
raise

if self.mirror:
if self.mirror_metadata:
# optional signature and key files for repomd metadata
for file_href in ["repodata/repomd.xml.asc", "repodata/repomd.xml.key"]:
try:
Expand Down Expand Up @@ -1036,15 +1044,15 @@ async def parse_packages(self, primary_xml, filelists_xml, other_xml, file_exten

async with ProgressReport(**progress_data) as packages_pb:
# skip SRPM if defined
skip_srpms = "srpm" in self.skip_types and not self.mirror
skip_srpms = "srpm" in self.skip_types and not self.mirror_metadata

async def on_package(pkg):
"""Callback when handling a completed package.
Args:
pkg (createrepo_c.Package): A completed createrepo_c package.
"""
if self.mirror:
if self.mirror_metadata:
uses_base_url = pkg.location_base
illegal_relative_path = self.is_illegal_relative_path(pkg.location_href)

Expand Down
15 changes: 12 additions & 3 deletions pulp_rpm/app/viewsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
)

from pulp_rpm.app import tasks
from pulp_rpm.app.constants import SYNC_POLICIES
from pulp_rpm.app.models import (
DistributionTree,
Modulemd,
Expand Down Expand Up @@ -129,18 +130,26 @@ def sync(self, request, pk):
serializer.is_valid(raise_exception=True)
remote = serializer.validated_data.get("remote", repository.remote)
mirror = serializer.validated_data.get("mirror")
sync_policy = serializer.validated_data.get("sync_policy")
skip_types = serializer.validated_data.get("skip_types")
optimize = serializer.validated_data.get("optimize")

if repository.retain_package_versions > 0 and mirror:
raise DRFValidationError("Cannot use 'retain_package_versions' with mirror-mode sync")
if not sync_policy:
sync_policy = SYNC_POLICIES.ADDITIVE if not mirror else SYNC_POLICIES.MIRROR_COMPLETE

if sync_policy == SYNC_POLICIES.MIRROR_COMPLETE:
err_msg = "Cannot use '{}' in combination with a 'complete mirror' sync"
if repository.retain_package_versions > 0:
raise DRFValidationError(err_msg.format("retain_package_versions"))
if repository.autopublish:
raise DRFValidationError(err_msg.format("autopublish"))

result = dispatch(
tasks.synchronize,
shared_resources=[remote],
exclusive_resources=[repository],
kwargs={
"mirror": mirror,
"sync_policy": sync_policy,
"remote_pk": str(remote.pk),
"repository_pk": str(repository.pk),
"skip_types": skip_types,
Expand Down
Loading

0 comments on commit 57a4733

Please sign in to comment.