Skip to content

Commit

Permalink
Remove import/export's dependency on pulp_ids.
Browse files Browse the repository at this point in the history
Dealing with this required slimming down the fields we export/import,
so #6515 got addressed along the way as well.

Required PR: pulp/pulp_file#406
closes #6807
closes #6515
  • Loading branch information
ggainey committed Jun 6, 2020
1 parent c516244 commit d77c6b4
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 39 deletions.
1 change: 1 addition & 0 deletions CHANGES/6515.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Removed unnecessary fields from the import/export transfer.
1 change: 1 addition & 0 deletions CHANGES/6807.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Cured import/export's addiction to pulp_ids.
8 changes: 2 additions & 6 deletions pulpcore/app/importexport.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from pulpcore.app.models.repository import Repository
from pulpcore.app.modelresource import (
ArtifactResource,
ContentResource,
ContentArtifactResource,
RepositoryResource,
)
Expand Down Expand Up @@ -101,17 +100,14 @@ def export_content(export, repository_version):
Args:
export (django.db.models.PulpExport): export instance that's doing the export
repository_version (django.db.models.RepositoryVersion): RepositoryVersion being exported
last_export (django.db.models.PulpExport): previous export of the 'owning' Exporter
"""
dest_dir = os.path.join(
"repository-{}_{}".format(
str(repository_version.repository.pulp_id), repository_version.number
str(repository_version.repository.name), repository_version.number
)
)
# export the resources pulpcore is responsible for
resource = ContentResource(repository_version)
_write_export(export.tarfile, resource, dest_dir)

# Export the connection between content and artifacts
resource = ContentArtifactResource(repository_version)
_write_export(export.tarfile, resource, dest_dir)

Expand Down
18 changes: 18 additions & 0 deletions pulpcore/app/migrations/0033_content_export_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 2.2.11 on 2020-06-05 13:17

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add a nullable ``export_id`` UUID field to the ``content`` model."""

    # This migration is applied on top of 0032_export_to_chunks in the 'core' app.
    dependencies = [('core', '0032_export_to_chunks')]

    operations = [
        # Positional arguments are (model_name, name, field).
        migrations.AddField('content', 'export_id', models.UUIDField(null=True)),
    ]
52 changes: 43 additions & 9 deletions pulpcore/app/modelresource.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from import_export import fields
from import_export.widgets import ForeignKeyWidget
from logging import getLogger

from pulpcore.app.models.content import (
Artifact,
Content,
Expand All @@ -7,36 +11,66 @@
from pulpcore.plugin.importexport import QueryModelResource


log = getLogger(__name__)


#
# Artifact and Repository are different from other import-export entities, in that they are not
# repo-version-specific.
#
class ArtifactResource(QueryModelResource):
    """Import/export resource for Artifact, identified on import by its sha256."""

    class Meta:
        model = Artifact
        # Rows are matched on their checksum; pulp_id is excluded from the transfer.
        import_id_fields = ("sha256",)
        exclude = ("pulp_id", "pulp_created", "pulp_last_updated")


class RepositoryResource(QueryModelResource):
    """Import/export resource for Repository, identified on import by its name."""

    class Meta:
        model = Repository
        # pulp_id, the timestamps and the 'content' field are left out of the transfer.
        exclude = ("pulp_id", "pulp_created", "pulp_last_updated", "content")
        import_id_fields = ("name",)


#
# Content, and ContentArtifact are per-repo-version import/exports, and can
# follow the same pattern as a plugin writer would follow
# ContentArtifact is different from other import-export entities because it has no 'natural key'
# other than a pulp_id, which isn't shared across instances. We do some magic to link up
# ContentArtifacts to their matching imported Content.
#
class ContentResource(QueryModelResource):
def set_up_queryset(self):
return self.repo_version.content
class ContentArtifactResource(QueryModelResource):

class Meta:
model = Content
fields = ("pulp_id", "pulp_type")
artifact = fields.Field(
column_name="artifact", attribute="artifact", widget=ForeignKeyWidget(Artifact, "sha256")
)

def before_import_row(self, row, **kwargs):
# Find the 'original uuid' of the Content for this row, look it up as the
# 'export_id' of imported Content, and then replace the Content-pk with its
# (new) uuid
linked_content = Content.objects.get(export_id=row["content"])
row["content"] = str(linked_content.pulp_id)

class ContentArtifactResource(QueryModelResource):
def set_up_queryset(self):
return ContentArtifact.objects.filter(content__in=self.repo_version.content)

class Meta:
model = ContentArtifact
import_id_fields = (
"content",
"relative_path",
)
exclude = (
"pulp_created",
"pulp_last_updated",
"_artifacts",
"pulp_id",
)
2 changes: 2 additions & 0 deletions pulpcore/app/models/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,8 @@ class Content(MasterModel, QueryMixin):

objects = BulkCreateManager()

export_id = models.UUIDField(null=True) # Used by PulpImport/Export processing

class Meta:
verbose_name_plural = "content"
unique_together = ()
Expand Down
51 changes: 28 additions & 23 deletions pulpcore/app/tasks/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
from pulpcore.app.modelresource import (
ArtifactResource,
ContentArtifactResource,
ContentResource,
)
from pulpcore.tasking.tasks import enqueue_with_reservation

Expand All @@ -40,17 +39,22 @@


def _import_file(fpath, resource_class):
log.info(_("Importing file {}.").format(fpath))
with open(fpath, "r") as json_file:
data = Dataset().load(json_file.read(), format="json")
resource = resource_class()
return resource.import_data(data, raise_errors=True)
try:
log.info(_("Importing file {}.").format(fpath))
with open(fpath, "r") as json_file:
data = Dataset().load(json_file.read(), format="json")
resource = resource_class()
log.info(_("...Importing resource {}.").format(resource.__class__.__name__))
return resource.import_data(data, raise_errors=True)
except AttributeError:
log.error(_("FAILURE importing file {}!").format(fpath))
raise


def _repo_version_path(src_repo):
"""Find the repo version path in the export based on src_repo json."""
src_repo_version = int(src_repo["next_version"]) - 1
return f"repository-{src_repo['pulp_id']}_{src_repo_version}"
return f"repository-{src_repo['name']}_{src_repo_version}"


def _check_versions(version_json):
Expand Down Expand Up @@ -80,13 +84,13 @@ def _check_versions(version_json):
raise ValidationError((" ".join(error_messages)))


def import_repository_version(destination_repo_pk, source_repo_pk, tar_path):
def import_repository_version(destination_repo_pk, source_repo_name, tar_path):
"""
Import a repository version from a Pulp export.
Args:
destination_repo_pk (str): Primary key of Repository to import into.
source_repo_pk (str): Primary key of the Repository in the export.
source_repo_name (str): Name of the Repository in the export.
tar_path (str): A path to export tar.
"""
dest_repo = Repository.objects.get(pk=destination_repo_pk)
Expand All @@ -99,7 +103,7 @@ def import_repository_version(destination_repo_pk, source_repo_pk, tar_path):
with open(os.path.join(temp_dir, REPO_FILE), "r") as repo_data_file:
data = json.load(repo_data_file)

src_repo = next(repo for repo in data if repo["pulp_id"] == source_repo_pk)
src_repo = next(repo for repo in data if repo["name"] == source_repo_name)
rv_path = os.path.join(temp_dir, _repo_version_path(src_repo))

if dest_repo.pulp_type != src_repo["pulp_type"]:
Expand All @@ -121,23 +125,24 @@ def import_repository_version(destination_repo_pk, source_repo_pk, tar_path):
if re.match(fr"^{_repo_version_path(src_repo)}/.+", mem.name):
tar.extract(mem, path=temp_dir)

# Untyped Content
content_path = os.path.join(rv_path, CONTENT_FILE)
c_result = _import_file(content_path, ContentResource)
content = Content.objects.filter(pk__in=[r.object_id for r in c_result.rows])

# Content Artifacts
ca_path = os.path.join(rv_path, CA_FILE)
_import_file(ca_path, ContentArtifactResource)

# Content
plugin_name = src_repo["pulp_type"].split(".")[0]
cfg = get_plugin_config(plugin_name)

resulting_content_ids = []
for res_class in cfg.exportable_classes:
filename = f"{res_class.__module__}.{res_class.__name__}.json"
_import_file(os.path.join(rv_path, filename), res_class)
a_result = _import_file(os.path.join(rv_path, filename), res_class)
resulting_content_ids.extend(
row.object_id for row in a_result.rows if row.import_type in ("new", "update")
)

# Once all content exists, create the ContentArtifact links
ca_path = os.path.join(rv_path, CA_FILE)
_import_file(ca_path, ContentArtifactResource)

# Create the repo version
content = Content.objects.filter(pk__in=resulting_content_ids)
with dest_repo.new_version() as new_version:
new_version.set_content(content)

Expand All @@ -161,10 +166,10 @@ def destination_repo(source_repo_name):

log.info(_("Importing {}.").format(path))
importer = PulpImporter.objects.get(pk=importer_pk)
pulp_import = PulpImport.objects.create(
the_import = PulpImport.objects.create(
importer=importer, task=Task.current(), params={"path": path}
)
CreatedResource.objects.create(content_object=pulp_import)
CreatedResource.objects.create(content_object=the_import)

task_group = TaskGroup.objects.create(description=f"Import of {path}")
CreatedResource.objects.create(content_object=task_group)
Expand Down Expand Up @@ -207,7 +212,7 @@ def destination_repo(source_repo_name):
enqueue_with_reservation(
import_repository_version,
[dest_repo],
args=[dest_repo.pk, src_repo["pulp_id"], path],
args=[dest_repo.pk, src_repo["name"], path],
task_group=task_group,
)

Expand Down
22 changes: 21 additions & 1 deletion pulpcore/plugin/importexport.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,25 @@ def __init__(self, repo_version=None):
if repo_version:
self.queryset = self.set_up_queryset()


class BaseContentResource(QueryModelResource):
    """
    A QueryModelResource that knows how to fill in the 'export_id' export-field.

    On export, 'export_id' is populated with the pulp_id of the content being exported.

    All Content-based resources being import/exported should subclass from this class.
    """

    class Meta:
        exclude = (
            "_artifacts",
            "content",
            "content_ptr",
            "pulp_id",
            "pulp_created",
            "pulp_last_updated",
        )
        import_id_fields = ("pulp_id",)

    def dehydrate_export_id(self, content):
        """Return the pulp_id of ``content`` as a string, for use as its 'export_id'."""
        source_id = content.pulp_id
        return str(source_id)

0 comments on commit d77c6b4

Please sign in to comment.