Skip to content

Commit

Permalink
Fixed repository version repair with S3
Browse files Browse the repository at this point in the history
This patch also adds repair API tests for S3 storage backend.

fixes: pulp#4776
fixes: pulp#4806
  • Loading branch information
dkliban committed Nov 30, 2023
1 parent 701bbc4 commit 40f49b3
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 19 deletions.
2 changes: 2 additions & 0 deletions CHANGES/4776.bugfix
@@ -0,0 +1,2 @@
Fixed a bug where the repository version repair operation would accidentally try to look for a file
in the default domain.
2 changes: 2 additions & 0 deletions CHANGES/4806.bugfix
@@ -0,0 +1,2 @@
Fixed a bug where the repository version repair operation would always report a digest mismatch when
using S3 storage.
33 changes: 26 additions & 7 deletions pulpcore/app/tasks/repository.py
Expand Up @@ -5,6 +5,7 @@
import hashlib

from asgiref.sync import sync_to_async
from botocore.exceptions import ClientError
from django.db import transaction
from rest_framework.serializers import ValidationError

Expand Down Expand Up @@ -96,16 +97,34 @@ async def _repair_ca(content_artifact, repaired=None):


def _verify_artifact(artifact):
try:
# verify files digest
hasher = hashlib.sha256()
with artifact.file as fp:
for chunk in fp.chunks(CHUNK_SIZE):
domain = artifact.pulp_domain
storage = domain.get_storage()
file_name = artifact.file.name
hasher = hashlib.sha256()
if domain.storage_class == "pulpcore.app.models.storage.FileSystem":
try:
# verify files digest
path = storage.path(file_name)
with open(path) as fp:
for chunk in fp.chunks(CHUNK_SIZE):
hasher.update(chunk)
except FileNotFoundError:
return False
elif domain.storage_class == "storages.backends.s3boto3.S3Boto3Storage":
bucket_name = domain.get_storage().bucket_name
s3_object = storage.connection.Object(bucket_name, file_name).get()
try:
for chunk in s3_object["Body"].iter_chunks(CHUNK_SIZE):
hasher.update(chunk)
return hasher.hexdigest() == artifact.sha256
except FileNotFoundError:
except ClientError:
return False
else:
# We don't currently support performing a checksum check with other storage backends
# Assume that it is wrong.
return False

return hasher.hexdigest() == artifact.sha256


async def _repair_artifacts_for_content(subset=None, verify_checksums=True):
loop = asyncio.get_event_loop()
Expand Down
38 changes: 26 additions & 12 deletions pulpcore/tests/functional/api/using_plugin/test_repair.py
@@ -1,6 +1,7 @@
import pytest
import os

from django.core.files.storage import default_storage
from random import sample

from pulpcore.client.pulpcore import Repair
Expand All @@ -14,6 +15,7 @@
SUPPORTED_STORAGE_FRAMEWORKS = [
"django.core.files.storage.FileSystemStorage",
"pulpcore.app.models.storage.FileSystem",
"storages.backends.s3boto3.S3Boto3Storage",
]

pytestmark = pytest.mark.skipif(
Expand Down Expand Up @@ -42,18 +44,30 @@ def repository_with_corrupted_artifacts(
# STEP 2: sample artifacts that will be modified on the filesystem later on
content1, content2 = sample(get_files_in_manifest(remote.url), 2)

# Modify one artifact on disk.
artifact1_path = os.path.join(
settings.MEDIA_ROOT, artifacts_api_client.list(sha256=content1[1]).results[0].file
)
with open(artifact1_path, "r+b") as f:
f.write(b"$a bit rot")

# Delete another one from disk.
artifact2_path = os.path.join(
settings.MEDIA_ROOT, artifacts_api_client.list(sha256=content2[1]).results[0].file
)
os.remove(artifact2_path)
if settings.DEFAULT_FILE_STORAGE == "pulpcore.app.models.storage.FileSystem":
# Modify one artifact on disk
artifact1_path = os.path.join(
settings.MEDIA_ROOT, artifacts_api_client.list(sha256=content1[1]).results[0].file
)
with open(artifact1_path, "r+b") as f:
f.write(b"$a bit rot")

# Delete another one from disk.
artifact2_path = os.path.join(
settings.MEDIA_ROOT, artifacts_api_client.list(sha256=content2[1]).results[0].file
)
os.remove(artifact2_path)
elif settings.DEFAULT_FILE_STORAGE == "storages.backends.s3boto3.S3Boto3Storage":
# Modify one object in S3
storage = default_storage
path1 = artifacts_api_client.list(sha256=content1[1]).results[0].file
s3_obj = storage.connection.Object(storage.bucket_name, path1)
s3_obj.put()

# Delete an object from S3
path2 = artifacts_api_client.list(sha256=content2[1]).results[0].file
s3_obj = storage.connection.Object(storage.bucket_name, path2)
s3_obj.delete()

return repo

Expand Down

0 comments on commit 40f49b3

Please sign in to comment.