Skip to content

Commit

Permalink
Taught import to be able to use less than all-available workers.
Browse files Browse the repository at this point in the history
IMPORT_WORKERS_PERCENT is configurable in settings. We will
document/expose this in a future PR, to keep this one maximally
backportable. Default behavior remains "all workers".

fixes pulp#4068.

(cherry picked from commit b5cb1d1)
  • Loading branch information
ggainey committed Jul 28, 2023
1 parent 1b9a85b commit 58f8fcd
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGES/4068.bugfix
@@ -0,0 +1 @@
Taught pulp-import to be able to use a subset of available worker-threads.
4 changes: 4 additions & 0 deletions pulpcore/app/settings.py
Expand Up @@ -296,6 +296,10 @@

DEFAULT_AUTO_FIELD = "django.db.models.AutoField"

# What percentage of available-workers will pulpimport use at a time, max
# By default, use all available workers.
IMPORT_WORKERS_PERCENT = 100

# HERE STARTS DYNACONF EXTENSION LOAD (Keep at the very bottom of settings.py)
# Read more at https://dynaconf.readthedocs.io/en/latest/guides/django.html
import dynaconf # noqa
Expand Down
21 changes: 20 additions & 1 deletion pulpcore/app/tasks/importer.py
Expand Up @@ -8,6 +8,7 @@
from gettext import gettext as _
from logging import getLogger

from django.conf import settings
from django.core.files.storage import default_storage
from django.db.models import F
from naya.json import stream_array, tokenize
Expand All @@ -34,6 +35,7 @@
ContentArtifactResource,
)
from pulpcore.constants import TASK_STATES
from pulpcore.tasking.pulpcore_worker import Worker
from pulpcore.tasking.tasks import dispatch

log = getLogger(__name__)
Expand Down Expand Up @@ -482,6 +484,18 @@ def validate_and_assemble(toc_filename):
default_storage.save(base_path, f)

# Now import repositories, in parallel.

# We want to be able to limit the number of available-workers that import will consume,
# so that pulp can continue to work while doing an import. We accomplish this by creating
# a reserved-resource string for each repo-import-task based on that repo's index in
# the dispatch loop, mod number-of-workers-to-consume.
#
# By default (setting is not-set), import will continue to use 100% of the available
# workers.
import_workers_percent = int(settings.get("IMPORT_WORKERS_PERCENT", 100))
total_workers = Worker.objects.online_workers().count()
import_workers = max(1, int(total_workers * (import_workers_percent / 100.0)))

with open(os.path.join(temp_dir, REPO_FILE), "r") as repo_data_file:
data = json.load(repo_data_file)
gpr = GroupProgressReport(
Expand All @@ -493,7 +507,10 @@ def validate_and_assemble(toc_filename):
)
gpr.save()

for src_repo in data:
for index, src_repo in enumerate(data):
# pulpcore-worker limiter
worker_rsrc = f"import-worker-{index % import_workers}"
exclusive_resources = [worker_rsrc]
try:
dest_repo = _destination_repo(importer, src_repo["name"])
except Repository.DoesNotExist:
Expand All @@ -503,6 +520,8 @@ def validate_and_assemble(toc_filename):
)
)
continue
else:
exclusive_resources.append(dest_repo)

dispatch(
import_repository_version,
Expand Down

0 comments on commit 58f8fcd

Please sign in to comment.