-
Notifications
You must be signed in to change notification settings - Fork 41
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* feat: code complete on task_topological_generations_without_scheduler. Getting 200 tasks completed per minute with 50 workers on a MacBook Pro — roughly 3.3 tasks per second. * fix: spoke execution mode using the reporting role
- Loading branch information
1 parent
a283eb0
commit bd2330e
Showing
28 changed files
with
1,011 additions
and
956 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
## Export all env vars from a CodeBuild build id into your local shell |  |
aws codebuild batch-get-builds --ids servicecatalog-puppet-deploy-in-spoke:0568a289-ebb6-4189-9bc0-5d6a4ff4879d | jq -r '.builds[0].environment.environmentVariables[]| "export \(.name)=\"\(.value)\""' | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Queue/task status values shared between the scheduler and the workers.
COMPLETED = "COMPLETED"
NOT_SET = "NOT_SET"
ERRORED = "ERRORED"
# Key under which a task's current status is stored in its task dict.
QUEUE_STATUS = "QUEUE_STATUS"
IN_PROGRESS = "IN_PROGRESS"
# Task-parameter key listing the named resources a task must lock before it runs.
RESOURCES_REQUIRED = "resources_required"
# Control-queue message telling the main process that all work is finished.
CONTROL_EVENT__COMPLETE = "CONTROL_EVENT__COMPLETE"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import logging | ||
|
||
import networkx as nx | ||
|
||
from servicecatalog_puppet import constants | ||
from servicecatalog_puppet.waluigi.constants import ( | ||
COMPLETED, | ||
NOT_SET, | ||
ERRORED, | ||
QUEUE_STATUS, | ||
) | ||
|
||
logger = logging.getLogger(constants.PUPPET_SCHEDULER_LOGGER_NAME) | ||
|
||
|
||
def build_the_dag(tasks_to_run: dict):
    """Build the dependency DiGraph of the tasks that still need to run.

    Each task becomes a node keyed by its uid; an edge uid -> duid is added
    for every dependency listed in the task's "dependencies_by_reference".
    Tasks already COMPLETED are dropped from the graph; for each ERRORED
    task, the task itself and everything that (transitively) depends on it
    (its ancestors, given the edge direction) are dropped so they are never
    scheduled.

    :param tasks_to_run: mapping of task uid -> task dict (the task reference)
    :return: a networkx.DiGraph of the remaining runnable tasks
    """
    g = nx.DiGraph()
    for uid, task in tasks_to_run.items():
        g.add_nodes_from([(uid, task)])
        for duid in task.get("dependencies_by_reference", []):
            if tasks_to_run.get(duid):
                g.add_edge(uid, duid)
            else:
                # Missing dependencies are expected in spoke execution mode
                # when the dependency already ran inside the hub.
                logger.debug(
                    f"{duid} is not in the task reference - this is fine when running in spoke execution mode and when the task was executed within the hub"
                )

    for uid, task in tasks_to_run.items():
        status = task.get(QUEUE_STATUS, NOT_SET)
        if status == COMPLETED:
            # Already done - nothing to schedule; it may also have been
            # removed already as a dependent of an errored task.
            if g.has_node(uid):
                g.remove_node(uid)
        elif status == ERRORED:
            # Bug fix: guard with has_node() - previously nx.ancestors()
            # raised an uncaught NetworkXError when uid had already been
            # removed as a dependent of an earlier ERRORED task.
            if g.has_node(uid):
                # Everything that depends on the failed task can never run.
                for n in nx.ancestors(g, uid):
                    if g.has_node(n):
                        g.remove_node(n)
                g.remove_node(uid)

    return g
|
||
|
||
def make_readable_in_codebuild_logs(input):
    """Return the English name of *input*, e.g. 21 -> "twenty-one".

    Used so worker counts stand out when scanning CodeBuild logs.
    Exact words are produced for 0-99 (unchanged behaviour); any other
    value now falls back to its decimal string instead of raising
    IndexError (>= 100) or wrapping around to the wrong word (< 0).

    :param input: number of workers (shadows the builtin ``input``; the
        name is kept for backward-compatible keyword calls)
    :return: the English number word, or ``str(input)`` outside 0-99
    """
    numbers = "zero one two three four five six seven eight nine".split()
    numbers.extend("ten eleven twelve thirteen fourteen fifteen sixteen".split())
    numbers.extend("seventeen eighteen nineteen".split())
    numbers.extend(
        # "twenty-zero" would be wrong - a bare tens word covers that case.
        tens if ones == "zero" else f"{tens}-{ones}"
        for tens in "twenty thirty forty fifty sixty seventy eighty ninety".split()
        for ones in numbers[0:10]
    )
    if 0 <= input < len(numbers):
        return numbers[input]
    return str(input)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
from servicecatalog_puppet import serialisation_utils | ||
from servicecatalog_puppet.waluigi.constants import RESOURCES_REQUIRED | ||
from servicecatalog_puppet.waluigi.dag_utils import logger | ||
|
||
|
||
def are_resources_are_free_for_task(task_parameters: dict, resources_file_path: str):
    """Load the shared resources file and check the task's required resources.

    :param task_parameters: the task's parameters (RESOURCES_REQUIRED key)
    :param resources_file_path: path of the JSON file tracking locked resources
    :return: tuple of (all required resources free?, current resources-in-use dict)
    """
    with open(resources_file_path, "rb") as handle:
        currently_in_use = serialisation_utils.json_loads(handle.read())
    return are_resources_are_free_for_task_dict(task_parameters, currently_in_use)
|
||
|
||
def are_resources_are_free_for_task_dict(task_parameters, resources_in_use):
    """Report whether every resource the task requires is currently free.

    A resource is free when its lookup in *resources_in_use* yields False
    (i.e. it is absent, or explicitly stored as False).

    :param task_parameters: the task's parameters (RESOURCES_REQUIRED key)
    :param resources_in_use: mapping of resource name -> holding task reference
    :return: tuple of (all required resources free?, resources_in_use unchanged)
    """
    for required in task_parameters.get(RESOURCES_REQUIRED, []):
        if resources_in_use.get(required, False) is not False:
            return False, resources_in_use
    return True, resources_in_use
|
||
|
||
def lock_resources_for_task(
    task_reference: str,
    task_parameters: dict,
    resources_in_use: dict,
    resources_file_path: str,
):
    """Record *task_reference* as the holder of each resource it requires.

    Mutates *resources_in_use* in place and persists the whole mapping to
    *resources_file_path* so other workers can see the locks.

    :param task_reference: identifier of the task taking the locks
    :param task_parameters: the task's parameters (RESOURCES_REQUIRED key)
    :param resources_in_use: mapping of resource name -> holding task reference
    :param resources_file_path: path of the JSON file tracking locked resources
    """
    for required in task_parameters.get(RESOURCES_REQUIRED, []):
        resources_in_use[required] = task_reference
    with open(resources_file_path, "wb") as handle:
        handle.write(serialisation_utils.json_dumps(resources_in_use))
|
||
|
||
def unlock_resources_for_task(task_parameters: dict, resources_file_path: str):
    """Release the resources held by a task and persist the new state.

    Reads the shared resources file, removes each resource named in the
    task's RESOURCES_REQUIRED list, then writes the mapping back.

    :param task_parameters: the task's parameters; RESOURCES_REQUIRED lists
        the resources to release, "task_reference" is used only for logging
    :param resources_file_path: path of the JSON file tracking locked resources
    """
    with open(resources_file_path, "rb") as f:
        resources_in_use = serialisation_utils.json_loads(f.read())
    for r in task_parameters.get(RESOURCES_REQUIRED, []):
        try:
            del resources_in_use[r]
        except KeyError:
            # Best effort: the resource was never locked (or was already
            # released) - log and carry on rather than crash the worker.
            # Fix: logger.warn is deprecated (since Python 3.3); use
            # warning() with lazy %-style args.
            logger.warning(
                "%s tried to unlock %s but it wasn't present",
                task_parameters.get("task_reference"),
                r,
            )
    with open(resources_file_path, "wb") as f:
        f.write(serialisation_utils.json_dumps(resources_in_use))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
import multiprocessing | ||
import os | ||
import time | ||
|
||
import networkx as nx | ||
|
||
from servicecatalog_puppet.waluigi.constants import CONTROL_EVENT__COMPLETE | ||
from servicecatalog_puppet.waluigi.dag_utils import ( | ||
logger, | ||
build_the_dag, | ||
make_readable_in_codebuild_logs, | ||
) | ||
from servicecatalog_puppet.waluigi.shared_tasks import task_processing_time | ||
from servicecatalog_puppet.waluigi.shared_tasks import task_trace | ||
from servicecatalog_puppet.waluigi.shared_tasks.task_topological_generations_with_scheduler import ( | ||
scheduler_task as task_topological_generations_with_scheduler_scheduler_task, | ||
) | ||
from servicecatalog_puppet.waluigi.shared_tasks.workers.worker_requiring_scheduler import ( | ||
worker_task as worker_requiring_scheduler_worker_task, | ||
) | ||
from servicecatalog_puppet.waluigi.shared_tasks.task_topological_generations_without_scheduler import ( | ||
worker_task as task_topological_generations_without_scheduler_worker_task, | ||
) | ||
|
||
# Passed to the scheduler task; presumably the seconds slept between queue
# refills - confirm against the scheduler implementation.
QUEUE_REFILL_SLEEP_DURATION = 1
|
||
|
||
def get_tasks(scheduling_algorithm):
    """Resolve a scheduling algorithm name to its task callables.

    :param scheduling_algorithm: name of the scheduling strategy to use
    :return: tuple of (worker task callable, scheduler task callable or None
        when the algorithm needs no dedicated scheduler process)
    :raises ValueError: if the algorithm name is not recognised
    """
    algorithms = {
        "topological_generations": (
            worker_requiring_scheduler_worker_task,
            task_topological_generations_with_scheduler_scheduler_task,
        ),
        "topological_generations_without_scheduler": (
            task_topological_generations_without_scheduler_worker_task,
            None,
        ),
    }
    if scheduling_algorithm not in algorithms:
        raise ValueError(f"Unsupported scheduling_algorithm: {scheduling_algorithm}")
    return algorithms[scheduling_algorithm]
|
||
|
||
def run(
    num_workers,
    tasks_reference,
    manifest_files_path,
    manifest_task_reference_file_path,
    puppet_account_id,
    execution_mode,
    scheduling_algorithm,
):
    """Execute every task in *tasks_reference* across worker processes.

    Builds the dependency DAG, flattens it into an execution order, spawns
    ``num_workers`` worker processes (plus an optional scheduler process and
    two bookkeeping processes), then blocks until a CONTROL_EVENT__COMPLETE
    message arrives on the control queue and tears everything down.

    :param num_workers: number of worker processes to spawn
    :param tasks_reference: mapping of task uid -> task dict
    :param manifest_files_path: directory used for shared files (resources.json)
    :param manifest_task_reference_file_path: path handed through to workers
    :param puppet_account_id: account id handed through to workers/tracers
    :param execution_mode: e.g. hub or spoke; handed through to workers/tracers
    :param scheduling_algorithm: selects the worker/scheduler implementations
    :raises ValueError: if there are no tasks to run or the algorithm is unknown
    """
    logger.info(
        f"Executing {len(tasks_reference.keys())} tasks with {make_readable_in_codebuild_logs(num_workers)} processes in {execution_mode} with scheduling_algorithm {scheduling_algorithm}!"
    )

    # Manager-backed structures are shared (by proxy) across all processes.
    manager = multiprocessing.Manager()
    all_tasks = manager.dict(tasks_reference)
    resources = manager.dict()
    tasks_to_run = manager.list()

    # Flatten the DAG's topological generations into tasks_to_run.  Popping
    # from the end reverses the generation order - dependencies-first,
    # matching the uid -> dependency edge direction used by build_the_dag.
    dag = build_the_dag(tasks_reference)
    generations = list(nx.topological_generations(dag))
    if not generations:
        raise ValueError("No tasks to run")
    while generations:
        tasks_to_run.extend(list(generations.pop()))

    resources_file_path = f"{manifest_files_path}/resources.json"
    start = time.time()
    # Exposed so child processes can compute elapsed time from the same origin.
    os.environ["SCT_START_TIME"] = str(start)

    # Start with an empty resources-in-use file (no locks held).
    with open(resources_file_path, "w") as f:
        f.write("{}")

    # Aliased so the concurrency primitives could be swapped (e.g. for
    # threading equivalents) in one place.
    QueueKlass = multiprocessing.Queue
    EventKlass = multiprocessing.Event
    ExecutorKlass = multiprocessing.Process
    LockKlass = multiprocessing.Lock

    lock = LockKlass()
    task_queue = QueueKlass()
    results_queue = QueueKlass()
    # NOTE(review): control_event is always None here - it appears to be a
    # placeholder slot in the worker/scheduler signatures; confirm.
    control_event = None
    task_processing_time_queue = QueueKlass()
    task_trace_queue = QueueKlass()
    control_queue = QueueKlass()
    complete_event = EventKlass()

    # Positional argument tuples for each kind of child process; each worker
    # additionally receives its worker id as the first argument (below).
    worker_task_args = (
        lock,
        task_queue,
        results_queue,
        task_processing_time_queue,
        task_trace_queue,
        control_queue,
        control_event,
        manifest_files_path,
        manifest_task_reference_file_path,
        puppet_account_id,
        resources_file_path,
        all_tasks,
        resources,
        tasks_to_run,
    )
    scheduler_task_args = (
        num_workers,
        task_queue,
        results_queue,
        control_queue,
        control_event,
        QUEUE_REFILL_SLEEP_DURATION,
        all_tasks,
        tasks_to_run,
    )
    task_processing_time_args = (
        task_processing_time_queue,
        complete_event,
        execution_mode,
    )
    task_trace_args = (
        task_trace_queue,
        complete_event,
        puppet_account_id,
        execution_mode,
    )

    # scheduler_task_to_use is None for algorithms that need no scheduler.
    worker_task_to_use, scheduler_task_to_use = get_tasks(scheduling_algorithm)

    processes = [
        ExecutorKlass(
            name=f"worker#{i}",
            target=worker_task_to_use,
            args=(str(i),) + worker_task_args,
        )
        for i in range(num_workers)
    ]
    scheduler_thread = None

    if scheduler_task_to_use:
        scheduler_thread = ExecutorKlass(
            name="scheduler", target=scheduler_task_to_use, args=scheduler_task_args,
        )
    # Bookkeeping processes: timing metrics and task tracing.
    on_task_processing_time_thread = ExecutorKlass(
        name="on_task_processing_time",
        target=task_processing_time.on_task_processing_time_task,
        args=task_processing_time_args,
    )
    on_task_trace_thread = ExecutorKlass(
        name="on_task_trace", target=task_trace.on_task_trace, args=task_trace_args,
    )

    # Start order: bookkeeping first so no events are missed, then workers,
    # then (if any) the scheduler that feeds them.
    on_task_processing_time_thread.start()
    on_task_trace_thread.start()

    for process in processes:
        process.start()
    if scheduler_thread:
        scheduler_thread.start()
    # Block until a child signals completion on the control queue.
    while True:
        logger.info("Waiting for shutdown message")
        message = control_queue.get()
        if message == CONTROL_EVENT__COMPLETE:
            logger.info(f"Got the {message}, starting shutdown process")
            break

    for process in processes:
        process.terminate()
    # Grace period so the bookkeeping processes can drain their queues
    # before being terminated.
    time.sleep(10)
    on_task_processing_time_thread.terminate()
    on_task_trace_thread.terminate()
    logger.info(f"Time taken = {time.time() - start:.10f}")
Oops, something went wrong.