Skip to content

Commit

Permalink
Use separate script to prevent submit script too large (#380)
Browse files Browse the repository at this point in the history
Fix #377.

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
  • Loading branch information
njzjz committed Oct 19, 2023
1 parent 9720760 commit c3fa803
Show file tree
Hide file tree
Showing 9 changed files with 33 additions and 2 deletions.
3 changes: 3 additions & 0 deletions dpdispatcher/distributed_shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ def do_submit(self, job):
job_id_name = job.job_hash + "_job_id"
output_name = job.job_hash + ".out"
self.context.write_file(fname=script_file_name, write_str=script_str)
script_run_str = self.gen_script_command(job)
script_run_file_name = f"{job.script_file_name}.run"
self.context.write_file(fname=script_run_file_name, write_str=script_run_str)

resources = job.resources
submit_command = (
Expand Down
5 changes: 5 additions & 0 deletions dpdispatcher/dp_cloud_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ def gen_local_script(self, job):
script_str = self.gen_script(job)
script_file_name = job.script_file_name
self.context.write_local_file(fname=script_file_name, write_str=script_str)
script_run_str = self.gen_script_command(job)
script_run_file_name = f"{job.script_file_name}.run"
self.context.write_local_file(
fname=script_run_file_name, write_str=script_run_str
)
return script_file_name

def _gen_backward_files_list(self, job):
Expand Down
3 changes: 3 additions & 0 deletions dpdispatcher/fugaku.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ def do_submit(self, job):
job_id_name = job.job_hash + "_job_id"
# script_str = self.sub_script(job_dirs, cmd, args=args, resources=resources, outlog=outlog, errlog=errlog)
self.context.write_file(fname=script_file_name, write_str=script_str)
script_run_str = self.gen_script_command(job)
script_run_file_name = f"{job.script_file_name}.run"
self.context.write_file(fname=script_run_file_name, write_str=script_run_str)
# self.context.write_file(fname=os.path.join(self.context.submission.work_base, script_file_name), write_str=script_str)
# script_file_dir = os.path.join(self.context.submission.work_base)
script_file_dir = self.context.remote_root
Expand Down
3 changes: 3 additions & 0 deletions dpdispatcher/lsf.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ def do_submit(self, job):
script_str = self.gen_script(job)
job_id_name = job.job_hash + "_job_id"
self.context.write_file(fname=script_file_name, write_str=script_str)
script_run_str = self.gen_script_command(job)
script_run_file_name = f"{job.script_file_name}.run"
self.context.write_file(fname=script_run_file_name, write_str=script_run_str)

try:
stdin, stdout, stderr = self.context.block_checkcall(
Expand Down
7 changes: 5 additions & 2 deletions dpdispatcher/machine.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,17 +191,20 @@ def do_submit(self, job):
"abstract method do_submit should be implemented by derived class"
)

def gen_script_run_command(self, job):
    """Return the shell line that sources the job's separate ``.run`` script.

    Parameters
    ----------
    job
        Object exposing a ``script_file_name`` attribute; the run script is
        expected at ``$REMOTE_ROOT/<script_file_name>.run``.

    Returns
    -------
    str
        A ``source`` command pointing at the run script.
    """
    # $REMOTE_ROOT is left unexpanded here; it is resolved by the shell
    # that executes the generated script.
    run_line_template = "source $REMOTE_ROOT/{}.run"
    return run_line_template.format(job.script_file_name)

def gen_script(self, job):
    """Assemble the submission script for ``job``.

    The script is built by filling ``script_template`` with the header,
    custom flag lines, environment section, run command and end section
    produced by the corresponding ``gen_script_*`` methods.

    The command section holds only the line returned by
    ``gen_script_run_command`` (a ``source`` of the job's ``.run`` file),
    not the full command text, so the submit script itself stays small.

    Parameters
    ----------
    job
        The job to generate a script for; passed through unchanged to each
        ``gen_script_*`` helper.

    Returns
    -------
    str
        The formatted script text.
    """
    script_header = self.gen_script_header(job)
    script_custom_flags = self.gen_script_custom_flags_lines(job)
    script_env = self.gen_script_env(job)
    # Only the short "source ... .run" line is embedded here. Passing
    # ``script_command=`` a second time (as the merged diff text did) is a
    # SyntaxError, so exactly one value is supplied for that placeholder.
    script_run_command = self.gen_script_run_command(job)
    script_end = self.gen_script_end(job)
    script = script_template.format(
        script_header=script_header,
        script_custom_flags=script_custom_flags,
        script_env=script_env,
        script_command=script_run_command,
        script_end=script_end,
    )
    return script
Expand Down
5 changes: 5 additions & 0 deletions dpdispatcher/openapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ def gen_local_script(self, job):
script_str = self.gen_script(job)
script_file_name = job.script_file_name
self.context.write_local_file(fname=script_file_name, write_str=script_str)
script_run_str = self.gen_script_command(job)
script_run_file_name = f"{job.script_file_name}.run"
self.context.write_local_file(
fname=script_run_file_name, write_str=script_run_str
)
return script_file_name

def _gen_backward_files_list(self, job):
Expand Down
3 changes: 3 additions & 0 deletions dpdispatcher/pbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ def do_submit(self, job):
job_id_name = job.job_hash + "_job_id"
# script_str = self.sub_script(job_dirs, cmd, args=args, resources=resources, outlog=outlog, errlog=errlog)
self.context.write_file(fname=script_file_name, write_str=script_str)
script_run_str = self.gen_script_command(job)
script_run_file_name = f"{job.script_file_name}.run"
self.context.write_file(fname=script_run_file_name, write_str=script_run_str)
# self.context.write_file(fname=os.path.join(self.context.submission.work_base, script_file_name), write_str=script_str)
# script_file_dir = os.path.join(self.context.submission.work_base)
script_file_dir = self.context.remote_root
Expand Down
3 changes: 3 additions & 0 deletions dpdispatcher/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ def do_submit(self, job):
job_id_name = job.job_hash + "_job_id"
output_name = job.job_hash + ".out"
self.context.write_file(fname=script_file_name, write_str=script_str)
script_run_str = self.gen_script_command(job)
script_run_file_name = f"{job.script_file_name}.run"
self.context.write_file(fname=script_run_file_name, write_str=script_run_str)
ret, stdin, stdout, stderr = self.context.block_call(
"cd {} && {{ nohup bash {} 1>>{} 2>>{} & }} && echo $!".format(
shlex.quote(self.context.remote_root),
Expand Down
3 changes: 3 additions & 0 deletions dpdispatcher/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ def do_submit(self, job):
job_id_name = job.job_hash + "_job_id"
# script_str = self.sub_script(job_dirs, cmd, args=args, resources=resources, outlog=outlog, errlog=errlog)
self.context.write_file(fname=script_file_name, write_str=script_str)
script_run_str = self.gen_script_command(job)
script_run_file_name = f"{job.script_file_name}.run"
self.context.write_file(fname=script_run_file_name, write_str=script_run_str)
# self.context.write_file(fname=os.path.join(self.context.submission.work_base, script_file_name), write_str=script_str)
ret, stdin, stdout, stderr = self.context.block_call(
"cd {} && {} {}".format(
Expand Down

0 comments on commit c3fa803

Please sign in to comment.