Skip to content

Commit

Permalink
Merge pull request #28 from felix5572/master
Browse files Browse the repository at this point in the history
refact user api
  • Loading branch information
felix5572 committed May 31, 2021
2 parents b1f6521 + fe490d0 commit d1d1fb4
Show file tree
Hide file tree
Showing 38 changed files with 593 additions and 649 deletions.
85 changes: 58 additions & 27 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,50 +44,81 @@ dpdispatcher is maintained by deepmodeling's developers now and welcome other pe


## usage example
## example


```python3
machine = Machine.load_from_json('machine.json')
resources = Resources.load_from_json('resources.json')

machine = Machine.load_from_json_file(json_path='jsons/machine_local_shell.json')
submission = Submission(work_base='parent_dir/', resources=machine.resources, forward_common_files=['graph.pb'], backward_common_files=[])
## with open('compute.json', 'r') as f:
## compute_dict = json.load(f)

task1 = Task(command='cat example.txt', task_work_path='dir1/', forward_files=['example.txt'], backward_files=['out.txt'], outlog='out.txt')
## machine = Machine.load_from_dict(compute_dict['machine'])
## resources = Resources.load_from_dict(compute_dict['resources'])

task2 = Task(command='cat example.txt', task_work_path='dir2/', forward_files=['example.txt'], backward_files=['out.txt'], outlog='out.txt')
task0 = Task.load_from_json('task.json')

task1 = Task(command='cat example.txt', task_work_path='dir1/', forward_files=['example.txt'], backward_files=['out.txt'], outlog='out.txt')
task2 = Task(command='cat example.txt', task_work_path='dir2/', forward_files=['example.txt'], backward_files=['out.txt'], outlog='out.txt')
task3 = Task(command='cat example.txt', task_work_path='dir3/', forward_files=['example.txt'], backward_files=['out.txt'], outlog='out.txt')

task4 = Task(command='cat example.txt', task_work_path='dir4/', forward_files=['example.txt'], backward_files=['out.txt'], outlog='out.txt')

submission.register_task_list([task1, task2, task3, task4, ])
submission.generate_jobs()
submission.bind_batch(batch=machine.batch)
submission.run_submission(clean=False)
```
task_list = [task0, task1, task2, task3, task4]

submission = Submission(work_base='lammps_md_300K_5GPa/',
machine=machine,
resources=resources,
task_list=task_list,
forward_common_files=['graph.pb'],
backward_common_files=[]
)

the machine_local_shell.json looks like:
(more machine examples, see: tests/jsons/*json)
## submission.register_task_list(task_list=task_list)

submission.run_submission(clean=False)
```

tests/jsons/machine_local_shell.json
machine.json
```json
{
"machine_type": "Slurm",
"context_type": "SSHContext",
"local_root" : "/home/user123/workplace/22_new_project/",
"remote_root": "~/dpdispatcher_work_dir/",
"remote_profile":{
"hostname": "39.106.xx.xxx",
"username": "user1",
"port": 22,
"timeout": 10
}
}
```

resources.json
```json
{
"number_node": 1,
"cpu_per_node": 4,
"gpu_per_node": 1,
"queue_name": "GPUV100",
"group_size": 5
}
```

task.json
```json
{
"batch":{
"batch_type": "shell",
"context_type": "local",
"local_root" : "./test_shell_trival_dir",
"remote_root" : "./tmp_shell_trival_dir"
},
"resources":{
"number_node": 1,
"cpu_per_node": 4,
"gpu_per_node": 0,
"queue_name": "CPU",
"group_size": 2
},
"command": "lmp -i input.lammps",
"task_work_path": "bct-0/",
"forward_files": [
"conf.lmp",
"input.lammps"
],
"backward_files": [
"log.lammps"
],
"outlog": "log",
"errlog": "err"
}
```
50 changes: 50 additions & 0 deletions dpdispatcher/base_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import os,shutil,uuid,hashlib
import subprocess as sp
from glob import glob
from dpdispatcher import dlog

class BaseContext(object):
    """Abstract base class for execution contexts (local, SSH, cloud, ...).

    A context is responsible for moving files between the local machine and
    the place where jobs actually run.  Every concrete subclass is
    auto-registered in ``subclasses_dict`` by class name, which lets
    :meth:`load_from_dict` dispatch on the ``context_type`` field of a
    configuration dict.
    """

    # Registry mapping subclass name -> subclass; filled automatically by
    # __init_subclass__ whenever a subclass is defined.
    subclasses_dict = {}

    def __init__(self):
        # Abstract: concrete contexts define their own constructors.
        raise NotImplementedError('abstract method')

    def __init_subclass__(cls, **kwargs):
        # Auto-register every subclass so load_from_dict can look it up by name.
        super().__init_subclass__(**kwargs)
        cls.subclasses_dict[cls.__name__] = cls

    @classmethod
    def load_from_dict(cls, context_dict):
        """Instantiate the context subclass named by ``context_dict['context_type']``.

        Parameters
        ----------
        context_dict : dict
            Must contain a ``'context_type'`` key naming a registered subclass.

        Returns
        -------
        BaseContext
            An instance of the selected subclass.

        Raises
        ------
        KeyError
            If ``context_type`` is missing or names no registered subclass.

        NOTE: the selected subclass must override ``load_from_dict``;
        otherwise this dispatch would recurse forever.
        """
        context_type = context_dict['context_type']
        context_class = cls.subclasses_dict[context_type]
        context = context_class.load_from_dict(context_dict)
        return context

    def bind_submission(self, submission):
        # Attach the owning submission so upload/download can reach its tasks.
        self.submission = submission

    def upload(self, submission):
        raise NotImplementedError('abstract method')

    def download(self,
                 submission,
                 check_exists=False,
                 mark_failure=True,
                 back_error=False):
        raise NotImplementedError('abstract method')

    def clean(self):
        raise NotImplementedError('abstract method')

    def write_file(self, fname, write_str):
        raise NotImplementedError('abstract method')

    def read_file(self, fname):
        raise NotImplementedError('abstract method')

    def kill(self, proc):
        raise NotImplementedError('abstract method')

    def check_finish(self, proc):
        raise NotImplementedError('abstract method')

130 changes: 0 additions & 130 deletions dpdispatcher/batch_object.py

This file was deleted.

44 changes: 16 additions & 28 deletions dpdispatcher/dp_cloud_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,27 @@

from dpdispatcher.JobStatus import JobStatus
from dpdispatcher import dlog
from dpdispatcher.batch import Batch
from dpdispatcher.machine import Machine
from dpdispatcher.shell import Shell
from dpdispatcher.dpcloudserver import api
from dpdispatcher.dpcloudserver.config import API_HOST, ALI_OSS_BUCKET_URL

# input_data = {
# 'job_type': 'indicate',
# 'log_file': 'dp_cloud_server.log',
# 'command': '',
# 'backward_files': [],
# 'job_name': 'dpdispatcher_job',
# 'machine': {
# 'platform': 'ali',
# 'resources': {
# 'gpu_type': '1 * NVIDIA P100',
# 'cpu_num': 4,
# 'mem_limit': 28,
# 'time_limit': '2:00:00',
# 'image_name': 'yfb-deepmd-kit-1.2.4-cuda10'
# }
# },
# 'job_resources': ''
# }

class DpCloudServer(Shell):
def __init__(self, context, input_data):
shell_script_header_template="""
#!/bin/bash -l
"""

class DpCloudServer(Machine):
def __init__(self, context):
self.context = context
self.input_data = input_data
self.input_data = context.remote_profile['input_data'].copy()

def gen_script(self, job):
    """Build the full job script for *job* by delegating to the parent class.

    NOTE(review): behavior is defined entirely by the superclass's
    gen_script (Machine, per this commit's class header) — presumably it
    combines the header from gen_script_header with per-task commands;
    confirm in the base class before relying on details.
    """
    shell_script = super(DpCloudServer, self).gen_script(job)
    return shell_script

# @classmethod
# def from_jdata(cls, jdata):

# pass
def gen_script_header(self, job):
    """Return the shell-script header used for DP Cloud Server jobs.

    *job* is accepted for interface symmetry with other Machine subclasses
    but is not used: the header is the fixed module-level template.

    NOTE(review): the module template begins with a newline, so
    '#!/bin/bash -l' is not on line 1 of the generated script and the
    shebang would be ineffective — confirm whether that is intended.
    """
    shell_script_header = shell_script_header_template
    return shell_script_header

def gen_local_script(self, job):
script_str = self.gen_script(job)
Expand All @@ -52,7 +40,7 @@ def do_submit(self, job):
oss_task_zip = 'indicate/' + job.job_hash + '/' + zip_filename
job_resources = ALI_OSS_BUCKET_URL + oss_task_zip
# job_resources = ALI_STS_ENDPOINT + '/' + oss_task_zip
print(897, job_resources)
# print(897, job_resources)
# oss_task_zip = 'indicate'
# oss_path =

Expand Down

0 comments on commit d1d1fb4

Please sign in to comment.