
Commit

add and run pre-commit to format codes (#302)
- add pre-commit config
- run pre-commit
njzjz committed Jan 26, 2023
1 parent 4e9a88f commit 99e2611
Showing 96 changed files with 5,506 additions and 3,357 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -45,4 +45,4 @@ dbconfig.json
*/_date.py
*.egg
*.egg-info
venv/*
venv/*
27 changes: 27 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,27 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: trailing-whitespace
exclude: "^tests/"
- id: end-of-file-fixer
exclude: "^tests/"
- id: check-yaml
exclude: "^conda/"
- id: check-json
- id: check-added-large-files
- id: check-merge-conflict
- id: check-symlinks
- id: check-toml
# Python
- repo: https://github.com/psf/black
rev: 22.12.0
hooks:
- id: black-jupyter
# Python inside docs
- repo: https://github.com/asottile/blacken-docs
rev: 1.13.0
hooks:
- id: blacken-docs
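
With this configuration in place, a contributor would typically install the hooks locally and run them once over the whole repository, matching the "run pre-commit" step in the commit description. A minimal sketch, assuming the `pre-commit` package is installed in the current Python environment (these commands are not part of the diff itself):

```python
import subprocess

# Register the git hook described by .pre-commit-config.yaml.
subprocess.run(["pre-commit", "install"], check=True)

# Run every configured hook over the whole tree once. A nonzero exit code
# usually just means some files were reformatted or flagged, so don't raise.
result = subprocess.run(["pre-commit", "run", "--all-files"])
print("hooks passed" if result.returncode == 0 else "hooks reformatted or flagged files")
```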
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -4,7 +4,7 @@ DPDispatcher welcomes every people (or organization) to use under the LGPL-3.0 L

And Contributions are welcome and are greatly appreciated! Every little bit helps, and credit will always be given.

If you want to contribute to dpdispatcher, just open an issue, submit a pull request, leave a comment on the GitHub discussion board, or contact the deepmodeling team.
If you want to contribute to dpdispatcher, just open an issue, submit a pull request, leave a comment on the GitHub discussion board, or contact the deepmodeling team.

Any forms of improvement are welcome.

2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
# DPDispatcher

DPDispatcher is a Python package used to generate input scripts for HPC (High Performance Computing) scheduler systems (Slurm/PBS/LSF/dpcloudserver), submit these scripts to HPC systems, and poke until they finish.
DPDispatcher is a Python package used to generate input scripts for HPC (High Performance Computing) scheduler systems (Slurm/PBS/LSF/dpcloudserver), submit these scripts to HPC systems, and poke until they finish.
DPDispatcher will monitor (poke) these jobs until they finish and download the result files (if the jobs are running on remote systems connected by SSH).

2 changes: 1 addition & 1 deletion ci/LICENSE
@@ -27,4 +27,4 @@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
THE POSSIBILITY OF SUCH DAMAGE.
2 changes: 1 addition & 1 deletion ci/ssh/docker-compose.yml
@@ -27,4 +27,4 @@ services:
- server

volumes:
ssh_config:
ssh_config:
53 changes: 34 additions & 19 deletions doc/conf.py
@@ -14,14 +14,15 @@
import sys
import subprocess
from datetime import date

# sys.path.insert(0, os.path.abspath('.'))


# -- Project information -----------------------------------------------------

project = 'DPDispatcher'
copyright = '2020-%d, Deep Modeling' % date.today().year
author = 'Deep Modeling'
project = "DPDispatcher"
copyright = "2020-%d, Deep Modeling" % date.today().year
author = "Deep Modeling"


# -- General configuration ---------------------------------------------------
@@ -30,52 +31,66 @@
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'deepmodeling_sphinx',
'dargs.sphinx',
'myst_parser',
"deepmodeling_sphinx",
"dargs.sphinx",
"myst_parser",
"sphinx_rtd_theme",
'sphinx.ext.viewcode',
'sphinx.ext.intersphinx',
'numpydoc',
'sphinx.ext.autosummary'
"sphinx.ext.viewcode",
"sphinx.ext.intersphinx",
"numpydoc",
"sphinx.ext.autosummary",
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
templates_path = ["_templates"]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
html_theme = "sphinx_rtd_theme"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ["_static"]
html_css_files = []

autodoc_default_flags = ['members']
autodoc_default_flags = ["members"]
autosummary_generate = True
master_doc = 'index'
master_doc = "index"


def run_apidoc(_):
from sphinx.ext.apidoc import main
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
cur_dir = os.path.abspath(os.path.dirname(__file__))
module = os.path.join(cur_dir, "..", "dpdispatcher")
main(['-M', '--tocfile', 'api', '-H', 'DPDispatcher API', '-o', os.path.join(cur_dir, "api"), module, '--force'])
main(
[
"-M",
"--tocfile",
"api",
"-H",
"DPDispatcher API",
"-o",
os.path.join(cur_dir, "api"),
module,
"--force",
]
)


def setup(app):
app.connect('builder-inited', run_apidoc)
app.connect("builder-inited", run_apidoc)


intersphinx_mapping = {
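
For reference, the argument list that `run_apidoc` passes to `sphinx.ext.apidoc.main` above maps onto an `sphinx-apidoc` command line roughly as sketched below; the paths are illustrative and assume the command is run from the `doc/` directory:

```python
import subprocess

# Rough CLI equivalent of the main([...]) call in run_apidoc (illustrative paths).
subprocess.run(
    [
        "sphinx-apidoc",
        "-M",                      # put module docs before submodule docs
        "--tocfile", "api",        # name of the generated ToC file
        "-H", "DPDispatcher API",  # heading for the generated API docs
        "-o", "api",               # output directory (doc/api)
        "../dpdispatcher",         # package to document
        "--force",                 # overwrite previously generated files
    ],
    check=True,
)
```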
22 changes: 11 additions & 11 deletions doc/dpdispatcher_on_yarn.md
@@ -1,6 +1,6 @@
# Support DPDispatcher on Yarn
## Background
Currently, DPGen (and other DP software) supports HPC systems such as Slurm, PBS, and LSF, as well as cloud machines. In order to run DPGen jobs on ByteDance's internal platform, we need to extend it to support Yarn resources. The Hadoop ecosystem is a very commonly used platform for processing big data, and while developing the new interface we found that it can be implemented using only open-source Hadoop components. So, for the convenience of other users, we decided to contribute the code to the open-source community.
Currently, DPGen (and other DP software) supports HPC systems such as Slurm, PBS, and LSF, as well as cloud machines. In order to run DPGen jobs on ByteDance's internal platform, we need to extend it to support Yarn resources. The Hadoop ecosystem is a very commonly used platform for processing big data, and while developing the new interface we found that it can be implemented using only open-source Hadoop components. So, for the convenience of other users, we decided to contribute the code to the open-source community.

## Design
We use DistributedShell and HDFS to implement it. The control flow shows as follows:
@@ -28,7 +28,7 @@ class HDFSContext(BaseContext) :
none
"""
pass
def download(self, submission):
""" download backward files from HDFS root dir
@@ -42,7 +42,7 @@ class HDFSContext(BaseContext) :
none
"""
pass
def check_file_exists(self, fname):
""" check whether the given file exists, often used in checking whether the belonging job has finished
@@ -74,7 +74,7 @@ class DistributedShell(Machine):
usually a yarn application id
"""
pass
def check_status(self, job):
""" check the yarn job status
@@ -86,9 +86,9 @@ class DistributedShell(Machine):
Returns
-------
status: JobStatus
"""
"""
pass
def gen_script_command(self, job):
""" Generate the shell script to be executed in DistibutedShell container
@@ -101,7 +101,7 @@ class DistributedShell(Machine):
-------
script: string
script command string
"""
"""
pass
```

@@ -124,15 +124,15 @@ hadoop fs -test -e /root/uuid/sys-0001-0015/tag_0_finished
cur_dir=`pwd`
cd t sys-0001-0015
test $? -ne 0 && exit 1
## do your job here
mpirun -n 32 vasp_std 1>> log 2>> err
if test $? -ne 0; then
exit 1
else
hadoop fs -touchz /root/uuid/sys-0001-0015/tag_0_finished
fi
fi
cd $cur_dir
test $? -ne 0 && exit 1
fi }&
@@ -178,4 +178,4 @@ An example of machine.json is as follows, whose batch_type is `DistributedShell`
}
}
]
```
```
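
The `tag_0_finished` marker used in the generated script above also hints at how `check_file_exists` can be backed by the HDFS command line. The following is a hypothetical sketch, not the implementation proposed in this document:

```python
import subprocess


def hdfs_file_exists(path: str) -> bool:
    """Return True if `path` exists on HDFS, using the same
    `hadoop fs -test -e` command as the generated shell script."""
    ret = subprocess.run(["hadoop", "fs", "-test", "-e", path])
    return ret.returncode == 0


# Example: poll the per-task completion marker.
if hdfs_file_exists("/root/uuid/sys-0001-0015/tag_0_finished"):
    print("task sys-0001-0015 already finished")
```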
55 changes: 40 additions & 15 deletions doc/getting-started.md
@@ -6,8 +6,8 @@ DPDispatcher provides the following classes:
- {class}`Submission <dpdispatcher.submission.Submission>` class, which represents a collection of jobs defined by the HPC system.
And there may be common files to be uploaded by them.
DPDispatcher will create and submit these jobs when a `submission` instance executes the {meth}`run_submission <dpdispatcher.submission.Submission.run_submission>` method.
This method will poke until the jobs finish and return.
- {class}`Job <dpdispatcher.submission.Job>` class, a class used by {class}`Submission <dpdispatcher.submission.Submission>` class, which represents a job on the HPC system.
This method will poke until the jobs finish and return.
- {class}`Job <dpdispatcher.submission.Job>` class, a class used by {class}`Submission <dpdispatcher.submission.Submission>` class, which represents a job on the HPC system.
{class}`Submission <dpdispatcher.submission.Submission>` will generate `job`s' submitting scripts used by HPC systems automatically with the {class}`Task <dpdispatcher.submission.Task>` and {class}`Resources <dpdispatcher.submission.Resources>`
- {class}`Resources <dpdispatcher.submission.Resources>` class, which represents the computing resources for each job within a `submission`.

@@ -16,24 +16,49 @@ You can use DPDispatcher in a Python script to submit five tasks:
```python
from dpdispatcher import Machine, Resources, Task, Submission

machine = Machine.load_from_json('machine.json')
resources = Resources.load_from_json('resources.json')
machine = Machine.load_from_json("machine.json")
resources = Resources.load_from_json("resources.json")

task0 = Task.load_from_json('task.json')
task0 = Task.load_from_json("task.json")

task1 = Task(command='cat example.txt', task_work_path='dir1/', forward_files=['example.txt'], backward_files=['out.txt'], outlog='out.txt')
task2 = Task(command='cat example.txt', task_work_path='dir2/', forward_files=['example.txt'], backward_files=['out.txt'], outlog='out.txt')
task3 = Task(command='cat example.txt', task_work_path='dir3/', forward_files=['example.txt'], backward_files=['out.txt'], outlog='out.txt')
task4 = Task(command='cat example.txt', task_work_path='dir4/', forward_files=['example.txt'], backward_files=['out.txt'], outlog='out.txt')
task1 = Task(
command="cat example.txt",
task_work_path="dir1/",
forward_files=["example.txt"],
backward_files=["out.txt"],
outlog="out.txt",
)
task2 = Task(
command="cat example.txt",
task_work_path="dir2/",
forward_files=["example.txt"],
backward_files=["out.txt"],
outlog="out.txt",
)
task3 = Task(
command="cat example.txt",
task_work_path="dir3/",
forward_files=["example.txt"],
backward_files=["out.txt"],
outlog="out.txt",
)
task4 = Task(
command="cat example.txt",
task_work_path="dir4/",
forward_files=["example.txt"],
backward_files=["out.txt"],
outlog="out.txt",
)

task_list = [task0, task1, task2, task3, task4]

submission = Submission(work_base='lammps_md_300K_5GPa/',
machine=machine,
submission = Submission(
work_base="lammps_md_300K_5GPa/",
machine=machine,
resources=resources,
task_list=task_list,
forward_common_files=['graph.pb'],
backward_common_files=[]
forward_common_files=["graph.pb"],
backward_common_files=[],
)

submission.run_submission()
@@ -92,12 +117,12 @@ resources = Resources(
queue_name="GPU_2080Ti",
group_size=4,
custom_flags=[
"#SBATCH --nice=100",
"#SBATCH --nice=100",
"#SBATCH --time=24:00:00"
],
strategy={
# used when you want to add CUDA_VISIBLE_DIVECES automatically
"if_cuda_multi_devices": True
"if_cuda_multi_devices": True
},
para_deg=1,
# will unload these modules before running tasks
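
The `task.json` read by `Task.load_from_json("task.json")` above is not shown in this diff; a plausible sketch mirrors the keyword arguments of the `Task(...)` calls (the exact schema is an assumption here, not taken from the diff):

```python
import json

# Hypothetical task.json matching the Task(...) keyword arguments used above.
task0 = {
    "command": "cat example.txt",
    "task_work_path": "dir0/",
    "forward_files": ["example.txt"],
    "backward_files": ["out.txt"],
    "outlog": "out.txt",
}

with open("task.json", "w") as f:
    json.dump(task0, f, indent=4)
```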
8 changes: 4 additions & 4 deletions doc/index.rst
@@ -6,15 +6,15 @@
DPDispatcher's documentation
======================================

DPDispatcher is a Python package used to generate input scripts for HPC (High Performance Computing) scheduler systems (Slurm/PBS/LSF/dpcloudserver), submit these scripts to HPC systems, and poke until they finish.
DPDispatcher is a Python package used to generate input scripts for HPC (High Performance Computing) scheduler systems (Slurm/PBS/LSF/dpcloudserver), submit these scripts to HPC systems, and poke until they finish.

DPDispatcher will monitor (poke) these jobs until they finish and download the result files (if the jobs are running on remote systems connected by SSH).
DPDispatcher will monitor (poke) these jobs until they finish and download the result files (if the jobs are running on remote systems connected by SSH).

.. toctree::
:maxdepth: 2
:caption: Contents:


install
getting-started
context
@@ -33,7 +33,7 @@ DPDispatcher will monitor (poke) until these jobs finish and download the result
.. toctree::
:caption: Project details
:glob:

credits

Indices and tables
7 changes: 5 additions & 2 deletions dpdispatcher/JobStatus.py
@@ -1,12 +1,15 @@
from enum import IntEnum
class JobStatus(IntEnum) :


class JobStatus(IntEnum):
unsubmitted = 1
waiting = 2
running = 3
terminated = 4
finished = 5
completing = 6
unknown = 100


# def __str__(self):
# return repr(self)

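Because `JobStatus` is an `IntEnum`, callers can compare members directly when polling; a small illustrative sketch (not code from this commit):

```python
from dpdispatcher.JobStatus import JobStatus

# Map a polled status to a coarse decision; purely illustrative.
status = JobStatus.running

if status == JobStatus.finished:
    print("job finished; download results")
elif status in (JobStatus.terminated, JobStatus.unknown):
    print("job terminated or lost; consider resubmitting")
else:
    print("job still pending or running; keep poking")
```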