docs: adding missing doc strings for biothings.cli apidocs
newgene committed Jun 21, 2023
1 parent d9c2010 commit 931be12
Showing 6 changed files with 127 additions and 56 deletions.
2 changes: 2 additions & 0 deletions biothings/cli/__init__.py
@@ -68,6 +68,7 @@


def setup_config():
"""Setup a config module necessary to launch the CLI"""
working_dir = pathlib.Path().resolve()
_config = DummyConfig("config")
_config.HUB_DB_BACKEND = {
@@ -95,6 +96,7 @@ def setup_config():


def main():
"""The main entry point for running the BioThings CLI to test your local data plugins."""
if not typer_avail:
logger.error('"typer" package is required for CLI feature. Use "pip install typer[all]" to install.')
return
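For reference, a minimal sketch of launching the CLI from Python, assuming "pip install typer[all]" has been run (otherwise main() just logs the error above and returns):

# Sketch only: drive the CLI programmatically via the main() entry point shown above.
from biothings.cli import main

if __name__ == "__main__":
    main()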
49 changes: 35 additions & 14 deletions biothings/cli/config.py.sample
@@ -1,26 +1,47 @@
########################################
# DATA PLUGIN CONFIGURATION VARIABLES #
########################################
# This is a minimal config.py file that will be used when developing a new data plugin without running a BioThings Hub instance.
# Typically, you don't need to include a config.py module to run the BioThings CLI tool to
# test your data plugin locally. A default config module is set up at the launch of the CLI.
# However, you can always include an additional config.py module to override the default
# config settings, e.g. an alternative DATA_ARCHIVE_ROOT, or a HUB_DB_BACKEND with a different db path.
# The available config settings can be found in the biothings.hub.default_config module (note that
# not all settings are relevant to the CLI).

# This file should be placed in the same directory as your developed data plugin:

# When using dataplugin-hub sub-commands
# $ ls -al
# config.py
# data_archive_root
# hub_db_folder
# your_data_plugin

import os
# .biothings_hub
# .data_src_database
# archive
# biothings_hubdb
# your_data_plugin_folder
# manifest.json
# parser.py

from biothings.utils.loggers import setup_default_log
from biothings.web.settings.default import *
# When using dataplugin sub-commands inside a data plugin folder
# $ ls -al
# config.py
# .biothings_hub
# .data_src_database
# archive
# biothings_hubdb
# manifest.json
# parser.py

DATA_SRC_DATABASE = 'data_src_database'
DATA_SRC_DUMP_COLLECTION = 'data_src_dump_collection'
DATA_SRC_DATABASE = '.data_src_database'
DATA_HUB_DB_DATABASE = 'data_hub_db_database'
HUB_DB_BACKEND = {
"module": "biothings.utils.sqlite3",
"sqlite_db_folder": "./hub_db_folder"
"sqlite_db_folder": ".biothings_hub""
}
DATA_ARCHIVE_ROOT = "./data_archive_root"
LOG_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, "logs")
logger = setup_default_log("hub", LOG_FOLDER)
DATA_ARCHIVE_ROOT = ".biothings_hub/archive"

# Add new entry in DOCKER_CONFIG if you want to use a different docker host for your
# docker-based data plugin, other than the default docker host running on your localhost.
# DOCKER_CONFIG = {
# "docker1": {"tls_cert_path": None, "tls_key_path": None, "client_url": ""},
# "localhost": {"client_url": "unix://var/run/docker.sock"},
# }
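For instance, a local config.py overriding just the two most common settings might look like the sketch below (the /tmp paths are illustrative assumptions, not defaults):

# config.py — example override, placed next to your data plugin.
# Any setting not defined here falls back to the CLI's default config.
HUB_DB_BACKEND = {
    "module": "biothings.utils.sqlite3",   # keep the sqlite3 backend
    "sqlite_db_folder": "/tmp/my_hub_db",  # illustrative alternative db path
}
DATA_ARCHIVE_ROOT = "/tmp/my_data_archive"  # illustrative alternative archive path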
52 changes: 28 additions & 24 deletions biothings/cli/dataplugin.py
@@ -1,22 +1,13 @@
from typing import Optional

import typer
from rich import print as rprint
from typing_extensions import Annotated

from biothings.cli import utils

logger = utils.get_logger("dataplugin")


def extra_help_msg():
"""print additional help msg here"""
rprint("[magenta]:sparkles: Always run this command inside of your data plugin folder. [/magenta]")
rprint(
"[magenta]:sparkles: To override the default biothing.config, please define the config.py at the working directory. :rocket::boom:[/magenta]"
)


short_help = (
"[green]Test an individual data plugin locally and make simple queries to inspect your parsed data objects.[/green]"
)
@@ -57,6 +48,7 @@ def create_data_plugin(
typer.Option("--parallelizer", help="If provided, the data plugin's upload step will run in parallel"),
] = False,
):
"""*create* command for creating a new data plugin from the template"""
utils.do_create(name, multi_uploaders, parallelizer, logger=logger)


@@ -65,6 +57,7 @@ def create_data_plugin(
help="Download source data files to local",
)
def dump_data():
"""*dump* command for downloading source data files to local"""
utils.do_dump(plugin_name=None, logger=logger)


@@ -81,6 +74,8 @@ def upload_source(
),
] = None,
):
"""*upload* command for converting downloaded data from dump step into JSON documents and upload the to the source database.
A local sqlite database used to store the uploaded data"""
utils.do_upload(plugin_name=None, logger=logger)


@@ -96,6 +91,8 @@ def dump_and_upload(
# False, "--parallelizer", help="Using parallelizer or not? Default: No"
# ),
):
"""*dump_and_upload* command for downloading source data files to local, then converting them into JSON documents and uploading them to the source database.
Two steps in one command."""
utils.do_dump_and_upload(plugin_name=None, logger=logger)
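Because each command above is a thin wrapper over biothings.cli.utils, the same steps can be driven from Python. A sketch, run from inside a data plugin folder (plugin_name=None meaning the current folder, matching the calls above):

from biothings.cli import utils

logger = utils.get_logger("dataplugin")
# Mirror the *dump_and_upload* command: download the source files,
# then parse and upload them in one go.
utils.do_dump_and_upload(plugin_name=None, logger=logger)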


@@ -108,6 +105,7 @@ def listing(
upload: Annotated[Optional[bool], typer.Option("--upload", help="Listing uploaded sources")] = False,
hubdb: Annotated[Optional[bool], typer.Option("--hubdb", help="Listing internal hubdb content")] = False,
):
"""*list* command for listing dumped files and/or uploaded sources"""
utils.do_list(plugin_name=None, dump=dump, upload=upload, hubdb=hubdb, logger=logger)


@@ -161,6 +159,8 @@ def inspect_source(
),
] = None,
):
"""*inspect* command for giving detailed information about the structure of documents coming from the parser after the upload step"""

utils.do_inspect(
plugin_name=None,
sub_source_name=sub_source_name,
@@ -191,21 +191,24 @@ def serve(
] = 9999,
):
"""
Run the simple API server for serving documents from the source database, \n
Support pagination by using: start=&limit= \n
Support filtering by document keys, for example:\n
After run 'dump_and_upload', we have a source_name = "test"\n
doc = {"_id": "123", "key": {"a":{"b": "1"},"x":[{"y": "3", "z": "4"}, "5"]}}.\n
- You can see all available sources on the index page: http://host:port/
- You can list all docs by:\n
http://host:port/<your source name>/\n
http://host:port/<your source name>/start=10&limit=10\n
- You can filter out this doc by:\n
http://host:port/<your source name>/?key.a.b=1 (find all docs that have nested dict keys a.b)\n
http://host:port/<your source name>/?key.x.y=3 (find all docs that have mixed type dict-list)\n
http://host:port/<your source name>/?key.x.z=4\n
http://host:port/<your source name>/?key.x=5\n
- Or you can retrieve this doc by: http://host:port/<your source name>/123/\n
*serve* command runs a simple API server for serving documents from the source database.
For example, after running 'dump_and_upload', we have a source_name = "test" with a document structure
like this:
doc = {"_id": "123", "key": {"a":{"b": "1"},"x":[{"y": "3", "z": "4"}, "5"]}}.
An API server will run at http://host:port/<your source name>/, like http://localhost:9999/test/:
- You can see all available sources on the index page: http://localhost:9999/
- You can list all docs: http://localhost:9999/test/ (the first 10 docs are returned by default)
- You can paginate the doc list: http://localhost:9999/test/?start=10&limit=10
- You can retrieve a doc by id: http://localhost:9999/test/123
- You can filter docs with one or multiple fielded terms:
    - http://localhost:9999/test/?q=key.a.b:1 (query by any field with dot notation, like key.a.b)
    - http://localhost:9999/test/?q=key.a.b:1%20AND%20key.x.y:3 (find all docs that match two fields)
    - http://localhost:9999/test/?q=key.x.z:4* (field values can contain the wildcards * or ?)
    - http://localhost:9999/test/?q=key.x:5&start=10&limit=10 (pagination also works)
"""
utils.do_serve(plugin_name=None, host=host, port=port, logger=logger)
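Once *serve* is running, the endpoints from the docstring can be exercised with the requests package. A sketch using the "test" source and doc from the example above (assuming the server returns JSON, as it serves documents):

import requests

BASE = "http://localhost:9999/test/"

# List docs (the first 10 are returned by default)
print(requests.get(BASE).json())

# Paginate the doc list
print(requests.get(BASE, params={"start": 10, "limit": 10}).json())

# Retrieve one doc by its _id
print(requests.get(BASE + "123").json())

# Fielded query with dot notation; requests URL-encodes the space around "AND"
print(requests.get(BASE, params={"q": "key.a.b:1 AND key.x.y:3"}).json())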

@@ -226,4 +229,5 @@ def clean_data(
),
] = False,
):
"""*clean* command for deleting all dumped files and/or drop uploaded sources tables"""
utils.do_clean(plugin_name=None, dump=dump, upload=upload, clean_all=clean_all, logger=logger)
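These are ordinary typer commands, so they can also be smoke-tested without a shell. A sketch with typer's CliRunner, assuming this module exposes its typer application as dataplugin.app (the app object itself is not shown in this diff):

from typer.testing import CliRunner

from biothings.cli import dataplugin  # assumption: defines a module-level typer app named "app"

runner = CliRunner()
# Invoke the *list* command defined above, restricted to dumped files
result = runner.invoke(dataplugin.app, ["list", "--dump"])
print(result.output)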
42 changes: 27 additions & 15 deletions biothings/cli/dataplugin_hub.py
@@ -46,6 +46,7 @@ def create_data_plugin(
typer.Option("--parallelizer", help="If provided, the data plugin's upload step will run in parallel"),
] = False,
):
"""*create* command for creating a new data plugin from the template"""
utils.do_create(name, multi_uploaders, parallelizer, logger=logger)


@@ -59,6 +60,7 @@ def dump_data(
typer.Option("--name", "-n", help="Provide a data plugin name", prompt="What's your data plugin name?"),
] = "",
):
"""*dump* command for downloading source data files to local"""
utils.do_dump(plugin_name, logger=logger)


@@ -85,6 +87,8 @@ def upload_source(
# False, "--parallelizer", help="Using parallelizer or not? Default: No"
# ),
):
"""*upload* command for converting downloaded data from dump step into JSON documents and upload the to the source database.
A local sqlite database used to store the uploaded data"""
utils.do_upload(plugin_name, logger=logger)


@@ -104,6 +108,8 @@ def dump_and_upload(
# False, "--parallelizer", help="Using parallelizer or not? Default: No"
# ),
):
"""*dump_and_upload* command for downloading source data files to local, then converting them into JSON documents and uploading them to the source database.
Two steps in one command."""
utils.do_dump_and_upload(plugin_name, logger=logger)


@@ -120,6 +126,7 @@ def listing(
upload: Annotated[Optional[bool], typer.Option("--upload", help="Listing uploaded sources")] = False,
hubdb: Annotated[Optional[bool], typer.Option("--hubdb", help="Listing internal hubdb content")] = False,
):
"""*list* command for listing dumped files and/or uploaded sources"""
utils.do_list(plugin_name, dump, upload, hubdb, logger=logger)


@@ -177,6 +184,7 @@ def inspect_source(
),
] = None,
):
"""*inspect* command for giving detailed information about the structure of documents coming from the parser after the upload step"""
utils.do_inspect(
plugin_name=plugin_name,
sub_source_name=sub_source_name,
@@ -211,21 +219,24 @@ def serve(
] = 9999,
):
"""
Run the simple API server for serving documents from the source database, \n
Support pagination by using: start=&limit= \n
Support filtering by document keys, for example:\n
After run 'dump_and_upload', we have a source_name = "test"\n
doc = {"_id": "123", "key": {"a":{"b": "1"},"x":[{"y": "3", "z": "4"}, "5"]}}.\n
- You can see all available sources on the index page: http://host:port/
- You can list all docs by:\n
http://host:port/<your source name>/\n
http://host:port/<your source name>/start=10&limit=10\n
- You can filter out this doc by:\n
http://host:port/<your source name>/?key.a.b=1 (find all docs that have nested dict keys a.b)\n
http://host:port/<your source name>/?key.x.y=3 (find all docs that have mixed type dict-list)\n
http://host:port/<your source name>/?key.x.z=4\n
http://host:port/<your source name>/?key.x=5\n
- Or you can retrieve this doc by: http://host:port/<your source name>/123/\n
*serve* command runs a simple API server for serving documents from the source database.
For example, after running 'dump_and_upload', we have a source_name = "test" with a document structure
like this:
doc = {"_id": "123", "key": {"a":{"b": "1"},"x":[{"y": "3", "z": "4"}, "5"]}}.
An API server will run at http://host:port/<your source name>/, like http://localhost:9999/test/:
- You can see all available sources on the index page: http://localhost:9999/
- You can list all docs: http://localhost:9999/test/ (the first 10 docs are returned by default)
- You can paginate the doc list: http://localhost:9999/test/?start=10&limit=10
- You can retrieve a doc by id: http://localhost:9999/test/123
- You can filter docs with one or multiple fielded terms:
    - http://localhost:9999/test/?q=key.a.b:1 (query by any field with dot notation, like key.a.b)
    - http://localhost:9999/test/?q=key.a.b:1%20AND%20key.x.y:3 (find all docs that match two fields)
    - http://localhost:9999/test/?q=key.x.z:4* (field values can contain the wildcards * or ?)
    - http://localhost:9999/test/?q=key.x:5&start=10&limit=10 (pagination also works)
"""
utils.do_serve(plugin_name=plugin_name, host=host, port=port, logger=logger)

@@ -250,4 +261,5 @@ def clean_data(
),
] = False,
):
"""*clean* command for deleting all dumped files and/or drop uploaded sources tables"""
utils.do_clean(plugin_name, dump=dump, upload=upload, clean_all=clean_all, logger=logger)
