
Commit

feat: dataplugin commands now use consistent logic as dataplugin-hub commands
newgene committed Jun 6, 2023
1 parent 48f6837 commit 0ce2653
Showing 3 changed files with 71 additions and 30 deletions.
11 changes: 8 additions & 3 deletions biothings/hub/dataplugin/assistant.py
@@ -184,7 +184,7 @@ def get_code_for_mod_name(self, mod_name):
        try:
            pymod = importlib.import_module(modpath)
            # self.logger.info("Imported custom module %s for plugin %s", modpath, self.plugin_path_name)
-       except ImportError:
+       except (ImportError, TypeError):
            # Some data plugins use BioThings generic parser, e.g. CHEBI plugin uses {"parser" : "hub.dataload.data_parsers:load_obo"}
            # In such cases, `self.plugin_path_name` is not part of the module path.
            pymod = importlib.import_module(mod)
@@ -337,8 +337,13 @@ def get_uploader_dynamic_class(self, uploader_section, metadata, sub_source_name
                try:
                    from {self.plugin_path_name}.{mod} import {func} as parser_func
                except ImportError:
-                   from .{mod} import {func} as parser_func
+                   try:
+                       from .{mod} import {func} as parser_func
+                   except ImportError:
+                       # When relative import fails, try to import it directly
+                       import sys
+                       sys.path.insert(0, ".")
+                       from {mod} import {func} as parser_func
                parser_kwargs = {parser_kwargs_serialized}
            """
        )
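Note: the rendered template now resolves the parser in three stages: the plugin package itself, a relative import, and finally a plain import with the current working directory pushed onto sys.path. A minimal standalone sketch of the same cascade, written with importlib and a hypothetical resolve_parser helper that is not part of this commit:

import importlib
import sys


def resolve_parser(plugin_pkg, mod, func, package=None):
    """Sketch of the three-stage lookup the generated loader performs:
    1) <plugin_pkg>.<mod>, 2) relative .<mod>, 3) <mod> from the working directory."""
    try:
        module = importlib.import_module(f"{plugin_pkg}.{mod}")
    except ImportError:
        try:
            # import_module raises TypeError for a relative name without a package,
            # the same exception pair the first hunk above now catches.
            module = importlib.import_module(f".{mod}", package=package)
        except (ImportError, TypeError):
            sys.path.insert(0, ".")  # make the working directory importable
            module = importlib.import_module(mod)
    return getattr(module, func)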
71 changes: 50 additions & 21 deletions biothings/management/dataplugin.py
@@ -74,21 +74,33 @@ def dump_data(
):
    if verbose:
        logger.setLevel("DEBUG")

    working_dir = pathlib.Path().resolve()
    if not utils.is_valid_working_directory(working_dir, logger=logger):
        return exit(1)
-   manifest = utils.get_manifest_content(working_dir)
-   to_dumps = utils.get_todump_list(manifest.get("dumper"))
-   for to_dump in to_dumps:
-       utils.download(
-           logger,
-           to_dump["schema"],
-           to_dump["remote_url"],
-           to_dump["local_file"],
-           to_dump["uncompress"],
-       )
    plugin_name = working_dir.name
    data_folder = os.path.join(working_dir, ".biothings_hub", "data_folder")

+   mode = "v2"
+   if mode == "v1":
+       manifest = utils.get_manifest_content(working_dir)
+       to_dumps = utils.get_todump_list(manifest.get("dumper"))
+       for to_dump in to_dumps:
+           utils.download(
+               logger,
+               to_dump["schema"],
+               to_dump["remote_url"],
+               to_dump["local_file"],
+               to_dump["uncompress"],
+           )
+   else:
+       from biothings.management.utils import do_dump

+       dumper_manager, uploader_manager = utils.load_plugin(working_dir, data_folder=".")
+       del uploader_manager
+       dumper_class = dumper_manager[plugin_name][0]
+       data_folder = do_dump(dumper_class, plugin_name)

    rprint("[green]Success![/green]")
    utils.show_dumped_files(data_folder, plugin_name)
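For illustration, the new v2 dump path can be driven outside the CLI roughly like this; the plugin checkout path is a made-up example rather than anything from the commit:

import pathlib

from biothings.management import utils

working_dir = pathlib.Path("~/plugins/my_plugin").expanduser().resolve()  # hypothetical checkout
plugin_name = working_dir.name

# load_plugin registers the plugin with the hub managers and returns both managers.
dumper_manager, uploader_manager = utils.load_plugin(working_dir, data_folder=".")

# Only the dumper side is needed for a dump; do_dump runs it and reports the data folder.
dumper_class = dumper_manager[plugin_name][0]
data_folder = utils.do_dump(dumper_class, plugin_name)
print("dumped into", data_folder)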

@@ -112,21 +124,32 @@ def upload_source(
):
    if verbose:
        logger.setLevel("DEBUG")

    working_dir = pathlib.Path().resolve()
    if not utils.is_valid_working_directory(working_dir, logger=logger):
        return exit(1)
    plugin_name = working_dir.name
-   local_archive_dir = os.path.join(working_dir, ".biothings_hub")
-   data_folder = os.path.join(working_dir, ".biothings_hub", "data_folder")
-   os.makedirs(local_archive_dir, exist_ok=True)
-   manifest = utils.get_manifest_content(working_dir)
-   upload_sections = manifest.get("uploaders")
-   if not upload_sections:
-       upload_section = manifest.get("uploader")
-       upload_sections = [upload_section]

-   for section in upload_sections:
-       utils.process_uploader(working_dir, data_folder, plugin_name, section, logger, batch_limit)
+   mode = "v2"
+   if mode == "v1":
+       local_archive_dir = os.path.join(working_dir, ".biothings_hub")
+       data_folder = os.path.join(working_dir, ".biothings_hub", "data_folder")
+       os.makedirs(local_archive_dir, exist_ok=True)
+       manifest = utils.get_manifest_content(working_dir)
+       upload_sections = manifest.get("uploaders")
+       if not upload_sections:
+           upload_section = manifest.get("uploader")
+           upload_sections = [upload_section]
+       for section in upload_sections:
+           utils.process_uploader(working_dir, data_folder, plugin_name, section, logger, batch_limit)
+   else:
+       from biothings.management.utils import do_upload

+       dumper_manager, uploader_manager = utils.load_plugin(working_dir, data_folder=".")
+       del dumper_manager
+       uploader_classes = uploader_manager[plugin_name]
+       do_upload(uploader_classes)

    rprint("[green]Success![/green]")
    utils.show_uploaded_sources(working_dir, plugin_name)
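The upload side mirrors the dump sketch above; the path is again a placeholder, and a plugin may declare several uploader classes:

import pathlib

from biothings.management import utils

working_dir = pathlib.Path("~/plugins/my_plugin").expanduser().resolve()  # hypothetical checkout
plugin_name = working_dir.name

dumper_manager, uploader_manager = utils.load_plugin(working_dir, data_folder=".")

# uploader_manager maps the plugin name to its list of uploader classes.
uploader_classes = uploader_manager[plugin_name]
utils.do_upload(uploader_classes)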

@@ -147,7 +170,13 @@ def listing(
    plugin_name = working_dir.name
    if not utils.is_valid_working_directory(working_dir, logger=logger):
        return exit(1)
-   data_folder = os.path.join(working_dir, ".biothings_hub", "data_folder")
+   dumper_manager, uploader_manager = utils.load_plugin(working_dir, data_folder=".")
+   del uploader_manager
+   dumper_class = dumper_manager[plugin_name][0]
+   dumper = dumper_class()
+   dumper.prepare()
+   utils.run_sync_or_async_job(dumper.create_todump_list, force=True)
+   data_folder = dumper.new_data_folder
    if dump:
        utils.show_dumped_files(data_folder, plugin_name)
        return
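The listing command now asks the dumper itself for its data folder, which requires building the to-dump list first. run_sync_or_async_job is presumably there because create_todump_list may be either a plain method or a coroutine depending on the dumper; one plausible shape of such a dispatcher, shown purely as an illustration and not the actual biothings implementation:

import asyncio
import inspect


def run_sync_or_async_job(func, *args, **kwargs):
    # Call a regular callable directly; drive a coroutine function
    # to completion on a fresh event loop instead.
    if inspect.iscoroutinefunction(func):
        return asyncio.run(func(*args, **kwargs))
    return func(*args, **kwargs)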
19 changes: 13 additions & 6 deletions biothings/management/utils.py
@@ -84,7 +84,8 @@ def create_data_plugin_template(name, multi_uploaders=False, parallelizer=False,
    logger.info(f"Successful create data plugin template at: \n {plugin_dir}")


-def load_plugin(plugin_name):
+def load_plugin(plugin_path, plugin_name=None, data_folder=None):
+   from biothings import config as btconfig
    from biothings.hub.dataload.dumper import DumperManager
    from biothings.hub.dataload.uploader import UploaderManager
    from biothings.hub.dataplugin.assistant import LocalAssistant
@@ -99,7 +100,12 @@ def load_plugin(plugin_name):
    LocalAssistant.dumper_manager = dmanager
    LocalAssistant.uploader_manager = upload_manager

+   _plugin_path = pathlib.Path(plugin_path).resolve()
+   btconfig.DATA_PLUGIN_FOLDER = _plugin_path.parent.as_posix()
+   plugin_name = plugin_name or _plugin_path.name
+   data_folder = data_folder or f"./{plugin_name}"
    assistant = LocalAssistant(f"local://{plugin_name}")
+   # print(assistant.plugin_name, plugin_name, _plugin_path.as_posix(), btconfig.DATA_PLUGIN_FOLDER)
    dp = get_data_plugin()
    dp.remove({"_id": assistant.plugin_name})
    dp.insert_one(
@@ -111,8 +117,7 @@ def load_plugin(plugin_name):
                "active": True,
            },
            "download": {
-               # "data_folder": "/data/biothings_studio/plugins/pharmgkb", # tmp fake
-               "data_folder": f"./{plugin_name}", # tmp path to your data plugin
+               "data_folder": data_folder, # tmp path to your data plugin
            },
        }
    )
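With the new signature, a caller only has to pass the plugin directory; the plugin name, the hub's DATA_PLUGIN_FOLDER, and the temporary data folder are derived from it when not supplied. A small illustration of that derivation, using a made-up path:

import pathlib

_plugin_path = pathlib.Path("/repos/plugins/my_plugin").resolve()  # hypothetical location

data_plugin_folder = _plugin_path.parent.as_posix()  # "/repos/plugins" -> btconfig.DATA_PLUGIN_FOLDER
plugin_name = _plugin_path.name                      # "my_plugin"
data_folder = f"./{plugin_name}"                     # "./my_plugin", stored in the data plugin record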
@@ -246,8 +251,10 @@ def download(logger, schema, remote_url, local_file, uncompress=True):

def do_dump(dumper_class, plugin_name):
    """Perform dump for the given dumper_class"""
-   import biothings.hub # this import is to make config is setup before get_data_plugin is imported
-   from biothings.utils.hub_db import get_data_plugin
+   from biothings import config
+   from biothings.utils import hub_db

+   hub_db.setup(config)

    dumper = dumper_class()
    dumper.prepare()
@@ -261,7 +268,7 @@ def do_dump(dumper_class, plugin_name):
    # cleanup
    # Commented out this line below. we should keep the dump info in src_dump collection for other cmds, e.g. upload, list etc
    # dumper.src_dump.remove({"_id": dumper.src_name})
-   dp = get_data_plugin()
+   dp = hub_db.get_data_plugin()
    dp.remove({"_id": plugin_name})
    return dumper.new_data_folder

