Skip to content

Commit

Permalink
Merge pull request #173 from databio/dev
Browse files Browse the repository at this point in the history
Release 0.9.1
  • Loading branch information
nsheff committed May 1, 2020
2 parents b87f901 + 9d0dfa8 commit 9ced533
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 28 deletions.
9 changes: 9 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@

This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.

## [0.9.1] - 2020-05-01

### Added
- added option (`-f`/`--force`) to confirm assets overwriting upfront in `refgenie add` and `refgenie pull`

### Changed
- fixed bug in hisat2_index that pointed to the parent folder. The seek key now points to the folder/{genome}, as expected by the tool
- fixed bug in bwa_index that pointed to the parent folder. The seek key now points to the folder/{genome}.fa, as expected by the tool

## [0.9.0] - 2020-03-17

### Changed
Expand Down
6 changes: 5 additions & 1 deletion docs/faq.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# FAQ

## Why isn't the refgenie executable available on PATH?

By default, Python command-line programs are installed to `~/.local/bin`. You can add that location to your `PATH` by appending it (`export PATH=$PATH:~/.local/bin`). Add this line to your `.bashrc` if you want it to persist.

## Can I use `refgenie` with my own genome resources I've already set up?

Yes, you can. Of course, one of refgenie's strengths is that it makes it easy to start a new genome resource folder from scratch. But if you've already set yours up and want to use *other* parts of the refgenie system (like the Python API, for instance), you can also do that. All you need to do is write your assets into your genome configuration file, which is easy using [refgenie add](custom_assets).
Expand All @@ -18,4 +22,4 @@ Yes, this is now the recommended way to use refgenie for groups. Starting with r

## How can I track how a downloaded asset was created?

Starting with the server API `v2`, you can use an endpoint that will provide a detailed log output: `/v2/asset/{genome}/{asset}/log`. This log file specifies exactly how the asset was created.
Starting with the server API `v2`, you can use an endpoint that will provide a detailed log output: `/v2/asset/{genome}/{asset}/log`. This log file specifies exactly how the asset was created.
2 changes: 1 addition & 1 deletion refgenie/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.9.0"
__version__ = "0.9.1"
4 changes: 2 additions & 2 deletions refgenie/asset_build_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@
"bwa_index": {
DESC: "Genome index for Burrows-Wheeler Alignment Tool, produced with bwa index",
ASSETS: {
"bwa_index": "."
"bwa_index": "{genome}.fa"
},
REQ_FILES: [],
REQ_ASSETS: [
Expand All @@ -171,7 +171,7 @@
"hisat2_index": {
DESC: "Genome index for HISAT2, produced with hisat2-build",
ASSETS: {
"hisat2_index": "."
"hisat2_index": "{genome}"
},
REQ_FILES: [],
REQ_ASSETS: [
Expand Down
66 changes: 42 additions & 24 deletions refgenie/refgenie.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,15 @@ def add_subparser(cmd, description):
help="One or more registry path strings that identify assets (e.g. hg38/fasta or hg38/fasta:tag"
+ (" or hg38/fasta.fai:tag)." if cmd == GET_ASSET_CMD else ")."))

for cmd in [PULL_CMD, REMOVE_CMD, INSERT_CMD]:
sps[cmd].add_argument(
"-f", "--force", action="store_true",
help="Do not prompt before action, approve upfront.")

sps[PULL_CMD].add_argument(
"-u", "--no-untar", action="store_true",
help="Do not extract tarballs.")

sps[REMOVE_CMD].add_argument(
"-f", "--force", action="store_true",
help="Do not prompt before removal, approve.")

sps[INSERT_CMD].add_argument(
"-p", "--path", required=True,
help="Relative local path to asset.")
Expand Down Expand Up @@ -236,26 +237,35 @@ def get_asset_vars(genome, asset_key, tag, outfolder, specific_args=None, specif
return asset_vars


def refgenie_add(rgc, asset_dict, path):
def refgenie_add(rgc, asset_dict, path, force=False):
"""
Add an external asset to the config.
File existence is checked and asset files are transferred to the selected tag subdirectory
File existence is checked and asset files are transferred to the selected
tag subdirectory
:param refgenconf.RefGenConf rgc: genome configuration object
:param dict asset_dict: a single parsed registry path
:param str path: the path provided by the user. Must be relative to the specific genome directory
:param str path: the path provided by the user. Must be relative to the
specific genome directory
:param bool force: whether the replacement of a possibly existing asset
should be forced
"""
# remove the first directory from the provided path if it is the genome name
path = os.path.join(*path.split(os.sep)[1:]) if path.split(os.sep)[0] == asset_dict["genome"] else path
tag = asset_dict["tag"] or rgc.get_default_tag(asset_dict["genome"], asset_dict["asset"])
outfolder = os.path.abspath(os.path.join(rgc[CFG_FOLDER_KEY], asset_dict["genome"]))
path = os.path.join(*path.split(os.sep)[1:]) \
if path.split(os.sep)[0] == asset_dict["genome"] else path
tag = asset_dict["tag"] \
or rgc.get_default_tag(asset_dict["genome"], asset_dict["asset"])
outfolder = \
os.path.abspath(os.path.join(rgc[CFG_FOLDER_KEY], asset_dict["genome"]))
abs_asset_path = os.path.join(outfolder, path)
if asset_dict["seek_key"] is None:
# if seek_key is not specified we're about to move a directory to the tag subdir
# if seek_key is not specified we're about to move a directory to
# the tag subdir
tag_path = os.path.join(abs_asset_path, tag)
from shutil import copytree as cp
else:
# if seek_key is specified we're about to move just a single file to the tag subdir
# if seek_key is specified we're about to move just a single file to
# the tag subdir
tag_path = os.path.join(os.path.dirname(abs_asset_path), tag)
if not os.path.exists(tag_path):
os.makedirs(tag_path)
Expand All @@ -264,24 +274,32 @@ def refgenie_add(rgc, asset_dict, path):
if not os.path.exists(tag_path):
cp(abs_asset_path, tag_path)
else:
if not query_yes_no("Path '{}' exists. Do you want to overwrite?".format(tag_path)):
if not force and not \
query_yes_no("Path '{}' exists. Do you want to overwrite?".
format(tag_path)):
return False
else:
_remove(tag_path)
cp(abs_asset_path, tag_path)
else:
raise OSError("Absolute path '{}' does not exist. The provided path must be relative to: {}".
raise OSError("Absolute path '{}' does not exist. "
"The provided path must be relative to: {}".
format(abs_asset_path, rgc[CFG_FOLDER_KEY]))
rgc.make_writable()
gat_bundle = [asset_dict["genome"], asset_dict["asset"], tag]
rgc.update_tags(*gat_bundle,
data={CFG_ASSET_PATH_KEY: path if os.path.isdir(abs_asset_path) else os.path.dirname(path)})
td = {CFG_ASSET_PATH_KEY:
path if os.path.isdir(abs_asset_path) else os.path.dirname(path)}
rgc.update_tags(*gat_bundle, data=td)
# seek_key points to the entire dir if not specified
seek_key_value = os.path.basename(abs_asset_path) if asset_dict["seek_key"] is not None else "."
rgc.update_seek_keys(*gat_bundle, keys={asset_dict["seek_key"] or asset_dict["asset"]: seek_key_value})
seek_key_value = os.path.basename(abs_asset_path) \
if asset_dict["seek_key"] is not None else "."
sk = {asset_dict["seek_key"] or asset_dict["asset"]: seek_key_value}
rgc.update_seek_keys(*gat_bundle, keys=sk)
rgc.set_default_pointer(asset_dict["genome"], asset_dict["asset"], tag)
# a separate update_tags call since we want to use the get_asset method that requires a complete asset entry in rgc
rgc.update_tags(*gat_bundle, data={CFG_ASSET_CHECKSUM_KEY: get_dir_digest(_seek(rgc, *gat_bundle))})
# a separate update_tags call since we want to use the get_asset method
# that requires a complete asset entry in rgc
td = {CFG_ASSET_CHECKSUM_KEY: get_dir_digest(_seek(rgc, *gat_bundle))}
rgc.update_tags(*gat_bundle, data=td)
# Write the updated refgenie genome configuration
rgc.write()
rgc.make_readonly()
Expand Down Expand Up @@ -635,10 +653,11 @@ def main():
if len(asset_list) > 1:
raise NotImplementedError("Can only add 1 asset at a time")
else:
refgenie_add(rgc, asset_list[0], args.path)
refgenie_add(rgc, asset_list[0], args.path, args.force)

elif args.command == PULL_CMD:
rgc = RefGenConf(filepath=gencfg, writable=False)
force = None if not args.force else True
outdir = rgc[CFG_FOLDER_KEY]
if not os.path.exists(outdir):
raise MissingFolderError(outdir)
Expand All @@ -651,7 +670,8 @@ def main():
return

for a in asset_list:
rgc.pull(a["genome"], a["asset"], a["tag"],unpack=not args.no_untar)
rgc.pull(a["genome"], a["asset"], a["tag"],
unpack=not args.no_untar, force=force)

elif args.command in [LIST_LOCAL_CMD, LIST_REMOTE_CMD]:
rgc = RefGenConf(filepath=gencfg, writable=False)
Expand Down Expand Up @@ -707,8 +727,6 @@ def main():
_LOGGER.info("Removed an incomplete asset '{}/{}:{}'".
format(*bundle))
return
else:
_seek(rgc, *bundle, enclosing_dir=True)
except (KeyError, MissingAssetError, MissingGenomeError):
_LOGGER.info("Asset '{}/{}:{}' does not exist".format(*bundle))
return
Expand Down

0 comments on commit 9ced533

Please sign in to comment.