Skip to content

Commit

Permalink
Apply Black style
Browse files Browse the repository at this point in the history
  • Loading branch information
bede committed Nov 16, 2023
1 parent a9f8385 commit 4f3720d
Show file tree
Hide file tree
Showing 5 changed files with 562 additions and 402 deletions.
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ An `x86_64` Miniconda installation is required in order to install Vespasian.
### Development install

```bash
conda create -y -n vespasian-dev python=3.11 paml==4.10.6 pytest -c conda-forge -c bioconda
conda create -y -n vespasian-dev python=3.11 paml==4.10.6 pytest black pre-commit -c conda-forge -c bioconda
conda activate vespasian-dev
git clone https://github.com/bede/vespasian
pip install --editable ./vespasian
Expand Down Expand Up @@ -136,9 +136,9 @@ e.g. `vespasian codeml-setup --progress --warnings --branches branches.yml input
- File `codeml-commands.sh` containing a list of commands to execute the model tests
- File `Snakefile` for running the contents of `codeml-commands.sh` locally or using a cluster



N.B. By default, at least two taxa must be present within a given family for a named internal node to be labelled. Use `--strict` to skip named internal nodes unless all child leaf nodes are present.

N.B. By default, at least two taxa must be present within a given family for a named internal node to be labelled. Use `--strict` to skip named internal nodes unless all child leaf nodes are present.

```
$ vespasian codeml-setup -h
Expand Down Expand Up @@ -180,12 +180,12 @@ e.g. `cd codeml && snakemake --cores 8`
- Ensure `codeml` binary is present inside `$PATH`
- Using PAML version `4.9=h01d97ff_5` from Conda is recommended
- `cd codeml` (the directory created by `codeml-setup` in step 2)
- *Local execution (for small jobs)*
- *Local execution (for small jobs)*
- `snakemake -k --cores 8` (recommended)
- Or, using GNU parallel (*not* recommended – doesn't catch errors!)
- `parallel --bar :::: codeml-commands.sh`
- *Cluster execution*

- `snakemake -k --cores MAXJOBS --cluster OPTIONS`
- SGE example:
- `snakemake -k --jobs 100 --cluster "qsub -cwd -V" --max-status-checks-per-second 0.1`
Expand Down Expand Up @@ -229,4 +229,4 @@ optional arguments:
- [ ] Specify site and/or branch-site models only
- [ ] Renaming:
- [ ] `infer-gene-trees` -> `infer-trees`
- [ ] Consider B-H correction
- [ ] Consider B-H correction
2 changes: 1 addition & 1 deletion vespasian/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.5.3'
__version__ = "0.5.3"
210 changes: 110 additions & 100 deletions vespasian/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,134 +10,144 @@
from vespasian import vespasian, util, __version__



def configure_warnings(show_warnings):
'''Show or suppress warnings, mainly for TreeSwift Tree.mrca() operations'''
"""Show or suppress warnings, mainly for TreeSwift Tree.mrca() operations"""
if show_warnings:
warnings.filterwarnings('always')
warnings.filterwarnings("always")
else:
warnings.filterwarnings('ignore')


def infer_gene_trees(input: 'path to directory containing gene families',
tree: 'path to newick formatted species tree',
output: 'path to output directory' = 'gene-trees',
separator: 'character separating taxon name and identifier(s)' = '|',
warnings: 'show warnings' = False,
progress: 'show progress bar' = False):
'''Create gene trees by pruning a given species tree'''
warnings.filterwarnings("ignore")


def infer_gene_trees(
input: "path to directory containing gene families",
tree: "path to newick formatted species tree",
output: "path to output directory" = "gene-trees",
separator: "character separating taxon name and identifier(s)" = "|",
warnings: "show warnings" = False,
progress: "show progress bar" = False,
):
"""Create gene trees by pruning a given species tree"""
configure_warnings(warnings)
vespasian.infer_gene_trees(input, tree, output, separator, progress)


def codeml_setup(input: 'path to directory containing aligned gene families',
gene_trees: 'path to directory containing gene trees',
branches: 'path to yaml file containing branches to be labelled' = None,
output: 'path to output directory' = 'codeml',
separator: 'character separating taxon name and identifier(s)' = '|',
strict: 'label only branches with all taxa present in tree (default is >= 2)' = False,
threads: 'number of parallel workers' = ceil(os.cpu_count()/2),
warnings: 'show warnings' = False,
progress: 'show progress bar' = False):
'''Create suite of branch and branch-site codeml environments'''
def codeml_setup(
input: "path to directory containing aligned gene families",
gene_trees: "path to directory containing gene trees",
branches: "path to yaml file containing branches to be labelled" = None,
output: "path to output directory" = "codeml",
separator: "character separating taxon name and identifier(s)" = "|",
strict: "label only branches with all taxa present in tree (default is >= 2)" = False,
threads: "number of parallel workers" = ceil(os.cpu_count() / 2),
warnings: "show warnings" = False,
progress: "show progress bar" = False,
):
"""Create suite of branch and branch-site codeml environments"""
configure_warnings(warnings)
if not branches:
print('No branch file supplied. Branch-site tests will not be configured.', file=sys.stderr)
vespasian.codeml_setup(input, gene_trees, branches, output, separator, strict, threads, progress)


def report(input: 'path to codeml-setup output directory',
output: 'path to output directory' = 'report-codeml',
hide: 'hide gratuitous emperor portrait' = False,
progress: 'show progress bar' = False):
'''Perform likelihood ratio tests and report positively selected sites'''
print(
"No branch file supplied. Branch-site tests will not be configured.",
file=sys.stderr,
)
vespasian.codeml_setup(
input, gene_trees, branches, output, separator, strict, threads, progress
)


def report(
input: "path to codeml-setup output directory",
output: "path to output directory" = "report-codeml",
hide: "hide gratuitous emperor portrait" = False,
progress: "show progress bar" = False,
):
"""Perform likelihood ratio tests and report positively selected sites"""
vespasian.report(input, output, progress)
if not hide:
print(''' ,*//(##((##((*((,.
*##/(###(*/(#(//(/((###%%%%#/
/(#(#((*****/**////*,,*(**/*((%%%%%%/
///(/***,/***/**,**,,*//////**/(((###%%&%%%(.
.((///,**,*//,*/*,***/****////(//((((##%%%%%%%%#%%#.
((((/(///****,*,..,,,,,*,,,,,,,,,,,*////(###%%%%###%&%#
###(//**,,,**,,......,.,.......,,,,,,,,,**((((#####%((###%##
###(//***,,**/,,......,.,.............,,,,,,/*/((#(/((##%%###%#%.
./(#(//*,,**,,,.. .....................,,,,,,,,***/(((#(/////(#((###/
.(((((//*,,*,,,,.......,,,,,,,.,,.........,,,,,,,****//((((*/%#((/((%(#(
*//((/****,,,,,,..,,,,,,,.,...,,,,,.....,,,,,,,,*,**//*/((((///*//((((/#(%
,//((/*//**,,,,...,,,,,,,,,,,,,*,,...,.,,,,,***,,,,,**//((/((*(*(/##*(#/(#(/
,/(#((/***/**,,..,..,.........,......,,.,,,,,*,,,*,,***//////#*,*#*(*/&%*/%(%
*//#%#(*,***..,,.......,,......,.,.,,,,,,,,,,,,*****/(////(/((/////(**(///%
.(((%%#(/*,,.,,,,..........,...,,,,,....,,,,,,,,******//((#(/(////((/#%(*(//#*
.(##%#(*,...,,,,,.......,....,,,,..,,,,,,,,,**,,,***/(/####((/**,#%%(###*//#
.#//%&%##(*.,,,,,,,,,,.....,,..,,,,,,,**,,,,,,,,,,,*/*(((#(((////*/###((#&@(//
%(&%#%#/*,,.,.,,,**,,,,,*,,,,,,,,,,,,,,*****,******(###//**//*//((*#(&%&(%%
.&%/&%%#%/*/*/**,,,*****,,,,,,*/**//#%%%%##/***(///#/(###(/***(//(/*#/#%%/#
#@/##%%(%%/,,,.,,,/&@&&(/****,,*#&@&%(//*,,,,,,,*/(#(#%#((**//(((//(&&%%#(%#/
./%/%##(##***,**,,,..#@@,...,*##%(///#&&&%##(((/(#%%%#(/(////(//##%/(*%&
,#&%#/**&&&&&&(,,*#%&**/(*,,,***,,,*%%/,,,,,*(%&&%##/,////***/**/(&%,.,,*%#*
print(
""" ,*//(##((##((*((,.
*##/(###(*/(#(//(/((###%%%%#/
/(#(#((*****/**////*,,*(**/*((%%%%%%/
///(/***,/***/**,**,,*//////**/(((###%%&%%%(.
.((///,**,*//,*/*,***/****////(//((((##%%%%%%%%#%%#.
((((/(///****,*,..,,,,,*,,,,,,,,,,,*////(###%%%%###%&%#
###(//**,,,**,,......,.,.......,,,,,,,,,**((((#####%((###%##
###(//***,,**/,,......,.,.............,,,,,,/*/((#(/((##%%###%#%.
./(#(//*,,**,,,.. .....................,,,,,,,,***/(((#(/////(#((###/
.(((((//*,,*,,,,.......,,,,,,,.,,.........,,,,,,,****//((((*/%#((/((%(#(
*//((/****,,,,,,..,,,,,,,.,...,,,,,.....,,,,,,,,*,**//*/((((///*//((((/#(%
,//((/*//**,,,,...,,,,,,,,,,,,,*,,...,.,,,,,***,,,,,**//((/((*(*(/##*(#/(#(/
,/(#((/***/**,,..,..,.........,......,,.,,,,,*,,,*,,***//////#*,*#*(*/&%*/%(%
*//#%#(*,***..,,.......,,......,.,.,,,,,,,,,,,,*****/(////(/((/////(**(///%
.(((%%#(/*,,.,,,,..........,...,,,,,....,,,,,,,,******//((#(/(////((/#%(*(//#*
.(##%#(*,...,,,,,.......,....,,,,..,,,,,,,,,**,,,***/(/####((/**,#%%(###*//#
.#//%&%##(*.,,,,,,,,,,.....,,..,,,,,,,**,,,,,,,,,,,*/*(((#(((////*/###((#&@(//
%(&%#%#/*,,.,.,,,**,,,,,*,,,,,,,,,,,,,,*****,******(###//**//*//((*#(&%&(%%
.&%/&%%#%/*/*/**,,,*****,,,,,,*/**//#%%%%##/***(///#/(###(/***(//(/*#/#%%/#
#@/##%%(%%/,,,.,,,/&@&&(/****,,*#&@&%(//*,,,,,,,*/(#(#%#((**//(((//(&&%%#(%#/
./%/%##(##***,**,,,..#@@,...,*##%(///#&&&%##(((/(#%%%#(/(////(//##%/(*%&
,#&%#/**&&&&&&(,,*#%&**/(*,,,***,,,*%%/,,,,,*(%&&%##/,////***/**/(&%,.,,*%#*
,#&%#(*. %@(,.. ..***,..,*//,,,,,,*,...,,,*(///,,,*,**,,*/(//(%*.,(%&&%%/
**. (#(/,..../#,...***,.*/*,,,,/(//,,,,,/##(((##(*,,..,,,,,**/*/(##&*,#@&*(((#%
/@@#,/(*,.....,/***,. .,/(,,,,*(//**,,,.,,,,,,,,,,,,,,,,,,,*////#%%%,/#,,*(/**(
.*&@&%(*.,............,*(/,,.,*#(//**,,,,,,.,,.,,,,,*******/(/(##%#**%,.,*(*,,*
..*(&(**,............,//*,.,,*((#(*,.,,,,.,...,,,,,,***//((###%&%/,//**,/#/(&
.,/%(/*............,(*,,,,,*(%((/*,,,......,,,,,,***((###%%%&%%,**#.,*/%((/
,,%(/***,........,(#**,,**(##((((,,,,.....,,,,****/(#%%%%%&%###@@%*,*(###
#./%/**,,,,,.,..,,&%/***/(%%%((##,**,,,,.,,,,,,**//#%%%%%%%%##(**(#*((*#
%#@(*****,,,,,,,.&@@@&%@@&@@@@&/.*/**,,,,**//////(#%%%#%(##/,,*%#/*(
.%/****,,***,.,#@@@@@@@@&%(*...,*///****/(((/((########/(##*,.,*(#(
,(#&/*(/*,,*,..,,*%&@@@@&/........,,*//***//##(/((#%#((##(#%#,*,,(%,
(#&(*((,,,,,..****(((/,.,,**,..,,,,,*/////(###(/(##(/#((/#&&&&&%%
,//#,......,*/**,,........,*/((#*,,,*((#%%(//(#(/(%((#&@@@#*
.&(/%/,.....,,,**/((####(///*,,..,,,,*(##%%(/(((((##((%@@&%
#%(%#*,,..,**,,..,*///(/****,*,,,,,,/(#%%#(///(/#%##%&@&%.
%%#@%*,,****,,,.,,,,,,.,..,**,,,,,*/#%&%%(///(%%&%%&@&%%
@%&@&/,,,.......,..,......***,.,*/(#%%%#(/(##&&&&@@@%#*
,@&&@@&(,,,..................,,,(%%&&%#(/(#%&&&&@@&(#%
*@@@@&@@@#*,,,,,,.,...,.,,,,***(%%&&&%###%&&@@@@@%(/#(
@&@@@@&@@@@@###%#%%##%%%###%&&@@@&%#%%%&@@&&&%//##&.
,%##&&&&&@@@@@@&@&@&@@@@@@@@@@@@@@&&&%%%&@@&%%%/(/#%%&
/**/*/%&&&&&@@@@@@@@@@@@@@@@@@@@&%%#%%&@&&%#(**,/#%%&/
#(,*///(#&&&&&&@@&&&&&&&&&&&&&%%##%%&&%%%#(*,,**(###%,
//#(,,,*///#%&&&&&&&&&&&&%%%%%%%#%%%%%%%(/,,,,,,/((##&
*(///*,,,,////((##%%%#%%&&%%#########/*,,,,,.,,*(##%&&
##/****,...,**/******,,*,,*/((///////,,,,,,,,,/(##%&&&
///(/*,,,,,,,..,,,,,,,*,,,,,,,,,,,,,*,,,,,,,,**(###%&@&
(*,*/*,,,..,,,,....,,,*//***,,,,,,,,,,,,,,,,**/#%#%%&&&
.#,,,,,***,,.,,,,,,..,,,,,,,....,,,,,,,,,,,,,**#%%&&&&&%/
./*,,,,**,,,,,..,,,,,,,,,,,.,,,,,,,,,,,,,***((((%%%&&%#/
,.,,*,,,,.,,...,,.,,,,*,**,,,,,*,*,,,,***//(#%##%%#(,
,,,,,,,,,,,...,*,,.,,..,....,,,.,,,*****((###%##((#
,.,,*...,......,,,,***,,,,,,,,,*****/(######(#*
.,(,,,,.,.,.,.,,,,,,,,,,,,,,,,,,,*/(####%((,
....,,,,,.,.......,.,,,,,,,,*/(#(%###(
.,..,,,*,...,,..,**,**/***/((#%#,
.,*,.....,,,,,***/((//###/.
,,,,,/,****///*.
''', file=sys.stderr)
print(f'Report written to {output}', file=sys.stderr)

..*(&(**,............,//*,.,,*((#(*,.,,,,.,...,,,,,,***//((###%&%/,//**,/#/(&
.,/%(/*............,(*,,,,,*(%((/*,,,......,,,,,,***((###%%%&%%,**#.,*/%((/
,,%(/***,........,(#**,,**(##((((,,,,.....,,,,****/(#%%%%%&%###@@%*,*(###
#./%/**,,,,,.,..,,&%/***/(%%%((##,**,,,,.,,,,,,**//#%%%%%%%%##(**(#*((*#
%#@(*****,,,,,,,.&@@@&%@@&@@@@&/.*/**,,,,**//////(#%%%#%(##/,,*%#/*(
.%/****,,***,.,#@@@@@@@@&%(*...,*///****/(((/((########/(##*,.,*(#(
,(#&/*(/*,,*,..,,*%&@@@@&/........,,*//***//##(/((#%#((##(#%#,*,,(%,
(#&(*((,,,,,..****(((/,.,,**,..,,,,,*/////(###(/(##(/#((/#&&&&&%%
,//#,......,*/**,,........,*/((#*,,,*((#%%(//(#(/(%((#&@@@#*
.&(/%/,.....,,,**/((####(///*,,..,,,,*(##%%(/(((((##((%@@&%
#%(%#*,,..,**,,..,*///(/****,*,,,,,,/(#%%#(///(/#%##%&@&%.
%%#@%*,,****,,,.,,,,,,.,..,**,,,,,*/#%&%%(///(%%&%%&@&%%
@%&@&/,,,.......,..,......***,.,*/(#%%%#(/(##&&&&@@@%#*
,@&&@@&(,,,..................,,,(%%&&%#(/(#%&&&&@@&(#%
*@@@@&@@@#*,,,,,,.,...,.,,,,***(%%&&&%###%&&@@@@@%(/#(
@&@@@@&@@@@@###%#%%##%%%###%&&@@@&%#%%%&@@&&&%//##&.
,%##&&&&&@@@@@@&@&@&@@@@@@@@@@@@@@&&&%%%&@@&%%%/(/#%%&
/**/*/%&&&&&@@@@@@@@@@@@@@@@@@@@&%%#%%&@&&%#(**,/#%%&/
#(,*///(#&&&&&&@@&&&&&&&&&&&&&%%##%%&&%%%#(*,,**(###%,
//#(,,,*///#%&&&&&&&&&&&&%%%%%%%#%%%%%%%(/,,,,,,/((##&
*(///*,,,,////((##%%%#%%&&%%#########/*,,,,,.,,*(##%&&
##/****,...,**/******,,*,,*/((///////,,,,,,,,,/(##%&&&
///(/*,,,,,,,..,,,,,,,*,,,,,,,,,,,,,*,,,,,,,,**(###%&@&
(*,*/*,,,..,,,,....,,,*//***,,,,,,,,,,,,,,,,**/#%#%%&&&
.#,,,,,***,,.,,,,,,..,,,,,,,....,,,,,,,,,,,,,**#%%&&&&&%/
./*,,,,**,,,,,..,,,,,,,,,,,.,,,,,,,,,,,,,***((((%%%&&%#/
,.,,*,,,,.,,...,,.,,,,*,**,,,,,*,*,,,,***//(#%##%%#(,
,,,,,,,,,,,...,*,,.,,..,....,,,.,,,*****((###%##((#
,.,,*...,......,,,,***,,,,,,,,,*****/(######(#*
.,(,,,,.,.,.,.,,,,,,,,,,,,,,,,,,,*/(####%((,
....,,,,,.,.......,.,,,,,,,,*/(#(%###(
.,..,,,*,...,,..,**,**/***/((#%#,
.,*,.....,,,,,***/((//###/.
,,,,,/,****///*.
""",
file=sys.stderr,
)
print(f"Report written to {output}", file=sys.stderr)


###################################################################################################


def reformat_environments(input: 'path to directory containing codeml environments'):
'''Reformat vespasian codeml environments for use with legacy codeml_reader'''
def reformat_environments(input: "path to directory containing codeml environments"):
"""Reformat vespasian codeml environments for use with legacy codeml_reader"""
util.reformat_environments(input)


def version():
'''Show version'''
"""Show version"""
print(__version__)


def main():
argh.dispatch_commands([infer_gene_trees,
codeml_setup,
report,
reformat_environments,
version])
argh.dispatch_commands(
[infer_gene_trees, codeml_setup, report, reformat_environments, version]
)


if __name__ == '__main__':
if __name__ == "__main__":
main()
25 changes: 12 additions & 13 deletions vespasian/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,25 @@


def convert_phylip(fasta_path, phylip_path):
'''Convert fasta to sequential phylip format'''
chunkify_string = lambda x, n: [x[i:i+n] for i in range(0, len(x), n)]
records = list(SeqIO.parse(fasta_path, 'fasta'))
"""Convert fasta to sequential phylip format"""
chunkify_string = lambda x, n: [x[i : i + n] for i in range(0, len(x), n)]
records = list(SeqIO.parse(fasta_path, "fasta"))
indent_len = max(len(r.id) for r in records) + 2
indent_fmt = '\n' + ' ' * indent_len
phylip = f' {len(records)} {len(records[0].seq)}'
indent_fmt = "\n" + " " * indent_len
phylip = f" {len(records)} {len(records[0].seq)}"

for r in records:
chunks = chunkify_string(str(r.seq), 60)
phylip += f'\n{r.id:<{indent_len}}{chunks[0]}'
phylip += f"\n{r.id:<{indent_len}}{chunks[0]}"
phylip += indent_fmt + indent_fmt.join(chunks)

with open(phylip_path, 'w+') as phylip_fh:
phylip_fh.write(phylip)

with open(phylip_path, "w+") as phylip_fh:
phylip_fh.write(phylip)


def reformat_environments(codeml_root):
'''Reformat vespasian codeml environments for use with legacy codeml_reader'''
"""Reformat vespasian codeml environments for use with legacy codeml_reader"""
codeml_envs = vespa.list_codeml_dirs(codeml_root)
for env in codeml_envs:
convert_phylip(f'{env}/align.fa', f'{env}/align.phy')
shutil.copy(f'{env}/tree.nwk', f'{env}/tree')
convert_phylip(f"{env}/align.fa", f"{env}/align.phy")
shutil.copy(f"{env}/tree.nwk", f"{env}/tree")

0 comments on commit 4f3720d

Please sign in to comment.