Skip to content

Commit

Permalink
various changes
Browse files Browse the repository at this point in the history
  • Loading branch information
jodyhey committed Feb 27, 2021
1 parent e650871 commit d58570b
Show file tree
Hide file tree
Showing 17 changed files with 201 additions and 74 deletions.
2 changes: 1 addition & 1 deletion pgpipe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

# Basic Information
__title__ = "py-popgen"
__version__ = '0.1.7'
__version__ = '0.1.10'
__summary__ = "Software platform for facilitating population genomic analyses"
__url__ = "https://ppp.csusm.edu/"
__license__ = "MIT"
Expand Down
23 changes: 12 additions & 11 deletions pgpipe/admixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,9 @@

from pgpipe.logging_module import initLogger, logArgs
from pgpipe.plink import confirm_ped_prefix, confirm_bed_prefix, confirm_ped_files, confirm_bed_files
from pgpipe.misc import confirm_executable
from pgpipe.misc import confirm_executable, argprase_kwargs

def admix_parser(passed_arguments):
def admix_parser(passed_arguments = []):
'''admix Argument Parser - Assigns arguments from command line'''

def parser_confirm_file ():
Expand Down Expand Up @@ -171,15 +171,18 @@ def metavar_list (var_list):


if passed_arguments:
return admix_parser.parse_args(passed_arguments)
return vars(admix_parser.parse_args(passed_arguments))
else:
return admix_parser.parse_args()
return vars(admix_parser.parse_args())

def run(**kwargs):

def run(passed_arguments = []):
# Update kwargs with defaults
if __name__ != "__main__":
kwargs = argprase_kwargs(kwargs, admix_parser)

# Grab admixture arguments from command line
admix_args = admix_parser(passed_arguments)
# Assign arguments
admix_args = argparse.Namespace(**kwargs)

# Adds the arguments (i.e. parameters) to the log file
logArgs(admix_args, func_name = 'admixture')
Expand Down Expand Up @@ -277,9 +280,7 @@ def run(passed_arguments = []):
# Check if the executable was found
if not admixture_path:
raise IOError('admixture not found. Please confirm the executable is installed')

#admixture_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),'bin','admixture')


# Run 'admixture' executable file with options provided by user
admixture_call = subprocess.Popen([admixture_path] + list(map(str, admix_call_args)), stdout = subprocess.PIPE, stderr = subprocess.PIPE)

Expand All @@ -292,4 +293,4 @@ def run(passed_arguments = []):

if __name__ == "__main__":
initLogger()
run()
run(**admix_parser())
124 changes: 117 additions & 7 deletions pgpipe/bed_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
a single BED; v) merge features within one or more BED files; vi) create a BED of
complementary features.
******************
##################
Command-line Usage
******************
##################
The BED utilities function may be called using the following command:
.. code-block:: bash
Expand All @@ -20,6 +20,22 @@
Utilites
########
***************
Windows Utility
***************
Given a chromosome size file and a window size, the windows utility will generate a
BED file of interval features.
=============
Example usage
=============
Return a BED with interval features that do not extend outside the chromosomes:
.. code-block:: bash
bed_utilities.py --utility windows --chrom-file hg18.chrom.sizes --window-size 1000 --out hg18_windows.bed
**************
Sample Utility
**************
Expand Down Expand Up @@ -116,6 +132,22 @@
bed_utilities.py --utility complement --bed examples/files/chr1_sites.bed --chrom-file examples/files/chr_sizes.txt
*****************
Intersect Utility
*****************
Given a BED file and an intersect file, return only the interval features within the BED
file that overlap with the intersect file.
=============
Example usage
=============
Return a BED with only intersecting interval features:
.. code-block:: bash
bed_utilities.py --utility intersect --bed hg18_windows.bed --intersect-file Intersect.vcf.gz --out hg18_intersects.bed
*************
Merge Utility
*************
Expand Down Expand Up @@ -153,8 +185,20 @@
times.
**--chrom-file** *<chrom_filename>*
Argument used to define the filename of a file with the sizes of each
chromosome file. Appropriate files may be downloaded from the UCSC Genome
Browser.
chromosome. Chromosome size files must be tab-delimited as follows:
.. code-block:: bash
chr1\t247249719
chr2\t242951149
...
chrX\t154913754
chrY\t57772954
Appropriate files may be downloaded from the `UCSC Genome Browser <http://hgdownload.soe.ucsc.edu/downloads.html>`_.
The supported **ASSEMBLY.chrom.sizes** file for each assembly may be found by
clicking **Genome sequence files and select annotations** (followed by
**Standard genome sequence files and select annotations** on select assemblies).
#############################
Output Command-line Arguments
Expand All @@ -176,6 +220,12 @@
files (merge); create a BED file of complementary features - i.e. features that
do not overlap - from a BED file (complement).
**************************************
Windows Utility Command-line Arguments
**************************************
**--window-size** *<window_size_int>*
Argument used to define the window/interval size to return.
*************************************
Sample Utility Command-line Arguments
*************************************
Expand Down Expand Up @@ -219,6 +269,14 @@
Argument used to define the length of base pairs (bp) to extend downstream of
features.
****************************************
Intersect Utility Command-line Arguments
****************************************
**--intersect-file** *<intersect_file_filename>*
Argument used to define the BED/VCF/VCF.gz file to intersect with the input
BED. Features that do not intersect with the given file's features/variants
are removed.
************************************
Merge Utility Command-line Arguments
************************************
Expand Down Expand Up @@ -276,15 +334,26 @@ def metavar_list (var_list):
bed_parser = argparse.ArgumentParser()

# Input arguments
bed_input = bed_parser.add_mutually_exclusive_group(required = True)
bed_input = bed_parser.add_mutually_exclusive_group()
bed_input.add_argument('--bed', help = 'Defines the filename of the BED file', type = str, action = parser_confirm_file())
bed_input.add_argument('--beds', help = 'Defines the filenames of the BED files (may be used multiple times)', type = str, nargs = '+', action = parser_confirm_file_list())
bed_parser.add_argument('--chrom-file', help = 'File of chromosome sizes', type = str, action = parser_confirm_file())

# Utility based arguments
utility_list = ['sample', 'subtract', 'extend', 'sort', 'merge', 'complement']
utility_list = ['sample', 'subtract', 'extend', 'sort', 'merge', 'complement', 'windows', 'intersect']
bed_parser.add_argument('--utility', metavar = metavar_list(utility_list), help = 'Specifies the utility to be used', type = str, choices = utility_list, required = True)

# Intersect-specific arguments
intersect_group = bed_parser.add_argument_group('Intersect Utility Arguments')
intersect_group.add_argument('--intersect-file', help = 'File to intersect', type = str, action = parser_confirm_file())
intersect_group.add_argument('--return_only_intersects', help = 'Only return intersecting intervals', action = 'store_true')
intersect_group.add_argument('--sorted-intersect', help = 'Invokes the sorted algorithm. Use to reduce memory usage with a large --intersect-file. Requires both files to be sorted in the same manner', action = 'store_true')

# Window-specific arguments
window_group = bed_parser.add_argument_group('Windows Utility Arguments')
window_group.add_argument('--window-size', help = 'Size (in bp) of windows to be created', type = int)
window_group.add_argument('--window-step', help = 'Step size (in bp) between new windows. Defaults to the window size', type = int)

# Sample-specific arguments
sample_group = bed_parser.add_argument_group('Sample Utility Arguments')
sample_group.add_argument('--sample-size', help="Defines the total sample size", type = int)
Expand Down Expand Up @@ -429,6 +498,10 @@ def run (**kwargs):
# Assign arguments
bed_args = argparse.Namespace(**kwargs)

# Confirm an input BED has been given, unless windows are being created
if bed_args.utility != 'windows' and (not bed_args.bed and not bed_args.beds):
raise Exception('--bed or --beds required')

# Adds the arguments (i.e. parameters) to the log file
logArgs(bed_args, 'bed_utilities')

Expand Down Expand Up @@ -491,7 +564,7 @@ def run (**kwargs):
else:

# Check if the utility requires a chromosome size file
if bed_args.utility in ['complement', 'extend']:
if bed_args.utility in ['complement', 'extend', 'windows']:

# Check if a chromosome sizes file has been defined
if not bed_args.chrom_file:
Expand Down Expand Up @@ -586,6 +659,43 @@ def run (**kwargs):
# Assign the --chrom-file argument
bedtools_arg_list.extend(['-g', bed_args.chrom_file])

# Check if the create windows utility has been selected
elif bed_args.utility == 'windows':

# Assign the utility
bedtools_arg_list.append(f'make{bed_args.utility}')

# Assign the --chrom-file argument
bedtools_arg_list.extend(['-g', bed_args.chrom_file])

# Check if --window-size has been specified
if bed_args.window_size:
bedtools_arg_list.extend(['-w', bed_args.window_size])

# Check if --window-step has been specified
if bed_args.window_step:
bedtools_arg_list.extend(['-s', bed_args.window_step])

# Check if the intersect utility has been selected
elif bed_args.utility == 'intersect':

# Assign the utility
bedtools_arg_list.append(bed_args.utility)

# Assign the input arguments
bedtools_arg_list.extend(['-a', bed_args.bed])

# Assign the --intersect-file argument
bedtools_arg_list.extend(['-b', bed_args.intersect_file])

# Check if --return_only_intersects has not been specified
if not bed_args.return_only_intersects:
bedtools_arg_list.append('-u')

# Check if --sorted-intersect has been specified
if bed_args.sorted_intersect:
bedtools_arg_list.append('-sorted')

# Catch unknown utilities
else:
raise Exception('%s is an unknown utility. Please check input' % bed_args.utility)
Expand Down
19 changes: 12 additions & 7 deletions pgpipe/eigenstrat_fstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,9 @@
from pgpipe.eigenstrat_wrapper import *
from pgpipe.model import read_model_file, pops_not_in_model
from pgpipe.logging_module import initLogger, logArgs
from pgpipe.misc import argprase_kwargs

def admix_parser (passed_arguments):
def admix_parser (passed_arguments = []):
'''admix Argument Parser - Assigns arguments from command line'''

def parser_confirm_file ():
Expand Down Expand Up @@ -265,9 +266,9 @@ def metavar_list (var_list):
admix_parser.add_argument('--admix-o-pop-file', dest = 'o_file', help = 'O populations for admixure analysis', type = str, action = parser_confirm_file())

if passed_arguments:
return admix_parser.parse_args(passed_arguments)
return vars(admix_parser.parse_args(passed_arguments))
else:
return admix_parser.parse_args()
return vars(admix_parser.parse_args())

def read_admix_pops_file (pop_filename):

Expand All @@ -286,10 +287,14 @@ def read_admix_pops_file (pop_filename):
# Return the admix pops
return admix_pops

def run(passed_arguments = []):
def run(**kwargs):

# Grab admixtools arguments from command line
admix_args = admix_parser(passed_arguments)
# Update kwargs with defaults
if __name__ != "__main__":
kwargs = argprase_kwargs(kwargs, admix_parser)

# Assign arguments
admix_args = argparse.Namespace(**kwargs)

# Adds the arguments (i.e. parameters) to the log file
logArgs(admix_args, func_name='admixtools')
Expand Down Expand Up @@ -555,4 +560,4 @@ def run(passed_arguments = []):

if __name__ == "__main__":
initLogger()
run()
run(**admix_parser())
24 changes: 13 additions & 11 deletions pgpipe/fasta_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@
import logging
import pandas as pd

# Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes.
#sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'pppipe')))

from pgpipe.logging_module import initLogger, logArgs

from pgpipe.picard import call_picard
from pgpipe.misc import argprase_kwargs

def fasta_utility_parser(passed_arguments):
def fasta_utility_parser(passed_arguments = []):
'''fasta Argument Parser - Assigns arguments from command line'''

def parser_confirm_file ():
Expand Down Expand Up @@ -55,11 +53,11 @@ def metavar_list (var_list):
fasta_parser.add_argument('--picard-path', help = "Defines path to locate picard.jar", type = str)

if passed_arguments:
return fasta_parser.parse_args(passed_arguments)
return vars(fasta_parser.parse_args(passed_arguments))
else:
return fasta_parser.parse_args()
return vars(fasta_parser.parse_args())

def run (passed_arguments = []):
def run (**kwargs):
'''
Liftover for VCF-formatted files
Expand Down Expand Up @@ -95,8 +93,12 @@ def run (passed_arguments = []):
Output file already exists
'''

# Grab FASTA arguments from command line
fasta_args = fasta_utility_parser(passed_arguments)
# Update kwargs with defaults
if __name__ != "__main__":
kwargs = argprase_kwargs(kwargs, fasta_utility_parser)

# Assign arguments
fasta_args = argparse.Namespace(**kwargs)

# Adds the arguments (i.e. parameters) to the log file
logArgs(fasta_args, func_name = 'fasta_utilities')
Expand All @@ -115,5 +117,5 @@ def run (passed_arguments = []):


if __name__ == "__main__":
#initLogger()
run()
initLogger()
run(**fasta_utility_parser())
2 changes: 1 addition & 1 deletion pgpipe/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def create_ind_file (self, file_ext = '', file_path = '', overwrite = False):
if not overwrite:
# Check if the file already exists
if os.path.isfile(ind_filename):
raise IOError('Individuals file exists.')
raise IOError('Individuals file exists. Use --overwrite to ignore')

# Create the population file
ind_file = open(ind_filename, 'w')
Expand Down
2 changes: 1 addition & 1 deletion pgpipe/plink.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ def check_plink_for_errors (plink_stderr):

# Print warning, if found
if 'Warning' in plink_stderr:
logging.warning(plink_stderr)
logging.warning(plink_stderr.replace('\n',' '))

# Print output if error found. Build up as errors are discovered
elif plink_stderr:
Expand Down

0 comments on commit d58570b

Please sign in to comment.