Skip to content

Commit

Permalink
updated functions and tests with kwargs
Browse files Browse the repository at this point in the history
  • Loading branch information
jaredgk committed Oct 8, 2020
1 parent 1da7a9a commit c7d2c85
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 171 deletions.
19 changes: 13 additions & 6 deletions pgpipe/bed_invert.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,14 @@
import pysam
from pgpipe.genome_region import Region, RegionList, getIntervalsBetween
import pgpipe.argparse_sets
from pgpipe.misc import argprase_kwargs
import argparse



# Given a list of CDS intervals and an optional buffer length,
# generate the full set of intervals between regions.
def createParser():
def parseArguments(passed_arguments = []):
parser = argparse.ArgumentParser(description=("Generates list of "
"intervals within a provided BED file with option "
"to pad intervals by a fixed amount"))
Expand All @@ -80,11 +81,14 @@ def createParser():
cg.add_argument('--removechr',dest="remove_chr",action="store_true")
parser.add_argument('--out', dest="output_name",
help="Output filename, default is stdout")
return parser
if passed_arguments:
return vars(parser.parse_args(passed_arguments))
else:
return vars(parser.parse_args())



def get_intergenic(sysargs):
def get_intergenic(**kwargs):
"""
Creates a BED file with regions that are not covered in the input BED.
Expand Down Expand Up @@ -125,8 +129,11 @@ def get_intergenic(sysargs):
"""
parser = createParser()
args = parser.parse_args(sysargs)
#parser = createParser()
#args = parser.parse_args(sysargs)
if __name__ != "__main__":
kwargs = argprase_kwargs(kwargs, parseArguments)
args = argparse.Namespace(**kwargs)
if args.region_name is None:
raise Exception("BED input filename required")
reg_list = RegionList(filename=args.region_name, colstr=args.colstr,
Expand All @@ -136,4 +143,4 @@ def get_intergenic(sysargs):

if __name__ == "__main__":
#initLogger()
get_intergenic(sys.argv[1:])
get_intergenic(**parseArguments())
20 changes: 14 additions & 6 deletions pgpipe/get_nonmissing_chunks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
import io
from pgpipe.vcf_reader_func import VcfReader
from pgpipe.model import Model, read_single_model
from pgpipe.misc import argprase_kwargs

def createParser():
def parseArguments(passed_arguments = []):
parser = argparse.ArgumentParser(description=("Outputs windows in a "
"VCF file where all sites in the window have below"
" a set limit of missing data. Limit default "
Expand Down Expand Up @@ -35,7 +36,11 @@ def createParser():
"in model file if more than one is contained"))
parser.add_argument("--tbi", dest="tabix_index", help="Path to bgzipped "
"file's index if name doesn't match VCF file")
return parser
if passed_arguments:
return vars(parser.parse_args(passed_arguments))
else:
return vars(parser.parse_args())
#return parser

def getl(f,compressed=False):
l = f.readline()
Expand All @@ -61,7 +66,7 @@ def outputLine(chrom,start_pos,end_pos,args):
chrom = fixChromName(chrom,args.addchr,args.removechr)
return chrom+'\t'+str(sp)+'\t'+str(ep)+'\n'

def regionsWithData(sysargs):
def regionsWithData(**kwargs):
"""Returns a BED file with regions where all SNPs contained have
less than a given limit of missing data.
Expand Down Expand Up @@ -114,8 +119,11 @@ def regionsWithData(sysargs):
"""
parser = createParser()
args = parser.parse_args(sysargs)
#parser = createParser()
#args = parser.parse_args(sysargs)
if __name__ != "__main__":
kwargs = argprase_kwargs(kwargs, parseArguments)
args = argparse.Namespace(**kwargs)
compressed_input = False
#if args.vcfname is None:
# instream = sys.stdin
Expand Down Expand Up @@ -218,4 +226,4 @@ def regionsWithData(sysargs):
pass

if __name__ == '__main__':
regionsWithData(sys.argv[1:])
regionsWithData(**parseArguments())
39 changes: 27 additions & 12 deletions pgpipe/vcf_four_gamete.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
from pgpipe.logging_module import initLogger
from pgpipe.genome_region import Region, RegionList
from pgpipe.vcf_reader_func import getRecordList, vcfRegionName, getRecordsInRegion, VcfReader
from pgpipe.misc import argprase_kwargs


class BaseData():
Expand Down Expand Up @@ -516,7 +517,8 @@ def checkVcfRegion(self):
self.seqs = [[base_list[row][col] for row in range(0, len(base_list))] for col in range(0, len(base_list[0]))]


def createParser():
#def createParser(passed_arguments = []):
def parseArguments(passed_arguments = []):
# Build the command-line parser. Adjacent string literals are concatenated
# verbatim, so every fragment has to carry its own separating space.
parser = argparse.ArgumentParser(description=("Given a file of aligned "
        "sequences or variable sites, will return intervals based"
        " on 4 gamete tests"))
Expand All @@ -542,7 +544,7 @@ def createParser():
action='store_const', const="HandK85",help = "get a set of "
"parsimonious intervals each containing at least 1 recombination"
" event. Follows appendix 2 of Hudson and Kaplan 1985")
intervaltype_group.add_argument("--4gcompat", dest="intervaltype",
intervaltype_group.add_argument("--fourgcompat", dest="intervaltype",
action='store_const', const="CONTIG4GPASS",help = "get a set of "
" intervals such that for each interval all the included bases are"
" compatible with each other based on the 4 gamete test")
Expand Down Expand Up @@ -593,7 +595,12 @@ def createParser():
#add checks for correct input type
parser.add_argument('--tbi', dest="tabix_index",help=("Filepath for tabix "
"index file if using a single bgzipped VCF"))
return parser
parser.add_argument('--log', dest="log_name", help=("Filepath for log file"))
#return parser
if passed_arguments:
return vars(parser.parse_args(passed_arguments))
else:
return vars(parser.parse_args())

def logArgs(args):
logging.info('Arguments for vcf_region_write:')
Expand Down Expand Up @@ -775,7 +782,8 @@ def outputSubregion(args, interval, basedata, region=None, filename=None):



def sample_fourgametetest_intervals(sys_args):
#def sample_fourgametetest_intervals(sys_args):
def sample_fourgametetest_intervals(**kwargs):
"""Returns interval(s) from aligned sequences or snps.
Given a set of aligned sequences or a vcf file, this function
Expand Down Expand Up @@ -809,7 +817,7 @@ def sample_fourgametetest_intervals(sys_args):
Tests (one required)
--hk : bool
Will find intervals that contain at least one recombination event
--4gcompat : bool
--fourgcompat : bool
Will find intervals with zero recombination events in them
Return options:
Expand Down Expand Up @@ -845,12 +853,18 @@ def sample_fourgametetest_intervals(sys_args):
Extend region to overlapping informative sites
"""
parser = createParser()
#parser = createParser()
# parser.print_help()
if len(sys_args) == 0:
parser.print_help()
sys.exit(1)
args = parser.parse_args(sys_args)
#if len(kwargs) == 0:
#parser.print_help()
#sys.exit(1)
#args = parser.parse_args(sys_args)
if __name__ != "__main__":
kwargs = argprase_kwargs(kwargs, parseArguments)

args = argparse.Namespace(**kwargs)
if args.log_name is not None:
    # Use the parsed --log value; a bare `log_name` is not defined in this
    # scope and would raise NameError as soon as a log file is requested.
    initLogger(filename=args.log_name)
logArgs(args)
#argdic = vars(args)
interval_list = []
Expand Down Expand Up @@ -953,7 +967,8 @@ def sample_fourgametetest_intervals(sys_args):


if __name__ == '__main__':
initLogger()
#initLogger()

sample_fourgametetest_intervals(sys.argv[1:])
#sample_fourgametetest_intervals(sys.argv[1:])
sample_fourgametetest_intervals(**parseArguments())

31 changes: 19 additions & 12 deletions pgpipe/vcf_split_pysam.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,17 @@
from pgpipe.genome_region import Region, RegionList
import pgpipe.vcf_reader_func as vf
from pgpipe.model import Model, read_model_file
from pgpipe.misc import argprase_kwargs


def createParser():
def parseArguments(passed_arguments = []):
# The description is user-facing help text: close the parenthetical about the
# coordinate convention and state what "more than one" refers to.
parser = argparse.ArgumentParser(description=("Given a range or a file of "
        "ranges and a VCF file, will generate one or "
        "more VCF files with variants only from the "
        "region(s) specified. For regions, start and "
        "end coordinates required (default is zero-"
        "based, half-open intervals, chromosome "
        "optional unless VCF has more than one "
        "chromosome)"))
parser.add_argument("vcfname", help="Input VCF name")
parser.add_argument("--vcf",dest="vcfname", help="Input VCF name")
region_group = parser.add_mutually_exclusive_group()
region_group.add_argument("--r", dest="gene_str", help=("Comma "
"separated string, formatted \"start,end,"
Expand Down Expand Up @@ -69,7 +69,11 @@ def createParser():
subsamp_group.add_argument('--model-file',dest="modelname",help="Model file for selecting individuals for writing")
parser.add_argument('--model',dest="poptag",help="If model file is used, will use model with this name")
parser.add_argument("--forceempty",dest="forceempty",action="store_true",help=("Will create empty VCF if a region is empty rather than throw an error"))
return parser
if passed_arguments:
return vars(parser.parse_args(passed_arguments))
else:
return vars(parser.parse_args())
#return parser

def logArgs(args):
logging.info('Arguments for vcf_region_write:')
Expand Down Expand Up @@ -157,7 +161,7 @@ def writeFile(args, vcf_reader, filter_sites, remove_cpg, fasta_ref, header):
outfile.write(rec)


def vcf_region_write(sys_args):
def vcf_region_write(**kwargs):
"""Returns a VCF file with variants from regions in a list
Given a VCF file and gene region information (from either a file with one
Expand Down Expand Up @@ -226,11 +230,14 @@ def vcf_region_write(sys_args):
"""

parser = createParser()
if len(sys_args) == 0:
parser.print_help()
sys.exit(1)
args = parser.parse_args(sys_args)
#parser = createParser()
#if len(sys_args) == 0:
# parser.print_help()
# sys.exit(1)
#args = parser.parse_args(sys_args)
if __name__ != "__main__":
kwargs = argprase_kwargs(kwargs,parseArguments)
args = argparse.Namespace(**kwargs)
logArgs(args)
popmodel = None
if args.modelname is not None:
Expand Down Expand Up @@ -318,5 +325,5 @@ def vcf_region_write(sys_args):


if __name__ == '__main__':
initLogger()
vcf_region_write(sys.argv[1:])
#initLogger()
vcf_region_write(**parseArguments())
31 changes: 19 additions & 12 deletions pgpipe/vcf_to_ima.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@
from pgpipe.genome_region import Region, RegionList
from pgpipe.parse_functions import defaultsDictForFunction, getConfigFilename, makeRequiredList, getArgsWithConfig
from pgpipe.model import Model, read_single_model
from pgpipe.misc import argprase_kwargs
#from tabix_wrapper import prepVcf

#Input: VCF file, reference sequence, region list (possibly .bed file)
Expand Down Expand Up @@ -197,7 +198,7 @@ def writeHeader(self):



def createParser():
def parseArguments(passed_arguments = []):
parser = argparse.ArgumentParser(description=("Generates an IMa input "
"file from a VCF file, a reference"
" genome, a list of gene regions, "
Expand Down Expand Up @@ -268,7 +269,10 @@ def createParser():
# Prefix for per-locus output files. The help fragments are concatenated
# verbatim, so the first one needs a trailing space ("file per", not "fileper").
parser.add_argument("--out-prefix",dest="multi_out",type=str,
                    help=("If output is fasta, generate one file "
                          "per loci."))
return parser
if passed_arguments:
return vars(parser.parse_args(passed_arguments))
else:
return vars(parser.parse_args())


def checkArgs(args):
Expand Down Expand Up @@ -444,7 +448,7 @@ def hasMissingData(rec_list, indiv_idx):
return False


def vcf_to_ima(sys_args):
def vcf_to_ima(**kwargs):
"""Returns an IMa input file given four-gamete filtered loci in one or
multiple VCFs.
Expand Down Expand Up @@ -532,13 +536,16 @@ def vcf_to_ima(sys_args):
Will be named either '--out' value or (vcfinput).ima.u.
Contains variants in designated loci for IMa run.
"""
parser = createParser()
if len(sys_args) == 0:
parser.print_help()
sys.exit(1)

required_args = ['vcfname','popname']
args = getArgsWithConfig(parser,sys_args,required_args,'vcf_to_ima')
#parser = createParser()
#if len(sys_args) == 0:
# parser.print_help()
# sys.exit(1)

#required_args = ['vcfname','popname']
#args = getArgsWithConfig(parser,sys_args,required_args,'vcf_to_ima')
if __name__ != "__main__":
kwargs = argprase_kwargs(kwargs,parseArguments)
args = argparse.Namespace(**kwargs)
checkArgs(args)
logArgs(args)
#validateFiles(args)
Expand Down Expand Up @@ -674,5 +681,5 @@ def vcf_to_ima(sys_args):
output_file.close()

if __name__ == "__main__":
initLogger()
vcf_to_ima(sys.argv[1:])
#initLogger()
vcf_to_ima(**parseArguments())

0 comments on commit c7d2c85

Please sign in to comment.