Skip to content

Commit

Permalink
Updated vcf_filter
Browse files Browse the repository at this point in the history
  • Loading branch information
aewebb80 committed Jun 15, 2017
1 parent e9440ae commit 748c57a
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 67 deletions.
155 changes: 88 additions & 67 deletions andrew/vcf_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,42 @@ def __call__(self, parser, args, value, option_string=None):
# Other file arguments. Expand as needed
vcf_parser.add_argument('--out', help = 'Specifies the output filename', type = str, default = 'out', action = parser_confirm_no_file())

### Site Filters
out_format_list = ['vcf', 'bcf']
out_format_default = 'bcf'

# Chromosome Filters
vcf_parser.add_argument('--filter-chr', help = 'Specifies the chromosome(s) to include', nargs = '+', type = str)
vcf_parser.add_argument('--filter-not-chr', help = 'Specifies the chromosome(s) to exclude', nargs = '+', type = str)
vcf_parser.add_argument('--out-format', metavar = '{' + ', '.join(out_format_list) + '}', help = 'Specifies the output format', type = str, choices = out_format_list, default = out_format_default)
### Filters

# Position Filters
# Chromosome filters
vcf_parser.add_argument('--filter-include-chr', help = 'Specifies the chromosome(s) to include', nargs = '+', type = str)
vcf_parser.add_argument('--filter-exclude-chr', help = 'Specifies the chromosome(s) to exclude', nargs = '+', type = str)

# Basic position filters
vcf_parser.add_argument('--filter-from-bp', help = 'Specifies the lower bound of sites to include (May only be used with a single chromosome)', type = int)
vcf_parser.add_argument('--filter-to-bp', help = 'Specifies the upper bound of sites to include (May only be used with a single chromosome)', type = int)

# BED-based position filters
vcf_parser.add_argument('--filter-include-bed', help = 'Specifies a set of sites to include within a BED file', action = parser_confirm_file())
vcf_parser.add_argument('--filter-exclude-bed', help = 'Specifies a set of sites to exclude within a BED file', action = parser_confirm_file())

# Filter-flag filters
vcf_parser.add_argument('--filter-include-passed', help = "Specifies that only sites with the filter flag 'PASS' should be included", action = 'store_true')
vcf_parser.add_argument('--filter-include-filtered', help = 'Specifies that all sites with the given filter flag should be included', nargs = '+', type = str)
vcf_parser.add_argument('--filter-exclude-filtered', help = 'Specifies that all sites with the given filter flag should be excluded', nargs = '+', type = str)

# Info-flag filters
vcf_parser.add_argument('--filter-include-info', help = 'Specifies that all sites with the given info flag should be included', nargs = '+', type = str)
vcf_parser.add_argument('--filter-exclude-info', help = 'Specifies that all sites with the given info flag should be excluded', nargs = '+', type = str)

# Allele count filters
vcf_parser.add_argument('--filter-min-alleles', help = 'Specifies that only sites with a number of allele >= to the number given should be included', type = int)
vcf_parser.add_argument('--filter-max-alleles', help = 'Specifies that only sites with a number of allele <= to the number given should be included', type = int)

# Missing data filter (forwarded to vcftools as --max-missing-count, which
# places an upper bound on the number of missing genotypes per site)
vcf_parser.add_argument('--filter-max-missing', help = 'Specifies that only sites with no more than this number of missing genotypes among individuals should be included', type = int)

# Additional Filters
vcf_parser.add_argument('--filter-distance', help = 'Specifies a distance that no two sites may be within', type = int)

if passed_arguments:
return vcf_parser.parse_args(passed_arguments)
Expand Down Expand Up @@ -98,69 +124,64 @@ def run (passed_arguments = []):
# Grab VCF arguments from command line
vcf_args = vcf_filter_parser(passed_arguments)

print vcf_args

'''
# Argument container for vcftools
vcftools_call_args = ['--out', vcf_args.out]

if vcf_args.calc_statistic == 'windowed-weir-fst':
# Confirms that at least two population files have been specified
if not vcf_args.pop_file or len(vcf_args.pop_file) < 2:
sys.exit('Two or more population files requried. Please assign using --pop-file')
# Assigns specific vcftools arguments for calculating fst
vcftools_pop_args = [population_args for population_file in vcf_args.pop_file for population_args in ['--weir-fst-pop', population_file]]
vcftools_window_args = ['--fst-window-size', vcf_args.statistic_window_size, '--fst-window-step', vcf_args.statistic_window_step]
# Assigns all the vcftools arguments for calculating windowed fst
vcftools_call_args.extend(vcftools_pop_args + vcftools_window_args)
# Assigns the suffix for the vcftools log file
vcftools_log_suffix = '.windowed.weir.fst'
elif vcf_args.calc_statistic == 'weir-fst':
# Confirms that at least two population files have been specified
if not vcf_args.pop_file or len(vcf_args.pop_file) < 2:
sys.exit('Two or more population files requried. Please assign using --pop-file')
# Assigns specific vcftools arguments for calculating site-based fst
vcftools_call_args.extend([population_args for population_file in vcf_args.pop_file for population_args in ['--weir-fst-pop', population_file]])
# Assigns the suffix for the vcftools log file
vcftools_log_suffix = '.weir.fst'
elif vcf_args.calc_statistic == 'TajimaD':
# Assigns all the vcftools arguments for calculating TajimaD
vcftools_call_args.extend(['--TajimaD', vcf_args.statistic_window_size])
# Assigns the suffix for the vcftools log file
vcftools_log_suffix = '.Tajima.D'
elif vcf_args.calc_statistic == 'pi':
# Assigns all the vcftools arguments for calculating pi
vcftools_call_args.extend(['--window-pi', vcf_args.statistic_window_size, '--window-pi-step', vcf_args.statistic_window_step])
# Assigns the suffix for the vcftools log file
vcftools_log_suffix = '.windowed.pi'
elif vcf_args.calc_statistic == 'freq':
# Assigns all the vcftools arguments for the allele frequency
vcftools_call_args.extend(['--freq'])
# Assigns the suffix for the vcftools log file
vcftools_log_suffix = '.frq'
elif vcf_args.calc_statistic == 'het':
# Assigns all the vcftools arguments for calculating heterozygosity
vcftools_call_args.extend(['--het'])
# Assigns the suffix for the vcftools log file
vcftools_log_suffix = '.het'
# Pick the vcftools recode switch that matches the requested output format;
# any other value (including an unset format) adds nothing
recode_switch_by_format = {'bcf': '--recode-bcf', 'vcf': '--recode'}
if vcf_args.out_format in recode_switch_by_format:
    vcftools_call_args.append(recode_switch_by_format[vcf_args.out_format])

# Chromosome filters: one --chr / --not-chr pair per chromosome given.
# An unset option is None, so fall back to an empty list.
for included_chrom in vcf_args.filter_include_chr or []:
    vcftools_call_args.extend(['--chr', included_chrom])
for excluded_chrom in vcf_args.filter_exclude_chr or []:
    vcftools_call_args.extend(['--not-chr', excluded_chrom])

# Basic position filters: forward each bound on its own flag whenever that
# bound was supplied. Each bound is tested against None (the argparse default
# when the option is absent) so that one bound can be given without the other
# and an explicit 0 is not discarded.
# NOTE(review): the parser help says these bounds may only be used with a
# single chromosome; that restriction is not enforced here -- confirm whether
# it should be checked before calling vcftools.
if vcf_args.filter_from_bp is not None:
    vcftools_call_args.extend(['--from-bp', vcf_args.filter_from_bp])
if vcf_args.filter_to_bp is not None:
    vcftools_call_args.extend(['--to-bp', vcf_args.filter_to_bp])

# BED-based position filters: pair each vcftools switch with the BED file
# given for it and forward only the ones that were supplied
bed_switches = (('--bed', vcf_args.filter_include_bed),
                ('--exclude-bed', vcf_args.filter_exclude_bed))
for bed_switch, bed_filename in bed_switches:
    if bed_filename:
        vcftools_call_args.extend([bed_switch, bed_filename])

# Filter-flag filters: the PASS-only switch first, then one switch per
# filter flag to keep and one per filter flag to remove
if vcf_args.filter_include_passed:
    vcftools_call_args.append('--remove-filtered-all')
for kept_filter_flag in vcf_args.filter_include_filtered or []:
    vcftools_call_args.extend(['--keep-filtered', kept_filter_flag])
for removed_filter_flag in vcf_args.filter_exclude_filtered or []:
    vcftools_call_args.extend(['--remove-filtered', removed_filter_flag])

# Info-flag filters: one --keep-INFO / --remove-INFO pair per flag given.
# An unset option is None, so fall back to an empty list.
for kept_info_flag in vcf_args.filter_include_info or []:
    vcftools_call_args.extend(['--keep-INFO', kept_info_flag])
for removed_info_flag in vcf_args.filter_exclude_info or []:
    vcftools_call_args.extend(['--remove-INFO', removed_info_flag])

# Allele count filters: pair each vcftools switch with its limit and
# forward only the limits that were supplied
allele_limits = (('--min-alleles', vcf_args.filter_min_alleles),
                 ('--max-alleles', vcf_args.filter_max_alleles))
for allele_switch, allele_limit in allele_limits:
    if allele_limit:
        vcftools_call_args.extend([allele_switch, allele_limit])

# Missing data filter: a threshold of 0 is meaningful (no missing genotypes
# allowed), so test against None -- the argparse default when the option is
# absent -- instead of relying on truthiness, which would drop an explicit 0.
if vcf_args.filter_max_missing is not None:
    vcftools_call_args.extend(['--max-missing-count', vcf_args.filter_max_missing])

# Minimum distance between retained sites
if vcf_args.filter_distance:
    vcftools_call_args.extend(['--thin', vcf_args.filter_distance])

# Assigns the file argument for vcftools
vcfname_arg = assign_vcftools_input_arg(vcf_args.vcfname)
Expand All @@ -171,8 +192,8 @@ def run (passed_arguments = []):

# Check that the log file was created correctly, get the suffix for the log file, and create the file
if check_vcftools_for_errors(vcftools_err):
produce_vcftools_log(vcftools_err, vcf_args.out, vcftools_log_suffix)
'''
produce_vcftools_log(vcftools_err, vcf_args.out, '.filter')


if __name__ == "__main__":
#initLogger()
Expand Down
Binary file modified jared/vcf_reader_func.pyc
Binary file not shown.

0 comments on commit 748c57a

Please sign in to comment.