Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/jaredgk/ppp
Browse files Browse the repository at this point in the history
  • Loading branch information
jaredgk committed Jun 15, 2017
2 parents 362f7f4 + 9e6fbf7 commit e9440ae
Show file tree
Hide file tree
Showing 9 changed files with 339 additions and 51 deletions.
36 changes: 18 additions & 18 deletions andrew/andrew_tests.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,56 @@
import unittest, filecmp, sys, os
import vcftools
import vcftools_calc
import vcf_calc

def compare_to_expected(test_output, expected_output):
    '''
    Compare a generated file against its expected counterpart.

    Parameters
    ----------
    test_output : str
        Filename of the file produced by the test
    expected_output : str
        Filename of the reference file

    Returns
    -------
    bool
        True if both files have identical contents, False otherwise
    '''
    # shallow=False forces a content comparison. The default (shallow=True)
    # treats files with an identical os.stat() signature (type, size, mtime)
    # as equal without ever reading them.
    return filecmp.cmp(test_output, expected_output, shallow=False)

class vcftools_tests (unittest.TestCase):
    '''
    Tests for the vcftools helper functions and the vcf_calc statistics.

    Each statistic test runs vcf_calc on example/locus8.vcf.gz and compares
    the generated file against the expected file stored in example/.
    Generated files are registered for removal before the comparison is
    asserted, so a failed comparison does not leave output files behind.
    '''

    def test_vcf_argument_parser (self):
        '''A gzipped VCF filename is mapped to the --gzvcf argument'''
        input_arg = vcftools.assign_vcftools_input_arg('example/locus8.vcf.gz')
        self.assertEqual(input_arg, ['--gzvcf', 'example/locus8.vcf.gz'])

    def test_produce_vcftools_log (self):
        '''The log text is written to <out><suffix>.log'''
        vcftools.produce_vcftools_log('Log Test:\n1\n2\n3\n', 'out', '.logTest')
        self.addCleanup(os.remove, 'out.logTest.log')
        self.assertTrue(compare_to_expected('out.logTest.log', 'example/locus8.logTest.log'))

    def test_check_vcftools_for_errors (self):
        '''A log with an error line exits with that line as the exit code'''
        self.assertTrue(vcftools.check_vcftools_for_errors('Log Test:\n1\n2\n3\nRun Time'))
        with self.assertRaises(SystemExit) as cm:
            vcftools.check_vcftools_for_errors('Log Test:\n1\n2\n3\nError: No Input')
        # Checked after the with block: statements following the raising call
        # inside the block would never execute
        self.assertEqual(cm.exception.code, 'Error: No Input')

    def test_Fst_window (self):
        '''Windowed Fst between the Paniscus and Troglodytes populations'''
        vcf_calc.run(['example/locus8.vcf.gz', '--calc-statistic', 'windowed-weir-fst', '--pop-file', 'example/Paniscus.txt', '--pop-file', 'example/Troglodytes.txt', '--out', 'out'])
        self.addCleanup(os.remove, 'out.windowed.weir.fst')
        self.addCleanup(os.remove, 'out.windowed.weir.fst.log')
        self.assertTrue(compare_to_expected('out.windowed.weir.fst', 'example/locus8.windowed.weir.fst'))

    def test_tajimasD (self):
        '''Tajima's D'''
        vcf_calc.run(['example/locus8.vcf.gz', '--calc-statistic', 'TajimaD', '--out', 'out'])
        self.addCleanup(os.remove, 'out.Tajima.D')
        self.addCleanup(os.remove, 'out.Tajima.D.log')
        self.assertTrue(compare_to_expected('out.Tajima.D', 'example/locus8.Tajima.D'))

    def test_window_pi (self):
        '''Windowed nucleotide diversity (pi)'''
        vcf_calc.run(['example/locus8.vcf.gz', '--calc-statistic', 'pi', '--out', 'out'])
        self.addCleanup(os.remove, 'out.windowed.pi')
        self.addCleanup(os.remove, 'out.windowed.pi.log')
        self.assertTrue(compare_to_expected('out.windowed.pi', 'example/locus8.windowed.pi'))

    def test_freq (self):
        '''Allele frequency'''
        vcf_calc.run(['example/locus8.vcf.gz', '--calc-statistic', 'freq', '--out', 'out'])
        self.addCleanup(os.remove, 'out.frq')
        self.addCleanup(os.remove, 'out.frq.log')
        self.assertTrue(compare_to_expected('out.frq', 'example/locus8.frq'))

    def test_het (self):
        '''Heterozygosity'''
        vcf_calc.run(['example/locus8.vcf.gz', '--calc-statistic', 'het', '--out', 'out'])
        self.addCleanup(os.remove, 'out.het')
        self.addCleanup(os.remove, 'out.het.log')
        self.assertTrue(compare_to_expected('out.het', 'example/locus8.het'))

if __name__ == "__main__":
    # A single call is enough: unittest.main() runs the tests and then exits
    # the interpreter, so a second call would never be reached.
    unittest.main()
27 changes: 18 additions & 9 deletions andrew/vcftools_calc.py → andrew/vcf_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
from vcftools import *

# Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes.
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir,'jared')))
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared')))

#from logging_module import initLogger

def vcf_calc_parser(passed_arguments):
'''VCF Argument Parser'''
'''VCF Argument Parser - Assigns arguments from command line'''

def parser_confirm_file ():
'''Custom action to confirm file exists'''
Expand Down Expand Up @@ -70,14 +70,14 @@ def run (passed_arguments = []):
'''
Statistic calculation using VCFTools.
Automates the calculation of specific statistics (Fst (site/windowed),
Tajima's D, Pi, allele frequency, and heterozygosity) using VCFTools. If no
statistic is specified, windowed Fst is used by default.
Automates the calculation of site/windowed fixation index (Fst), Tajima's D,
nucleotide diversity (Pi), allele frequency, and heterozygosity using
VCFTools. If no statistic is specified, windowed Fst is used by default.
Parameters
----------
VCF_Input : str
Input VCF filename
Specifies the input VCF filename
--out : str
Specifies the output filename
--pop-file : str
Expand All @@ -87,14 +87,23 @@ def run (passed_arguments = []):
Specifies the statistic to calculate. Choices: weir-fst,
windowed-weir-fst (Default), TajimaD, pi, freq, het
--statistic-window-size : int
Specifies the window size of the statistic
Specifies the window size for window-based statistics
--statistic-window-step : int
Specifies step size between windows
Specifies step size between windows for specific window-based statistics
Returns
-------
file
output : file
Statistic file output
log : file
Log file output
Raises
------
IOError
Input VCF file does not exist
IOError
Output file already exists
'''
Expand Down
179 changes: 179 additions & 0 deletions andrew/vcf_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import os
import sys
import subprocess
import argparse
import logging

from vcftools import *

# Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes.
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared')))

#from logging_module import initLogger

def vcf_filter_parser(passed_arguments):
    '''
    VCF Argument Parser - Assigns arguments from command line

    Parameters
    ----------
    passed_arguments : list of str
        Arguments to parse. If empty (or None), argparse reads the command
        line (sys.argv) instead.

    Returns
    -------
    argparse.Namespace
        Parsed arguments: vcfname, out, filter_chr, filter_not_chr,
        filter_from_bp and filter_to_bp

    Raises
    ------
    IOError
        Input VCF file does not exist
    IOError
        File given to --out already exists
    '''

    def parser_confirm_file():
        '''Custom action to confirm file exists'''
        class customAction(argparse.Action):
            def __call__(self, parser, args, value, option_string=None):
                if not os.path.isfile(value):
                    raise IOError('File not found: %s' % value)
                setattr(args, self.dest, value)
        return customAction

    def parser_confirm_no_file():
        '''Custom action to confirm file does not exist'''
        class customAction(argparse.Action):
            def __call__(self, parser, args, value, option_string=None):
                if os.path.isfile(value):
                    raise IOError('File already exists: %s' % value)
                setattr(args, self.dest, value)
        return customAction

    def parser_confirm_files():
        '''Custom action to confirm multiple file exists'''
        class customAction(argparse.Action):
            def __call__(self, parser, args, value, option_string=None):
                if not os.path.isfile(value):
                    raise IOError('File not found: %s' % value)
                # The destination is None unless a default list was given, so
                # build a fresh list rather than appending to None (or
                # mutating a default list shared between parses).
                confirmed_files = list(getattr(args, self.dest, None) or [])
                confirmed_files.append(value)
                setattr(args, self.dest, confirmed_files)
        return customAction

    vcf_parser = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter)

    # Input arguments.
    vcf_parser.add_argument("vcfname", metavar='VCF_Input', help = "Input VCF filename", type = str, action = parser_confirm_file())

    # Other file arguments. Expand as needed
    # NOTE: argparse only invokes an action for arguments that are actually
    # supplied, so the default 'out' is not checked for an existing file.
    vcf_parser.add_argument('--out', help = 'Specifies the output filename', type = str, default = 'out', action = parser_confirm_no_file())

    ### Site Filters

    # Chromosome Filters
    vcf_parser.add_argument('--filter-chr', help = 'Specifies the chromosome(s) to include', nargs = '+', type = str)
    vcf_parser.add_argument('--filter-not-chr', help = 'Specifies the chromosome(s) to exclude', nargs = '+', type = str)

    # Position Filters
    vcf_parser.add_argument('--filter-from-bp', help = 'Specifies the lower bound of sites to include (May only be used with a single chromosome)', type = int)
    vcf_parser.add_argument('--filter-to-bp', help = 'Specifies the upper bound of sites to include (May only be used with a single chromosome)', type = int)

    # parse_args(None) makes argparse fall back to sys.argv[1:]
    return vcf_parser.parse_args(passed_arguments or None)

def run(passed_arguments=None):
    '''
    Filter VCF files using VCFTools.

    Automates various filters to VCF files using VCFtools. The vcftools
    call is not implemented yet: the function currently parses the
    arguments and prints them.

    Parameters
    ----------
    VCF_Input : str
        Specifies the input VCF filename
    --out : str
        Specifies the output filename
    --filter-chr : list of str
        Specifies the chromosome(s) to include
    --filter-not-chr : list of str
        Specifies the chromosome(s) to exclude
    --filter-from-bp : int
        Specifies the lower bound of sites to include
    --filter-to-bp : int
        Specifies the upper bound of sites to include

    Returns
    -------
    output : file
        Filtered VCF file output (planned, not produced yet)
    log : file
        Log file output (planned, not produced yet)

    Raises
    ------
    IOError
        Input VCF file does not exist
    IOError
        Output file already exists
    '''

    # Grab VCF arguments from command line. None (the default) and an empty
    # list both make the parser read sys.argv.
    vcf_args = vcf_filter_parser(passed_arguments)

    # Function-call form prints identically under Python 2 and Python 3
    print(vcf_args)

    # Disabled scaffolding for the vcftools call. It cannot be enabled as is:
    # it reads calc_statistic, pop_file and statistic_window_size/step, none
    # of which are defined by vcf_filter_parser.
    '''
    # Argument container for vcftools
    vcftools_call_args = ['--out', vcf_args.out]
    if vcf_args.calc_statistic == 'windowed-weir-fst':
        # Confirms that at least two population files have been specified
        if not vcf_args.pop_file or len(vcf_args.pop_file) < 2:
            sys.exit('Two or more population files requried. Please assign using --pop-file')
        # Assigns specific vcftools arguments for calculating fst
        vcftools_pop_args = [population_args for population_file in vcf_args.pop_file for population_args in ['--weir-fst-pop', population_file]]
        vcftools_window_args = ['--fst-window-size', vcf_args.statistic_window_size, '--fst-window-step', vcf_args.statistic_window_step]
        # Assigns all the vcftools arguments for calculating windowed fst
        vcftools_call_args.extend(vcftools_pop_args + vcftools_window_args)
        # Assigns the suffix for the vcftools log file
        vcftools_log_suffix = '.windowed.weir.fst'
    elif vcf_args.calc_statistic == 'weir-fst':
        # Confirms that at least two population files have been specified
        if not vcf_args.pop_file or len(vcf_args.pop_file) < 2:
            sys.exit('Two or more population files requried. Please assign using --pop-file')
        # Assigns specific vcftools arguments for calculating site-based fst
        vcftools_call_args.extend([population_args for population_file in vcf_args.pop_file for population_args in ['--weir-fst-pop', population_file]])
        # Assigns the suffix for the vcftools log file
        vcftools_log_suffix = '.weir.fst'
    elif vcf_args.calc_statistic == 'TajimaD':
        # Assigns all the vcftools arguments for calculating TajimaD
        vcftools_call_args.extend(['--TajimaD', vcf_args.statistic_window_size])
        # Assigns the suffix for the vcftools log file
        vcftools_log_suffix = '.Tajima.D'
    elif vcf_args.calc_statistic == 'pi':
        # Assigns all the vcftools arguments for calculating pi
        vcftools_call_args.extend(['--window-pi', vcf_args.statistic_window_size, '--window-pi-step', vcf_args.statistic_window_step])
        # Assigns the suffix for the vcftools log file
        vcftools_log_suffix = '.windowed.pi'
    elif vcf_args.calc_statistic == 'freq':
        # Assigns all the vcftools arguments for the allele frequency
        vcftools_call_args.extend(['--freq'])
        # Assigns the suffix for the vcftools log file
        vcftools_log_suffix = '.frq'
    elif vcf_args.calc_statistic == 'het':
        # Assigns all the vcftools arguments for calculating heterozygosity
        vcftools_call_args.extend(['--het'])
        # Assigns the suffix for the vcftools log file
        vcftools_log_suffix = '.het'
    # Assigns the file argument for vcftools
    vcfname_arg = assign_vcftools_input_arg(vcf_args.vcfname)
    # vcftools subprocess call
    vcftools_call = subprocess.Popen(['vcftools'] + vcfname_arg + list(map(str, vcftools_call_args)), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    vcftools_out, vcftools_err = vcftools_call.communicate()
    # Check that the log file was created correctly, get the suffix for the log file, and create the file
    if check_vcftools_for_errors(vcftools_err):
        produce_vcftools_log(vcftools_err, vcf_args.out, vcftools_log_suffix)
    '''

if __name__ == "__main__":
    # Script entry point: filter using the command-line arguments.
    # Logger setup stays disabled while the initLogger import is commented out.
    #initLogger()
    run()
38 changes: 35 additions & 3 deletions andrew/vcf_phase.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import argparse
import glob

sys.path.insert(0, os.path.abspath(os.path.join(os.pardir,'jared')))
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared')))

import vcf_reader_func

Expand Down Expand Up @@ -41,7 +41,7 @@ def __call__(self, parser, args, value, option_string=None):

# Other basic arguments. Expand as needed
phase_parser.add_argument('--out', help = 'Defines the output filename', default = 'out', action = parser_confirm_no_file())
phase_parser.add_argument('--estimate-file', help = 'Defines the estimated genotype frequency filename. For used if using beagle', default = 'estimated_gt', action = parser_confirm_no_file())
phase_parser.add_argument('--estimate-file', help = 'Defines the estimated genotype frequency filename. Required for the beagle algorithm', default = 'estimated_gt', action = parser_confirm_no_file())

if passed_arguments:
return phase_parser.parse_args(passed_arguments)
Expand All @@ -68,7 +68,39 @@ def assign_vcf_extension (filename):
sys.exit('Unknown file format')

def run (passed_arguments = []):
''' Wrapper code for Phasing. Commands are assigned using argparse.'''
'''
Phaser for VCF files.
Automates the phasing process for a specified VCF file. The function
allows users to select between multiple phasing algorithms: beagle
(default) and shapit.
Parameters
----------
VCF_Input : str
Specifies the input VCF filename
--phase-algorithm : str
Specifies the algorithm to be used. Choices: beagle (default) and
shapit
--out : str
Specifies the output filename
--estimate-file : str
Defines the estimated genotype frequency filename. Required for the
beagle algorithm
Returns
-------
output : file
Phased VCF file
Raises
------
IOError
Input VCF file does not exist
IOError
Output file already exists
'''

# Grab VCF arguments from command line
phase_args = phase_argument_parser(passed_arguments)
Expand Down

0 comments on commit e9440ae

Please sign in to comment.