Skip to content

Commit

Permalink
removing defunct option for setting file compression
Browse files Browse the repository at this point in the history
  • Loading branch information
jaredgk committed Aug 11, 2017
1 parent b05d376 commit cb27119
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 32 deletions.
34 changes: 22 additions & 12 deletions jared/vcf_ref_to_seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import argparse
import os.path
import logging
from logging_module import initLogger
from logging_module import initLogger, logArgs
from random import sample
from gene_region import Region, RegionList
import vcf_reader_func as vf
Expand Down Expand Up @@ -35,9 +35,6 @@ def createParser():
"Comma-separated list of columns for gene region "
" data, format is start/end if no chromosome "
" data, start/end/chrom if so"))
parser.add_argument("--ext", dest="var_ext", help=(
"Format for variant file if filename doesn't "
"contain extension"))
parser.add_argument("--compress-vcf", dest="compress_flag",
action="store_true", help=("If input VCF is not "
"compressed, will compress and use zip search"))
Expand All @@ -47,6 +44,9 @@ def createParser():
action="store_false", help=("Prevents exception "
"generated by mismatched reference alleles from "
"VCF file compared to reference"))
parser.add_argument("--ima", dest="ima", action="store_true",
help=("If set, will output IMa-format instead of "
"FASTA format"))
subsamp_group = parser.add_mutually_exclusive_group()
subsamp_group.add_argument('--subsamp-list', dest="subsamp_fn",
help="List of sample names to be used")
Expand All @@ -56,11 +56,6 @@ def createParser():
return parser


def logArgs(args):
    """Log every parsed command-line argument at INFO level.

    Emits one header line followed by one line per attribute of `args`,
    in the order the attributes are stored on the object.

    Parameters
    ----------
    args : object
        Any object accepted by ``vars()``; expected to be the namespace
        returned by the argument parser (confirm against the caller).
    """
    logging.info('Arguments for vcf_to_seq:')
    # Take the attribute mapping once rather than calling vars() per key.
    for name, value in vars(args).items():
        # Pass the values as logging arguments so the message is only
        # formatted when an INFO record is actually emitted.
        logging.info('Argument %s: %s', name, value)

def validateFiles(args):
"""Validates that files provided to args all exist on users system"""
for var in ['vcfname', 'refname', 'genename']:
Expand Down Expand Up @@ -144,7 +139,6 @@ def generateSequence(rec_list, ref_seq, fasta_ref,
return seq[:len(ref_seq)]
return seq


def getHeader(record_count, chrom, region, oneidx=False, halfopen=True):
start = region.start
end = region.end
Expand All @@ -155,6 +149,24 @@ def getHeader(record_count, chrom, region, oneidx=False, halfopen=True):
start -= 1
return '>'+str(record_count)+' '+chrom+' '+str(start)+':'+str(end)

#def getHeader(record_count, chrom, region, oneidx=False, halfopen=True,
# pop=None, indiv=None):
# start = region.start
# end = region.end
# if oneidx:
# start += 1
# end += 1
# if not halfopen:
# start -= 1
# coords = chrom+':'+str(start)+':'+str(end)
# header = '>'+str(record_count)+' '+coords+'|'
# if pop is not None:
# header+=str(pop)
# header += '|'
# if indiv is not None:
# header+=str(indiv)
# return header


def getFastaFilename(args):
vcfname = args.vcfname
Expand Down Expand Up @@ -205,8 +217,6 @@ def vcf_to_seq(sys_args):
coordinates of a region. If length 3, the third element
specifies the index of the chromosome column. Default is "1,2,0",
to match column order in a BED file.
--ext : str ['vcf','vcf.gz'], optional
Required if VCF filename does not end with the typical extension.
Expand Down
26 changes: 6 additions & 20 deletions jared/vcf_to_ima.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@


def createParser():
parser = argparse.ArgumentParser(description=("Generates sequences from"
" samples from a VCF file, a reference"
" genome, and a list of gene regions."))
parser = argparse.ArgumentParser(description=("Generates an IMa input "
"file from a VCF file, a reference"
" genome, a list of gene regions, "
"and a population info file."))
parser.add_argument("vcfname", help="Input VCF filename")
parser.add_argument("refname", help="Reference FASTA file")
parser.add_argument("genename", help="Name of gene region file")
Expand All @@ -38,9 +39,6 @@ def createParser():
"Comma-separated list of columns for gene region "
" data, format is start/end if no chromosome "
" data, start/end/chrom if so"))
parser.add_argument("--ext", dest="var_ext", help=(
"Format for variant file if filename doesn't "
"contain extension"))
parser.add_argument("--compress-vcf", dest="compress_flag",
action="store_true", help=("If input VCF is not "
"compressed, will compress and use zip search"))
Expand All @@ -64,7 +62,7 @@ def createParser():

def validateFiles(args):
"""Validates that files provided to args all exist on users system"""
for var in ['vcfname', 'refname', 'genename']:
for var in ['vcfname', 'refname', 'genename','popname']:
f = vars(args)[var]
if not os.path.exists(f):
raise ValueError('Filepath for %s not found at %s' %
Expand All @@ -83,8 +81,7 @@ def readSuperPop(pop_fn):
pop_file = open(pop_fn)
pop_fns = [l.strip() for l in pop_file.readlines()]
for pop in pop_fns:
popname = pop.strip('.txt')
#pop_list.append(pop)
popname = os.path.splitext(os.path.basename(pop))[0]
t_pop = [popname]
t_sample = []
popf = open(pop,'r')
Expand Down Expand Up @@ -114,15 +111,6 @@ def getMaxAlleleLength(alleles):
return max([len(r) for r in alleles])


def getNextIdx(rec, prev_indiv, prev_idx):
    """Return the (individual, haplotype) index pair that follows the given one.

    Works for any ploidy because the number of haplotypes is read from the
    length of each sample's ``alleles`` sequence.

    Parameters
    ----------
    rec : object
        Record exposing ``samples``, a sized and integer-indexable
        collection whose items each have an ``alleles`` sequence.
    prev_indiv : int
        Index of the individual that was just processed.
    prev_idx : int
        Haplotype index within that individual that was just processed.

    Returns
    -------
    tuple of (int, int)
        The next ``(individual, haplotype)`` indices, or ``(-1, -1)`` when
        all haplotypes of all individuals have been iterated through.
    """
    # Another haplotype remains in the current individual.
    next_idx = prev_idx + 1
    if next_idx < len(rec.samples[prev_indiv].alleles):
        return prev_indiv, next_idx
    # Otherwise move to the first haplotype of the next individual.
    next_indiv = prev_indiv + 1
    if next_indiv < len(rec.samples):
        return next_indiv, 0
    # Nothing left: signal exhaustion to the caller.
    return -1, -1

def getNextIdxName(rec, prev_pop, prev_indiv, prev_idx, pop_data):

Expand Down Expand Up @@ -265,8 +253,6 @@ def vcf_to_ima(sys_args):
coordinates of a region. If length 3, the third element
specifies the index of the chromosome column. Default is "1,2,0",
to match column order in a BED file.
--ext : str ['vcf','vcf.gz'], optional
Required if VCF filename does not end with the typical extension.
Expand Down

0 comments on commit cb27119

Please sign in to comment.