Skip to content

Commit

Permalink
duplicate positions are now only written out if --considerMultiAlleli…
Browse files Browse the repository at this point in the history
…c is specified

rows with duplicate positions are now only written out if
--considerMultiAllelic is specified
  • Loading branch information
tomkinsc committed Jun 22, 2015
1 parent d1cfcde commit 2e389e2
Showing 1 changed file with 13 additions and 3 deletions.
16 changes: 13 additions & 3 deletions cms/tools/selscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def process_vcf_into_selscan_tped(cls, vcf_file, gen_map_file, outfile_location,
sec_remaining_avg = 0
current_pos_bp = 1

with util.file.open_or_gzopen(outTpedFile, 'w+') as of1, util.file.open_or_gzopen(outTpedMetaFile, 'w+') as of2:
with util.file.open_or_gzopen(outTpedFile, 'w') as of1, util.file.open_or_gzopen(outTpedMetaFile, 'w') as of2:
# WRITE header for metadata file here with selected subset of sample_names
headerString = "CHROM VARIANT_ID POS_BP MAP_POS_CM REF_ALLELE ALT_ALLELE ANCESTRAL_CALL ALLELE_FREQ_IN_POP\n".replace(" ","\t")
of2.write(headerString)
Expand All @@ -144,6 +144,11 @@ def process_vcf_into_selscan_tped(cls, vcf_file, gen_map_file, outfile_location,
# to account for that we collapse rows that pass our filter and then write out the rows when we
# encounter a record with a new position
if record.pos != mostRecentRecordPosSeen:

if positionHasBeenSeenBefore and not consider_multi_allelic:
lineToWrite1 = None
lineToWrite2 = None

if lineToWrite1 is not None and lineToWrite2 is not None:
if len(previouslyCodedGenotypes) == ploidy*len(indices_of_matching_samples):
# write the output line
Expand Down Expand Up @@ -232,8 +237,9 @@ def process_vcf_into_selscan_tped(cls, vcf_file, gen_map_file, outfile_location,
# coded result = 101001
#log.debug(genotypes_for_selected_samples)
#log.debug(coded_genotypes_for_selected_samples)

if positionHasBeenSeenBefore:
coded_genotypes_for_selected_samples = list(str(bin(int("".join(coded_genotypes_for_selected_samples),2) & int("".join(previouslyCodedGenotypes),2)))[2:].zfill(len(coded_genotypes_for_selected_samples)))
coded_genotypes_for_selected_samples = np.array(list(str(bin(int("".join(coded_genotypes_for_selected_samples),2) & int("".join(previouslyCodedGenotypes),2)))[2:].zfill(len(coded_genotypes_for_selected_samples))))

previouslyCodedGenotypes = coded_genotypes_for_selected_samples

Expand Down Expand Up @@ -264,6 +270,10 @@ def process_vcf_into_selscan_tped(cls, vcf_file, gen_map_file, outfile_location,
print("Estimated time of completion: {}".format(human_time_remaining))
#log.info("Genotype counts found: %s", str(list(recordLengths)))

if positionHasBeenSeenBefore and not consider_multi_allelic:
lineToWrite1 = None
lineToWrite2 = None

if lineToWrite1 is not None and lineToWrite2 is not None:
if len(previouslyCodedGenotypes) == ploidy*len(indices_of_matching_samples):
# write the output lines
Expand All @@ -285,7 +295,7 @@ def __init__(self, install_methods = None):
if install_methods is None:
install_methods = []
os_type = self.get_os_type()
binaryPath = self.get_selscan_binary_path( os_type )
binaryPath = self.get_selscan_binary_path( os_type )

target_rel_path = 'selscan-{ver}/{binPath}'.format(ver=tool_version, binPath=binaryPath)
verify_command = '{dir}/selscan-{ver}/{binPath} --help > /dev/null 2>&1'.format(dir=util.file.get_build_path(),
Expand Down

0 comments on commit 2e389e2

Please sign in to comment.