Skip to content

Commit

Permalink
refactor number of genes for taxa
Browse files Browse the repository at this point in the history
  • Loading branch information
carlosp420 committed Mar 20, 2015
1 parent 18b3467 commit ea59e8a
Showing 1 changed file with 28 additions and 50 deletions.
78 changes: 28 additions & 50 deletions voseq/create_dataset/dataset.py
Expand Up @@ -38,7 +38,7 @@ def get_number_chars_from_partition_list(self, partitions):
gene_code = item.strip().replace('[', '').replace(']', '')
continue
if self.file_format == 'TNT':
gene_code = "dummy" + str(i)
gene_code = 'dummy' + str(i)
i += 1
continue
if gene_code != '':
Expand All @@ -54,55 +54,33 @@ def get_number_of_genes_for_taxa(self, partitions):
number_of_genes_for_taxa = dict()
vouchers_to_drop = set()

if self.file_format == 'NEXUS':
gene_code = ''
for item in partitions[0]:
if item.startswith('\n'):
gene_code = item.strip().replace('[', '').replace(']', '')
continue
if gene_code != '':
entry = re.sub('\s+', ' ', item)
voucher, sequence = entry.split(' ')

if voucher not in number_of_genes_for_taxa:
number_of_genes_for_taxa[voucher] = 0

sequence = sequence.replace('?', '')
if sequence != '':
number_of_genes_for_taxa[voucher] += 1

if self.minimum_number_of_genes is None:
self.vouchers_to_drop = []
else:
for voucher in number_of_genes_for_taxa:
if number_of_genes_for_taxa[voucher] < self.minimum_number_of_genes:
vouchers_to_drop.add(voucher)
self.vouchers_to_drop = vouchers_to_drop

if self.file_format == 'TNT':
gene_code = ''
for item in partitions[0]:
if item.startswith('\n'):
gene_code = 'dummy'
continue
if gene_code != '':
entry = re.sub('\s+', ' ', item)
voucher, sequence = entry.split(' ')

if voucher not in number_of_genes_for_taxa:
number_of_genes_for_taxa[voucher] = 0

sequence = sequence.replace('?', '')
if sequence != '':
number_of_genes_for_taxa[voucher] += 1

if self.minimum_number_of_genes is None:
self.vouchers_to_drop = []
else:
for voucher in number_of_genes_for_taxa:
if number_of_genes_for_taxa[voucher] < self.minimum_number_of_genes:
vouchers_to_drop.add(voucher)
self.vouchers_to_drop = vouchers_to_drop
gene_code = ''
for item in partitions[0]:
if item.startswith('\n'):
if self.file_format == 'NEXUS':
gene_code = item.strip().replace('[', '').replace(']', '')
continue
if self.file_format == 'TNT':
gene_code = 'dummy'
continue
if gene_code != '':
entry = re.sub('\s+', ' ', item)
voucher, sequence = entry.split(' ')

if voucher not in number_of_genes_for_taxa:
number_of_genes_for_taxa[voucher] = 0

sequence = sequence.replace('?', '')
if sequence != '':
number_of_genes_for_taxa[voucher] += 1

if self.minimum_number_of_genes is None:
self.vouchers_to_drop = []
else:
for voucher in number_of_genes_for_taxa:
if number_of_genes_for_taxa[voucher] < self.minimum_number_of_genes:
vouchers_to_drop.add(voucher)
self.vouchers_to_drop = vouchers_to_drop

def get_reading_frames(self):
"""
Expand Down

0 comments on commit ea59e8a

Please sign in to comment.