Skip to content

Commit

Permalink
refactor tnt dataset function to format seqrecords for dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
carlosp420 committed Mar 4, 2015
1 parent c603221 commit 9777b68
Showing 1 changed file with 7 additions and 21 deletions.
28 changes: 7 additions & 21 deletions voseq/create_dataset/dataset.py
Expand Up @@ -99,7 +99,7 @@ def get_codons_in_each_partition(self, codons):
codons = self.split_sequence_in_codon_positions(this_gene,
seq_record.seq)
for i in range(len(codon_pos)):
- seq_str = self.format_seqrecord_and_codon_for_dataset(seq_record, codons[codon_pos[i]])
+ seq_str = self.format_record_id_and_seq_for_dataset(seq_record.id, codons[codon_pos[i]])
partition_list[i].append(seq_str)
return partition_list

Expand All @@ -114,25 +114,11 @@ def get_gene_divisor(self, this_gene, codon_description=None):
seq_str = '\n[&dna]'
return seq_str

- def format_seqrecord_and_codon_for_dataset(self, seq_record, codon):
+ def format_record_id_and_seq_for_dataset(self, seq_record_id, seq_record_seq):
  if self.file_format == 'FASTA':
- seq_str = '>' + seq_record.id + '\n' + str(codon)
+ seq_str = '>' + seq_record_id + '\n' + str(seq_record_seq)
  if self.file_format == 'TNT':
- seq_str = str(seq_record.id).ljust(55) + str(codon)
- return seq_str
-
- def format_seqrecord_id_for_dataset(self, seq_record):
- if self.file_format == 'FASTA':
- seq_str = '>' + seq_record.id + '\n'
- if self.file_format == 'TNT':
- seq_str = str(seq_record.id).ljust(55)
- return seq_str
-
- def format_seqrecord_for_dataset(self, seq_record):
- if self.file_format == 'FASTA':
- seq_str = '>' + seq_record.id + '\n' + str(seq_record.seq)
- if self.file_format == 'TNT':
- seq_str = str(seq_record.id).ljust(55) + str(seq_record.seq)
+ seq_str = str(seq_record_id).ljust(55) + str(seq_record_seq)
return seq_str

def get_codons_in_one_partition(self, codons):
Expand Down Expand Up @@ -163,8 +149,8 @@ def get_codons_in_one_partition(self, codons):
codons = self.split_sequence_in_codon_positions(this_gene,
seq_record.seq)

- seq_str = self.format_seqrecord_id_for_dataset(seq_record)
- seq_str += str(chain_and_flatten([codons[i] for i in codon_pos]))
+ codon_seqs = str(chain_and_flatten([codons[i] for i in codon_pos]))
+ seq_str = self.format_record_id_and_seq_for_dataset(seq_record.id, codon_seqs)

partition_list[0].append(seq_str)
return partition_list
Expand Down Expand Up @@ -290,7 +276,7 @@ def from_seq_objs_to_dataset(self):
seq_str = self.get_gene_divisor(this_gene)
partition_list[0].append(seq_str)

- seq_str = self.format_seqrecord_for_dataset(seq_record)
+ seq_str = self.format_record_id_and_seq_for_dataset(seq_record.id, seq_record.seq)
partition_list[0].append(seq_str)
return self.convert_lists_to_dataset(partition_list)

Expand Down

0 comments on commit 9777b68

Please sign in to comment.