From 9b20c04851e66f41f46b402e0e18c70db3570b57 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Tue, 10 Feb 2015 16:13:00 +0200 Subject: [PATCH 01/24] separate codons by partition --- voseq/create_dataset/tests/tests_utils.py | 11 ++++ voseq/create_dataset/utils.py | 67 ++++++++++++++++++++--- 2 files changed, 70 insertions(+), 8 deletions(-) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 6cfef9a2..1bd48ad1 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -25,6 +25,7 @@ def setUp(self): 'geneset': None, 'taxon_names': ['CODE', 'SUPERFAMILY', 'GENUS', 'SPECIES'], 'positions': ['ALL'], + 'partition_by_positions': 'ONE', } self.c = Client() @@ -128,3 +129,13 @@ def test_get_sequence_second_and_third_codon_position(self): sequence = Seq("ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA") result = dataset_creator.get_sequence_based_on_codon_positions('wingless', sequence) self.assertEqual(expected, result) + + def test_create_dataset_two_partitions_first_and_second_codon_position(self): + self.cleaned_data['positions'] = ['1st', '2nd'] + self.cleaned_data['partition_by_positions'] = 'EACH' + self.cleaned_data['gene_codes'] = [Genes.objects.get(gene_code='wingless')] + dataset_creator = CreateDataset(self.cleaned_data) + + expected = '>wingless\n-------------------\n>CAGTGATCGGAATCACACACGGCATTATTAAATGGGGGATGAAaCGACATGAAATTGAAAGA' + result = dataset_creator.dataset_str + self.assertEqual(expected, result) diff --git a/voseq/create_dataset/utils.py b/voseq/create_dataset/utils.py index f8e3eb6d..000a19bd 100644 --- a/voseq/create_dataset/utils.py +++ b/voseq/create_dataset/utils.py @@ -25,6 +25,7 @@ def __init__(self, cleaned_data): self.errors = [] self.seq_objs = dict() self.codon_positions = cleaned_data['positions'] + self.partition_by_positions = cleaned_data['partition_by_positions'] self.cleaned_data = cleaned_data self.voucher_codes = get_voucher_codes(cleaned_data) self.gene_codes = get_gene_codes(cleaned_data) @@ -72,6 +73,14 @@ def from_seq_objs_to_fasta(self): another FASTA gene sequence. """ + if self.partition_by_positions != 'ONE': + # This codons might not correspond to first, second and third codon positions + partitions = { + 'codon1': [], + 'codon2': [], + 'codon3': [], + } + fasta_str = [] append = fasta_str.append @@ -82,11 +91,46 @@ def from_seq_objs_to_fasta(self): this_gene = seq_record.name seq_str = '>' + this_gene + '\n' + '--------------------' append(seq_str) - seq_record_seq_str = str(self.get_sequence_based_on_codon_positions(this_gene, seq_record.seq)) - seq_str = '>' + seq_record.id + '\n' + seq_record_seq_str - append(seq_str) + seq_record_seqs = self.get_sequence_based_on_codon_positions(this_gene, seq_record.seq) + + # We have only one codon position + if len(seq_record_seqs) == 1: + seq_record_seq_str = seq_record_seqs[0] + seq_str = '>' + seq_record.id + '\n' + seq_record_seq_str + append(seq_str) + + # We have two codon positions because they should go to different partitions + if len(seq_record_seqs) == 2: + seq_str = '>' + seq_record.id + '\n' + seq_record_seqs[0] + partitions['codon1'].append(seq_str) + + seq_str = '>' + seq_record.id + '\n' + seq_record_seqs[1] + partitions['codon2'].append(seq_str) + + # We have three codon positions because they should go to different partitions + if len(seq_record_seqs) == 3: + seq_str = '>' + seq_record.id + '\n' + seq_record_seqs[0] + partitions['codon1'].append(seq_str) - return '\n'.join(fasta_str) + seq_str = '>' + seq_record.id + '\n' + seq_record_seqs[1] + partitions['codon2'].append(seq_str) + + seq_str = '>' + seq_record.id + '\n' + seq_record_seqs[2] + partitions['codon3'].append(seq_str) + + # We return only one partition + if len(fasta_str) > 0: + return '\n'.join(fasta_str) + + out = '' + if len(partitions['codon1']) > 0: + out += '\n'.join(partitions['codon1']) + if len(partitions['codon2']) > 0: + out += '\n'.join(partitions['codon2']) + if len(partitions['codon3']) > 0: + out += '\n'.join(partitions['codon3']) + + return out def get_taxon_names_for_taxa(self): """Returns dict: {'CP100-10': {'taxon': 'name'}} @@ -130,11 +174,15 @@ def get_reading_frames(self): def get_sequence_based_on_codon_positions(self, gene_code, seq): """Puts the sequence in frame, by deleting base pairs at the begining - of the sequence if the reading frame is not 1: + of the sequence if the reading frame is not 1. + + Takes into account whether the codon positions should go in different + partitions. :param gene_code: as lower case :param seq: as BioPython seq object. - :return: sequence as Seq object with codon positions requested by user. + :return: tuples of Seq objects depending of number of paritions_by_condons. + and codon positions as requested by user. Example: If reading frame is 2: ATGGGG becomes TGGGG. Then the sequence is @@ -142,7 +190,7 @@ def get_sequence_based_on_codon_positions(self, gene_code, seq): """ if 'ALL' in self.codon_positions: - return seq + return (seq,) reading_frame = int(self.reading_frames[gene_code.lower()]) - 1 seq = seq[reading_frame:] @@ -170,7 +218,10 @@ def get_sequence_based_on_codon_positions(self, gene_code, seq): if '1st' in self.codon_positions and '2nd' in self.codon_positions \ and '3rd' not in self.codon_positions: - return chain_and_flatten(first_position, second_position) + if self.partition_by_positions == 'ONE': + return (chain_and_flatten(first_position, second_position)) + else: + return (first_position, second_position) if '1st' in self.codon_positions and '3rd' in self.codon_positions \ and '2nd' not in self.codon_positions: From 7b668a5b49a6434b28e62ee74b4f7a2a0af1ed9c Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Tue, 10 Feb 2015 16:55:10 +0200 Subject: [PATCH 02/24] fix Seq bug --- voseq/create_dataset/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/voseq/create_dataset/utils.py b/voseq/create_dataset/utils.py index 000a19bd..ed39cf2f 100644 --- a/voseq/create_dataset/utils.py +++ b/voseq/create_dataset/utils.py @@ -95,7 +95,7 @@ def from_seq_objs_to_fasta(self): # We have only one codon position if len(seq_record_seqs) == 1: - seq_record_seq_str = seq_record_seqs[0] + seq_record_seq_str = str(seq_record_seqs[0]) seq_str = '>' + seq_record.id + '\n' + seq_record_seq_str append(seq_str) @@ -190,7 +190,7 @@ def get_sequence_based_on_codon_positions(self, gene_code, seq): """ if 'ALL' in self.codon_positions: - return (seq,) + return seq, reading_frame = int(self.reading_frames[gene_code.lower()]) - 1 seq = seq[reading_frame:] From e201bff74434a01f35f90e471c6a6b7bd9833492 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Tue, 10 Feb 2015 17:35:09 +0200 Subject: [PATCH 03/24] test dataset all condons as one --- voseq/create_dataset/tests/tests_utils.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 1bd48ad1..7d5d3db0 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -130,12 +130,19 @@ def test_get_sequence_second_and_third_codon_position(self): result = dataset_creator.get_sequence_based_on_codon_positions('wingless', sequence) self.assertEqual(expected, result) - def test_create_dataset_two_partitions_first_and_second_codon_position(self): - self.cleaned_data['positions'] = ['1st', '2nd'] - self.cleaned_data['partition_by_positions'] = 'EACH' - self.cleaned_data['gene_codes'] = [Genes.objects.get(gene_code='wingless')] - dataset_creator = CreateDataset(self.cleaned_data) + def test_dataset_all_codons_as_one(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] - expected = '>wingless\n-------------------\n>CAGTGATCGGAATCACACACGGCATTATTAAATGGGGGATGAAaCGACATGAAATTGAAAGA' + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? +>CP100-11_Melitaea_diamina +??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? +""" result = dataset_creator.dataset_str - self.assertEqual(expected, result) + self.assertEqual(expected.strip(), result) From 2cbdcdab7df484fb61588bf44f741e4cda2e8279 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Tue, 10 Feb 2015 18:05:38 +0200 Subject: [PATCH 04/24] test dataset all codons, 1st position as one --- voseq/create_dataset/tests/tests_utils.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 7d5d3db0..dcfd3bf0 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -143,6 +143,25 @@ def test_dataset_all_codons_as_one(self): ?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? >CP100-11_Melitaea_diamina ??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_all_codons_1st_as_one(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + del cleaned_data['positions'] + cleaned_data['positions'] = list(['ALL', '1st'],) + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? +>CP100-11_Melitaea_diamina +??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) From 2fccabafa54935a7d1a4397cd5190b5a98cf9d5f Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Tue, 10 Feb 2015 18:08:23 +0200 Subject: [PATCH 05/24] test dataset all codons, 1st 2nd position as one --- voseq/create_dataset/tests/tests_utils.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index dcfd3bf0..89c3b62c 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -151,7 +151,6 @@ def test_dataset_all_codons_1st_as_one(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] - del cleaned_data['positions'] cleaned_data['positions'] = list(['ALL', '1st'],) dataset_creator = CreateDataset(cleaned_data) @@ -162,6 +161,24 @@ def test_dataset_all_codons_1st_as_one(self): ?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? >CP100-11_Melitaea_diamina ??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_all_codons_1st_2nd_as_one(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + cleaned_data['positions'] = list(['ALL', '1st', '2nd'],) + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? +>CP100-11_Melitaea_diamina +??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) From ed6af5f54a6d7e59e197ff905512e48128e9bd12 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Tue, 10 Feb 2015 18:09:02 +0200 Subject: [PATCH 06/24] test dataset all codons, 1st 2nd 3rd position as one --- voseq/create_dataset/tests/tests_utils.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 89c3b62c..58a69026 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -179,6 +179,24 @@ def test_dataset_all_codons_1st_2nd_as_one(self): ?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? >CP100-11_Melitaea_diamina ??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_all_codons_1st_2nd_3rd_as_one(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'],) + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? +>CP100-11_Melitaea_diamina +??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) From 5da79b7607c170c1ad5f15df50ce1ab410248bda Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Wed, 11 Feb 2015 10:54:23 +0200 Subject: [PATCH 07/24] test dataset all codons, partitions each --- voseq/create_dataset/tests/tests_utils.py | 31 +++++++++++ voseq/create_dataset/utils.py | 63 +++++++++++++---------- 2 files changed, 68 insertions(+), 26 deletions(-) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 58a69026..02f41e30 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -197,6 +197,37 @@ def test_dataset_all_codons_1st_2nd_3rd_as_one(self): ?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? >CP100-11_Melitaea_diamina ??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_all_codons_partitions_each(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + cleaned_data['positions'] = list(['ALL'],) + cleaned_data['partition_by_positions'] = 'EACH', + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi_1st_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>CP100-11_Melitaea_diamina +?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>coi_2nd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +>CP100-11_Melitaea_diamina +TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>coi_3rd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>CP100-11_Melitaea_diamina +GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) diff --git a/voseq/create_dataset/utils.py b/voseq/create_dataset/utils.py index ed39cf2f..ee1e094d 100644 --- a/voseq/create_dataset/utils.py +++ b/voseq/create_dataset/utils.py @@ -73,16 +73,13 @@ def from_seq_objs_to_fasta(self): another FASTA gene sequence. """ - if self.partition_by_positions != 'ONE': - # This codons might not correspond to first, second and third codon positions - partitions = { - 'codon1': [], - 'codon2': [], - 'codon3': [], - } - - fasta_str = [] - append = fasta_str.append + # This codons might not correspond to first, second and third codon positions + partitions = { + 'all_codons': [], + 'codon1': [], + 'codon2': [], + 'codon3': [], + } for gene_code in self.seq_objs: this_gene = None @@ -90,14 +87,21 @@ def from_seq_objs_to_fasta(self): if this_gene is None: this_gene = seq_record.name seq_str = '>' + this_gene + '\n' + '--------------------' - append(seq_str) + partitions['all_codons'].append(seq_str) + + seq_str = '>' + this_gene + '_1st_codon\n' + '--------------------' + partitions['codon1'].append(seq_str) + seq_str = '>' + this_gene + '_2nd_codon\n' + '--------------------' + partitions['codon2'].append(seq_str) + seq_str = '>' + this_gene + '_3rd_codon\n' + '--------------------' + partitions['codon3'].append(seq_str) + seq_record_seqs = self.get_sequence_based_on_codon_positions(this_gene, seq_record.seq) # We have only one codon position if len(seq_record_seqs) == 1: - seq_record_seq_str = str(seq_record_seqs[0]) - seq_str = '>' + seq_record.id + '\n' + seq_record_seq_str - append(seq_str) + seq_str = '>' + seq_record.id + '\n' + str(seq_record_seqs[0]) + partitions['all_codons'].append(seq_str) # We have two codon positions because they should go to different partitions if len(seq_record_seqs) == 2: @@ -109,27 +113,28 @@ def from_seq_objs_to_fasta(self): # We have three codon positions because they should go to different partitions if len(seq_record_seqs) == 3: - seq_str = '>' + seq_record.id + '\n' + seq_record_seqs[0] + seq_str = '>' + seq_record.id + '\n' + str(seq_record_seqs[0]) partitions['codon1'].append(seq_str) - seq_str = '>' + seq_record.id + '\n' + seq_record_seqs[1] + seq_str = '>' + seq_record.id + '\n' + str(seq_record_seqs[1]) partitions['codon2'].append(seq_str) - seq_str = '>' + seq_record.id + '\n' + seq_record_seqs[2] + seq_str = '>' + seq_record.id + '\n' + str(seq_record_seqs[2]) partitions['codon3'].append(seq_str) - # We return only one partition - if len(fasta_str) > 0: - return '\n'.join(fasta_str) - out = '' - if len(partitions['codon1']) > 0: + if self.partition_by_positions == 'ONE': + out += '\n'.join(partitions['all_codons']) + return out + + if len(partitions['codon1']) > len(self.gene_codes): out += '\n'.join(partitions['codon1']) - if len(partitions['codon2']) > 0: + if len(partitions['codon2']) > len(self.gene_codes): + out += '\n' out += '\n'.join(partitions['codon2']) - if len(partitions['codon3']) > 0: + if len(partitions['codon3']) > len(self.gene_codes): + out += '\n' out += '\n'.join(partitions['codon3']) - return out def get_taxon_names_for_taxa(self): @@ -189,7 +194,7 @@ def get_sequence_based_on_codon_positions(self, gene_code, seq): processed to extract the codon positions requested by the user. """ - if 'ALL' in self.codon_positions: + if 'ALL' in self.codon_positions and self.partition_by_positions == 'ONE': return seq, reading_frame = int(self.reading_frames[gene_code.lower()]) - 1 @@ -230,3 +235,9 @@ def get_sequence_based_on_codon_positions(self, gene_code, seq): if '2nd' in self.codon_positions and '3rd' in self.codon_positions \ and '1st' not in self.codon_positions: return chain_and_flatten(second_position, third_position) + + if 'ALL' in self.codon_positions: + if self.partition_by_positions == 'ONE': + return (chain_and_flatten(first_position, second_position, third_position)) + else: + return (first_position, second_position, third_position) From 0de12fb320bb5943c9ca24e8d64ea4eca29b8c56 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Wed, 11 Feb 2015 12:24:49 +0200 Subject: [PATCH 08/24] test dataset all condons + 1st, partitions each --- voseq/create_dataset/tests/tests_utils.py | 31 +++++++++++++++++++++++ voseq/create_dataset/utils.py | 13 +++++----- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 02f41e30..9752c220 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -228,6 +228,37 @@ def test_dataset_all_codons_partitions_each(self): ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_all_codons_1st_partitions_each(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + cleaned_data['positions'] = list(['ALL', '1st'],) + cleaned_data['partition_by_positions'] = 'EACH', + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi_1st_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>CP100-11_Melitaea_diamina +?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>coi_2nd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +>CP100-11_Melitaea_diamina +TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>coi_3rd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>CP100-11_Melitaea_diamina +GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) diff --git a/voseq/create_dataset/utils.py b/voseq/create_dataset/utils.py index ee1e094d..e36248dc 100644 --- a/voseq/create_dataset/utils.py +++ b/voseq/create_dataset/utils.py @@ -206,6 +206,13 @@ def get_sequence_based_on_codon_positions(self, gene_code, seq): second_position = seq[1::3] third_position = seq[2::3] + # ALL overrides 1st, 2nd, 3rd codon positions. We should return all codons + if 'ALL' in self.codon_positions: + if self.partition_by_positions == 'ONE': + return (chain_and_flatten(first_position, second_position, third_position)) + else: + return (first_position, second_position, third_position) + if '1st' in self.codon_positions \ and '2nd' not in self.codon_positions \ and '3rd' not in self.codon_positions: @@ -235,9 +242,3 @@ def get_sequence_based_on_codon_positions(self, gene_code, seq): if '2nd' in self.codon_positions and '3rd' in self.codon_positions \ and '1st' not in self.codon_positions: return chain_and_flatten(second_position, third_position) - - if 'ALL' in self.codon_positions: - if self.partition_by_positions == 'ONE': - return (chain_and_flatten(first_position, second_position, third_position)) - else: - return (first_position, second_position, third_position) From 4ffc9548488286055e1017bc0e66f07ed23d6af5 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Wed, 11 Feb 2015 12:25:47 +0200 Subject: [PATCH 09/24] test dataset all condons + 1st + 2nd, partitions each --- voseq/create_dataset/tests/tests_utils.py | 31 +++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 9752c220..33b520d4 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -259,6 +259,37 @@ def test_dataset_all_codons_1st_partitions_each(self): ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_all_codons_1st_2nd_partitions_each(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + cleaned_data['positions'] = list(['ALL', '1st', '2nd'],) + cleaned_data['partition_by_positions'] = 'EACH', + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi_1st_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>CP100-11_Melitaea_diamina +?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>coi_2nd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +>CP100-11_Melitaea_diamina +TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>coi_3rd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>CP100-11_Melitaea_diamina +GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) From a47ec368befee7e9e78d3a59b6092aeccdecff22 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Wed, 11 Feb 2015 12:26:18 +0200 Subject: [PATCH 10/24] test dataset all condons + 1st + 2nd + 3rd, partitions each --- voseq/create_dataset/tests/tests_utils.py | 31 +++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 33b520d4..13af31a4 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -290,6 +290,37 @@ def test_dataset_all_codons_1st_2nd_partitions_each(self): ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_all_codons_1st_2nd_3rd_partitions_each(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'],) + cleaned_data['partition_by_positions'] = 'EACH', + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi_1st_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>CP100-11_Melitaea_diamina +?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>coi_2nd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +>CP100-11_Melitaea_diamina +TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>coi_3rd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>CP100-11_Melitaea_diamina +GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) From a21034f55e315713fd3f4a82643954330e0fe55c Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Wed, 11 Feb 2015 12:55:37 +0200 Subject: [PATCH 11/24] test datatset 1st codon, one partition --- voseq/create_dataset/tests/tests_utils.py | 21 ++++++++++++++++++++- voseq/create_dataset/utils.py | 2 +- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 13af31a4..602e4e11 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -83,7 +83,7 @@ def test_get_sequence_first_codon_position(self): expected = Seq("CGGTGATAAAGCTATATGGAGACAAGATGAG") sequence = Seq("ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA") result = dataset_creator.get_sequence_based_on_codon_positions('wingless', sequence) - self.assertEqual(expected, result) + self.assertEqual(expected, result[0]) def test_get_sequence_second_codon_position(self): self.cleaned_data['positions'] = ['2nd'] @@ -321,6 +321,25 @@ def test_dataset_all_codons_1st_2nd_3rd_partitions_each(self): ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_1st_codon_one_partition(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + del cleaned_data['positions'] + cleaned_data['positions'] = ['1st'] + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>CP100-11_Melitaea_diamina +?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) diff --git a/voseq/create_dataset/utils.py b/voseq/create_dataset/utils.py index e36248dc..cc3a610d 100644 --- a/voseq/create_dataset/utils.py +++ b/voseq/create_dataset/utils.py @@ -216,7 +216,7 @@ def get_sequence_based_on_codon_positions(self, gene_code, seq): if '1st' in self.codon_positions \ and '2nd' not in self.codon_positions \ and '3rd' not in self.codon_positions: - return first_position + return first_position, if '2nd' in self.codon_positions \ and '1st' not in self.codon_positions \ From 3043089dcf4ad8ff8a74f158f67a752009477f27 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Wed, 11 Feb 2015 13:00:35 +0200 Subject: [PATCH 12/24] test datatset 2nd codon, one partition --- voseq/create_dataset/tests/tests_utils.py | 21 ++++++++++++++++++++- voseq/create_dataset/utils.py | 2 +- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 602e4e11..61a75031 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -92,7 +92,7 @@ def test_get_sequence_second_codon_position(self): expected = Seq("ATACGACCCCGATTAAGGGTAaGCTAATAAA") sequence = Seq("ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA") result = dataset_creator.get_sequence_based_on_codon_positions('wingless', sequence) - self.assertEqual(expected, result) + self.assertEqual(expected, result[0]) def test_get_sequence_third_codon_position(self): self.cleaned_data['positions'] = ['3rd'] @@ -340,6 +340,25 @@ def test_dataset_1st_codon_one_partition(self): ????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >CP100-11_Melitaea_diamina ?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_2nd_codon_one_partition(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + del cleaned_data['positions'] + cleaned_data['positions'] = ['2nd'] + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +>CP100-11_Melitaea_diamina +TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) diff --git a/voseq/create_dataset/utils.py b/voseq/create_dataset/utils.py index cc3a610d..eddc5740 100644 --- a/voseq/create_dataset/utils.py +++ b/voseq/create_dataset/utils.py @@ -221,7 +221,7 @@ def get_sequence_based_on_codon_positions(self, gene_code, seq): if '2nd' in self.codon_positions \ and '1st' not in self.codon_positions \ and '3rd' not in self.codon_positions: - return second_position + return second_position, if '3rd' in self.codon_positions \ and '1st' not in self.codon_positions \ From b954788f0252b40cba42f7f2e2eee11d0e8d14df Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Wed, 11 Feb 2015 13:39:42 +0200 Subject: [PATCH 13/24] test dataset 3rd codon, one partition --- voseq/create_dataset/tests/tests_utils.py | 21 ++++++++++++++++++++- voseq/create_dataset/utils.py | 2 +- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 61a75031..aeacde85 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -101,7 +101,7 @@ def test_get_sequence_third_codon_position(self): expected = Seq("CCCCCGCCCCTCGTCATTTCCATCCGGCGG") sequence = Seq("ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA") result = dataset_creator.get_sequence_based_on_codon_positions('wingless', sequence) - self.assertEqual(expected, result) + self.assertEqual(expected, result[0]) def test_get_sequence_first_and_second_codon_position(self): self.cleaned_data['positions'] = ['1st', '2nd'] @@ -359,6 +359,25 @@ def test_dataset_2nd_codon_one_partition(self): ????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? >CP100-11_Melitaea_diamina TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_3rd_codon_one_partition(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + del cleaned_data['positions'] + cleaned_data['positions'] = ['3rd'] + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>CP100-11_Melitaea_diamina +GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) diff --git a/voseq/create_dataset/utils.py b/voseq/create_dataset/utils.py index eddc5740..03504d28 100644 --- a/voseq/create_dataset/utils.py +++ b/voseq/create_dataset/utils.py @@ -226,7 +226,7 @@ def get_sequence_based_on_codon_positions(self, gene_code, seq): if '3rd' in self.codon_positions \ and '1st' not in self.codon_positions \ and '2nd' not in self.codon_positions: - return third_position + return third_position, if '1st' in self.codon_positions and '2nd' in self.codon_positions \ and '3rd' not in self.codon_positions: From f19f9187c9f3cbd216a3a5f5861c54c6d91d2ccd Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Wed, 11 Feb 2015 14:21:59 +0200 Subject: [PATCH 14/24] test dataset 1st 2nd codons, one partition --- voseq/create_dataset/tests/tests_utils.py | 21 ++++++++++++++++++++- voseq/create_dataset/utils.py | 4 ++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index aeacde85..ef5ae29e 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -110,7 +110,7 @@ def test_get_sequence_first_and_second_codon_position(self): expected = Seq("CAGTGATCGGAATCACACACGGCATTATTAAATGGGGGATGAAaCGACATGAAATTGAAAGA") sequence = Seq("ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA") result = dataset_creator.get_sequence_based_on_codon_positions('wingless', sequence) - self.assertEqual(expected, result) + self.assertEqual(expected, result[0]) def test_get_sequence_first_and_third_codon_position(self): self.cleaned_data['positions'] = ['1st', '3rd'] @@ -378,6 +378,25 @@ def test_dataset_3rd_codon_one_partition(self): ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_1st_2nd_codon_one_partition(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + del cleaned_data['positions'] + cleaned_data['positions'] = ['1st', '2nd'] + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????????????TGGCGGATATGGACTCCTAGCTATATCGACGATTGGAACCAGTTTTATGGGAGACAATTAAAACATGTACGCCAGCTTATATATTTTTATGTATCCATATATGGGGTTGGAATGCTGTCCTTATTTGGGCCCGAATGCTTCCCGATAATAATAGTTTGTTTTCCCCTCTTATCTTTATTCAGAGATGTGAAAGGGCGGACGGTGACGTTACCCCCTTCTCAAATGCCAAGGGGCTCGTGATTGCATTTTCTTCATTGCGGATTCTCATTTGGGCATAATTATACACATATAAATCGATAAAAATTCTAGACAATCCTTTTGTTGGCGTGGATACGCTTCTCTTTTTTCTTCCGTTTGCGGGCATACATCTTTACGACGAACTAAACTCTTTTGATCTGGGGGGGGACC?????????????????????? +>CP100-11_Melitaea_diamina +?TAGCGTAAATGTAATCCAATCTATATCAACGATAGAATCTATTTTAATGAGTGTCAATTTATACATGAAAGTCTGTTTATAAATTTTTAAGTAGCAATAAATGAGATTGTATTACTGACATAAATGGAGCCAGTAAGTTCCCCAAAATTTAAAATTTATATGCTCATCTAATCTTAATTAATAAATGAGAATGGGAGAATGATAAAGTTCCCCACTTATTATATGCCTAAGAGTTAGGGTTAGTATTTTTTACTTAGTGGATTCTTACTAGAGTATATTTATATAAATATATAACAATATATAATTTTGTCAAACTTATTGATAGAGAGAATAAGATACTCCTATATTTACAGTTAGTGAGTATATAACTTAAGGTCAATCTATACTATTTTGTTCTCGAGAGAGTC?????????????????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) diff --git a/voseq/create_dataset/utils.py b/voseq/create_dataset/utils.py index 03504d28..19b40d19 100644 --- a/voseq/create_dataset/utils.py +++ b/voseq/create_dataset/utils.py @@ -98,7 +98,7 @@ def from_seq_objs_to_fasta(self): seq_record_seqs = self.get_sequence_based_on_codon_positions(this_gene, seq_record.seq) - # We have only one codon position + # We have codon positions that go to one partition if len(seq_record_seqs) == 1: seq_str = '>' + seq_record.id + '\n' + str(seq_record_seqs[0]) partitions['all_codons'].append(seq_str) @@ -231,7 +231,7 @@ def get_sequence_based_on_codon_positions(self, gene_code, seq): if '1st' in self.codon_positions and '2nd' in self.codon_positions \ and '3rd' not in self.codon_positions: if self.partition_by_positions == 'ONE': - return (chain_and_flatten(first_position, second_position)) + return chain_and_flatten(first_position, second_position), else: return (first_position, second_position) From f52f8ca506bf51fe0940e195b6c2dc39b03c71e3 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Wed, 11 Feb 2015 14:51:08 +0200 Subject: [PATCH 15/24] test dataset 2nd, 3rd codons, one partition --- voseq/create_dataset/tests/tests_utils.py | 21 ++++++++++++++++++++- voseq/create_dataset/utils.py | 2 +- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index ef5ae29e..210e452c 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -128,7 +128,7 @@ def test_get_sequence_second_and_third_codon_position(self): expected = Seq("ACTCACCCGCAGCCCCCCCCGTACTGTTACAAGTGTGTTCACaAGTCCTCAGAGTCAGAGA") sequence = Seq("ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA") result = dataset_creator.get_sequence_based_on_codon_positions('wingless', sequence) - self.assertEqual(expected, result) + self.assertEqual(expected, result[0]) def test_dataset_all_codons_as_one(self): g1 = Genes.objects.get(gene_code='COI') @@ -397,6 +397,25 @@ def test_dataset_1st_2nd_codon_one_partition(self): ????????????????TGGCGGATATGGACTCCTAGCTATATCGACGATTGGAACCAGTTTTATGGGAGACAATTAAAACATGTACGCCAGCTTATATATTTTTATGTATCCATATATGGGGTTGGAATGCTGTCCTTATTTGGGCCCGAATGCTTCCCGATAATAATAGTTTGTTTTCCCCTCTTATCTTTATTCAGAGATGTGAAAGGGCGGACGGTGACGTTACCCCCTTCTCAAATGCCAAGGGGCTCGTGATTGCATTTTCTTCATTGCGGATTCTCATTTGGGCATAATTATACACATATAAATCGATAAAAATTCTAGACAATCCTTTTGTTGGCGTGGATACGCTTCTCTTTTTTCTTCCGTTTGCGGGCATACATCTTTACGACGAACTAAACTCTTTTGATCTGGGGGGGGACC?????????????????????? >CP100-11_Melitaea_diamina ?TAGCGTAAATGTAATCCAATCTATATCAACGATAGAATCTATTTTAATGAGTGTCAATTTATACATGAAAGTCTGTTTATAAATTTTTAAGTAGCAATAAATGAGATTGTATTACTGACATAAATGGAGCCAGTAAGTTCCCCAAAATTTAAAATTTATATGCTCATCTAATCTTAATTAATAAATGAGAATGGGAGAATGATAAAGTTCCCCACTTATTATATGCCTAAGAGTTAGGGTTAGTATTTTTTACTTAGTGGATTCTTACTAGAGTATATTTATATAAATATATAACAATATATAATTTTGTCAAACTTATTGATAGAGAGAATAAGATACTCCTATATTTACAGTTAGTGAGTATATAACTTAAGGTCAATCTATACTATTTTGTTCTCGAGAGAGTC?????????????????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_2nd_3rd_codon_one_partition(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + del cleaned_data['positions'] + cleaned_data['positions'] = ['2nd', '3rd'] + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????????????GACCGTTATTGTCACCTAGTTTTTTTGACCAATAGAATCTGTTTTATTGAATATAATTATATCCTTTACACTATCTTTTTTATTTTTTTATTTGCATTTATTGAGATTGTATGATTTACATATATGGACCCAATTACTTCCCGATAATATTAGATTGATATGCTCACCTATTTTTATTCAGTGATTTAAAATGGCAGACTGAGACATTACCCCATTCACTATTTCCATGAGACTCATGATTACTTTTTCTTAATTACTGGTTCCCTTCTAGACTTTATTTTTCTCATTTTATTAGATTATATTACTATATAATACTTATTTAGACATAGATTCACATATTTCTATACTTACATTTACTGACTTTCTTATTTACGATGAATTTATCCCATTTTATCCGCGAGAGAATC?????????????????????? +>CP100-11_Melitaea_diamina +TGGCGGATATGGACTCCTAGCTATATCGACGATTGGAACCAGTTTTATGGGAGACAATTAAAACATGTACGCCAGCTTATATATTTTTATGTATCCATATATGGGGTTGGAATGCTGTCCTTATTTGGGCCCGAATGCTTCCCGATAATAATAGTTTGTTTTCCCCTCTTATCTTTATTCAGAGATGTGAAAGGGCGGACGGTGACGTTACCCCCTTCTCAAATGCCAAGGGGCTCGTGATTGCATTTTCTTCATTGCGGATTCTCATTTGGGCATAATTATACACATATAAATCGATAAAAATTCTAGACAATCCTTTTGTTGGCGTGGATACGCTTCTCTTTTTTCTTCCGTTTGCGGGCATACATCTTTACGACGAACTAAACTCTTTTGATCTGGGGGGGGACC?????????????????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) diff --git a/voseq/create_dataset/utils.py b/voseq/create_dataset/utils.py index 19b40d19..76edd130 100644 --- a/voseq/create_dataset/utils.py +++ b/voseq/create_dataset/utils.py @@ -241,4 +241,4 @@ def get_sequence_based_on_codon_positions(self, gene_code, seq): if '2nd' in self.codon_positions and '3rd' in self.codon_positions \ and '1st' not in self.codon_positions: - return chain_and_flatten(second_position, third_position) + return chain_and_flatten(second_position, third_position), From 38aa03305c1447a938ca61c7519d9a23a5def8d6 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Wed, 11 Feb 2015 15:03:00 +0200 Subject: [PATCH 16/24] test dataset 1st, 3rd codons, one partition --- voseq/create_dataset/tests/tests_utils.py | 21 ++++++++++++++++++++- voseq/create_dataset/utils.py | 10 ++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 210e452c..b26f1d19 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -119,7 +119,7 @@ def test_get_sequence_first_and_third_codon_position(self): expected = Seq("CCGCGCTCGCAGTCACACACGTCCTGATTCAATTGTGTACGCAACTACACGGAGTCGGAGG") sequence = Seq("ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA") result = dataset_creator.get_sequence_based_on_codon_positions('wingless', sequence) - self.assertEqual(expected, result) + self.assertEqual(expected, result[0]) def test_get_sequence_second_and_third_codon_position(self): self.cleaned_data['positions'] = ['2nd', '3rd'] @@ -416,6 +416,25 @@ def test_dataset_2nd_3rd_codon_one_partition(self): ????????????????GACCGTTATTGTCACCTAGTTTTTTTGACCAATAGAATCTGTTTTATTGAATATAATTATATCCTTTACACTATCTTTTTTATTTTTTTATTTGCATTTATTGAGATTGTATGATTTACATATATGGACCCAATTACTTCCCGATAATATTAGATTGATATGCTCACCTATTTTTATTCAGTGATTTAAAATGGCAGACTGAGACATTACCCCATTCACTATTTCCATGAGACTCATGATTACTTTTTCTTAATTACTGGTTCCCTTCTAGACTTTATTTTTCTCATTTTATTAGATTATATTACTATATAATACTTATTTAGACATAGATTCACATATTTCTATACTTACATTTACTGACTTTCTTATTTACGATGAATTTATCCCATTTTATCCGCGAGAGAATC?????????????????????? >CP100-11_Melitaea_diamina TGGCGGATATGGACTCCTAGCTATATCGACGATTGGAACCAGTTTTATGGGAGACAATTAAAACATGTACGCCAGCTTATATATTTTTATGTATCCATATATGGGGTTGGAATGCTGTCCTTATTTGGGCCCGAATGCTTCCCGATAATAATAGTTTGTTTTCCCCTCTTATCTTTATTCAGAGATGTGAAAGGGCGGACGGTGACGTTACCCCCTTCTCAAATGCCAAGGGGCTCGTGATTGCATTTTCTTCATTGCGGATTCTCATTTGGGCATAATTATACACATATAAATCGATAAAAATTCTAGACAATCCTTTTGTTGGCGTGGATACGCTTCTCTTTTTTCTTCCGTTTGCGGGCATACATCTTTACGACGAACTAAACTCTTTTGATCTGGGGGGGGACC?????????????????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_1st_3rd_codon_one_partition(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + del cleaned_data['positions'] + cleaned_data['positions'] = ['1st', '3rd'] + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????????????TAGCGTAAATGTAATCCAATCTATATCAACGATAGAATCTATTTTAATGAGTGTCAATTTATACATGAAAGTCTGTTTATAAATTTTTAAGTAGCAATAAATGAGATTGTATTACTGACATAAATGGAGCCAGTAAGTTCCCCAAAATTTAAAATTTATATGCTCATCTAATCTTAATTAATAAATGAGAATGGGAGAATGATAAAGTTCCCCACTTATTATATGCCTAAGAGTTAGGGTTAGTATTTTTTACTTAGTGGATTCTTACTAGAGTATATTTATATAAATATATAACAATATATAATTTTGTCAAACTTATTGATAGAGAGAATAAGATACTCCTATATTTACAGTTAGTGAGTATATAACTTAAGGTCAATCTATACTATTTTGTTCTCGAGAGAGTC?????????????????????? +>CP100-11_Melitaea_diamina +?GACCGTTATTGTCACCTAGTTTTTTTGACCAATAGAATCTGTTTTATTGAATATAATTATATCCTTTACACTATCTTTTTTATTTTTTTATTTGCATTTATTGAGATTGTATGATTTACATATATGGACCCAATTACTTCCCGATAATATTAGATTGATATGCTCACCTATTTTTATTCAGTGATTTAAAATGGCAGACTGAGACATTACCCCATTCACTATTTCCATGAGACTCATGATTACTTTTTCTTAATTACTGGTTCCCTTCTAGACTTTATTTTTCTCATTTTATTAGATTATATTACTATATAATACTTATTTAGACATAGATTCACATATTTCTATACTTACATTTACTGACTTTCTTATTTACGATGAATTTATCCCATTTTATCCGCGAGAGAATC?????????????????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) diff --git a/voseq/create_dataset/utils.py b/voseq/create_dataset/utils.py index 76edd130..a47b5290 100644 --- a/voseq/create_dataset/utils.py +++ b/voseq/create_dataset/utils.py @@ -237,8 +237,14 @@ def get_sequence_based_on_codon_positions(self, gene_code, seq): if '1st' in self.codon_positions and '3rd' in self.codon_positions \ and '2nd' not in self.codon_positions: - return chain_and_flatten(first_position, third_position) + if self.partition_by_positions == 'ONE': + return chain_and_flatten(first_position, third_position), + else: + return first_position, third_position if '2nd' in self.codon_positions and '3rd' in self.codon_positions \ and '1st' not in self.codon_positions: - return chain_and_flatten(second_position, third_position), + if self.partition_by_positions == 'ONE': + return chain_and_flatten(second_position, third_position), + else: + return (second_position, third_position) From fa620d62138b5551128110eadf3136059366f393 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Thu, 12 Feb 2015 14:12:26 +0200 Subject: [PATCH 17/24] test dataset 1st, each partition --- requirements/base.txt | 2 +- voseq/create_dataset/tests/tests_utils.py | 20 ++++++++++++++++++++ voseq/create_dataset/utils.py | 10 ++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/requirements/base.txt b/requirements/base.txt index e155ff45..c2c459a2 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,7 +1,7 @@ biopython==1.65 Django==1.7.4 pyprind==2.9.1 -elasticsearch==1.3.0 +elasticsearch==1.4 Unipath==1.0 psycopg2==2.6 dataset==0.5.5 diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index b26f1d19..0df52bd8 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -435,6 +435,26 @@ def test_dataset_1st_3rd_codon_one_partition(self): ????????????????TAGCGTAAATGTAATCCAATCTATATCAACGATAGAATCTATTTTAATGAGTGTCAATTTATACATGAAAGTCTGTTTATAAATTTTTAAGTAGCAATAAATGAGATTGTATTACTGACATAAATGGAGCCAGTAAGTTCCCCAAAATTTAAAATTTATATGCTCATCTAATCTTAATTAATAAATGAGAATGGGAGAATGATAAAGTTCCCCACTTATTATATGCCTAAGAGTTAGGGTTAGTATTTTTTACTTAGTGGATTCTTACTAGAGTATATTTATATAAATATATAACAATATATAATTTTGTCAAACTTATTGATAGAGAGAATAAGATACTCCTATATTTACAGTTAGTGAGTATATAACTTAAGGTCAATCTATACTATTTTGTTCTCGAGAGAGTC?????????????????????? >CP100-11_Melitaea_diamina ?GACCGTTATTGTCACCTAGTTTTTTTGACCAATAGAATCTGTTTTATTGAATATAATTATATCCTTTACACTATCTTTTTTATTTTTTTATTTGCATTTATTGAGATTGTATGATTTACATATATGGACCCAATTACTTCCCGATAATATTAGATTGATATGCTCACCTATTTTTATTCAGTGATTTAAAATGGCAGACTGAGACATTACCCCATTCACTATTTCCATGAGACTCATGATTACTTTTTCTTAATTACTGGTTCCCTTCTAGACTTTATTTTTCTCATTTTATTAGATTATATTACTATATAATACTTATTTAGACATAGATTCACATATTTCTATACTTACATTTACTGACTTTCTTATTTACGATGAATTTATCCCATTTTATCCGCGAGAGAATC?????????????????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_1st_each(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + del cleaned_data['positions'] + cleaned_data['positions'] = ['1st'] + cleaned_data['partition_by_positions'] = 'EACH' + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>CP100-11_Melitaea_diamina +?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) diff --git a/voseq/create_dataset/utils.py b/voseq/create_dataset/utils.py index a47b5290..9f45d8bb 100644 --- a/voseq/create_dataset/utils.py +++ b/voseq/create_dataset/utils.py @@ -81,6 +81,8 @@ def from_seq_objs_to_fasta(self): 'codon3': [], } + length_partitions = None + for gene_code in self.seq_objs: this_gene = None for seq_record in self.seq_objs[gene_code]: @@ -102,6 +104,7 @@ def from_seq_objs_to_fasta(self): if len(seq_record_seqs) == 1: seq_str = '>' + seq_record.id + '\n' + str(seq_record_seqs[0]) partitions['all_codons'].append(seq_str) + length_partitions = 1 # We have two codon positions because they should go to different partitions if len(seq_record_seqs) == 2: @@ -110,6 +113,7 @@ def from_seq_objs_to_fasta(self): seq_str = '>' + seq_record.id + '\n' + seq_record_seqs[1] partitions['codon2'].append(seq_str) + length_partitions = 2 # We have three codon positions because they should go to different partitions if len(seq_record_seqs) == 3: @@ -121,12 +125,18 @@ def from_seq_objs_to_fasta(self): seq_str = '>' + seq_record.id + '\n' + str(seq_record_seqs[2]) partitions['codon3'].append(seq_str) + length_partitions = 3 out = '' if self.partition_by_positions == 'ONE': out += '\n'.join(partitions['all_codons']) return out + # We have codon positions that go to one partition + if length_partitions == 1: + out += '\n'.join(partitions['all_codons']) + return out + if len(partitions['codon1']) > len(self.gene_codes): out += '\n'.join(partitions['codon1']) if len(partitions['codon2']) > len(self.gene_codes): From 2fed8bfc65ba45a4b715b76ffbd6a7d276b9f615 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Thu, 12 Feb 2015 14:15:25 +0200 Subject: [PATCH 18/24] test dataset 2nd, each partition --- voseq/create_dataset/tests/tests_utils.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 0df52bd8..906b8999 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -455,6 +455,26 @@ def test_dataset_1st_each(self): ????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >CP100-11_Melitaea_diamina ?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_2nd_each(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + del cleaned_data['positions'] + cleaned_data['positions'] = ['2nd'] + cleaned_data['partition_by_positions'] = 'EACH' + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +>CP100-11_Melitaea_diamina +TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) From 66cb2564b822ea24c8889dff85c10a3f811dc2a2 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Thu, 12 Feb 2015 14:16:22 +0200 Subject: [PATCH 19/24] test dataset 3rd, each partition --- voseq/create_dataset/tests/tests_utils.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 906b8999..a8158854 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -475,6 +475,26 @@ def test_dataset_2nd_each(self): ????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? >CP100-11_Melitaea_diamina TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_3rd_each(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + del cleaned_data['positions'] + cleaned_data['positions'] = ['3rd'] + cleaned_data['partition_by_positions'] = 'EACH' + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>CP100-11_Melitaea_diamina +GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) From a93645920865447ae951b2db4eaea8ff6f94bc37 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Thu, 12 Feb 2015 14:38:17 +0200 Subject: [PATCH 20/24] test dataset 1st, 3rd, each partition --- voseq/create_dataset/tests/tests_utils.py | 52 +++++++++++++++++++++++ voseq/create_dataset/utils.py | 19 ++++++--- 2 files changed, 66 insertions(+), 5 deletions(-) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index a8158854..6035b3b4 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -495,6 +495,58 @@ def test_dataset_3rd_each(self): ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_1st_2nd_each(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + del cleaned_data['positions'] + cleaned_data['positions'] = ['1st', '2nd'] + cleaned_data['partition_by_positions'] = 'EACH' + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi_1st_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>CP100-11_Melitaea_diamina +?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>coi_2nd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +>CP100-11_Melitaea_diamina +TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_1st_3rd_each(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + del cleaned_data['positions'] + cleaned_data['positions'] = ['1st', '3rd'] + cleaned_data['partition_by_positions'] = 'EACH' + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi_1st_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>CP100-11_Melitaea_diamina +?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>coi_3rd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>CP100-11_Melitaea_diamina +GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) diff --git a/voseq/create_dataset/utils.py b/voseq/create_dataset/utils.py index 9f45d8bb..b8b5bac7 100644 --- a/voseq/create_dataset/utils.py +++ b/voseq/create_dataset/utils.py @@ -108,12 +108,21 @@ def from_seq_objs_to_fasta(self): # We have two codon positions because they should go to different partitions if len(seq_record_seqs) == 2: - seq_str = '>' + seq_record.id + '\n' + seq_record_seqs[0] - partitions['codon1'].append(seq_str) + if self.codon_positions == ['1st', '2nd']: + seq_str = '>' + seq_record.id + '\n' + str(seq_record_seqs[0]) + partitions['codon1'].append(seq_str) - seq_str = '>' + seq_record.id + '\n' + seq_record_seqs[1] - partitions['codon2'].append(seq_str) - length_partitions = 2 + seq_str = '>' + seq_record.id + '\n' + str(seq_record_seqs[1]) + partitions['codon2'].append(seq_str) + length_partitions = 2 + + if self.codon_positions == ['1st', '3rd']: + seq_str = '>' + seq_record.id + '\n' + str(seq_record_seqs[0]) + partitions['codon1'].append(seq_str) + + seq_str = '>' + seq_record.id + '\n' + str(seq_record_seqs[1]) + partitions['codon3'].append(seq_str) + length_partitions = 2 # We have three codon positions because they should go to different partitions if len(seq_record_seqs) == 3: From 058f8f04b2fae9239b969dc794626087de214afa Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Thu, 12 Feb 2015 14:48:38 +0200 Subject: [PATCH 21/24] test dataset 2nd, 3rd, each position --- voseq/create_dataset/tests/tests_utils.py | 26 +++++++++++++++++++++++ voseq/create_dataset/utils.py | 8 +++++++ 2 files changed, 34 insertions(+) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 6035b3b4..a9bae31e 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -550,3 +550,29 @@ def test_dataset_1st_3rd_each(self): """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) + + def test_dataset_2nd_3rd_each(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + del cleaned_data['positions'] + cleaned_data['positions'] = ['2nd', '3rd'] + cleaned_data['partition_by_positions'] = 'EACH' + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi_2nd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +>CP100-11_Melitaea_diamina +TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>coi_3rd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>CP100-11_Melitaea_diamina +GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +""" + result = dataset_creator.dataset_str + self.assertEqual('\n' + expected.strip(), result) diff --git a/voseq/create_dataset/utils.py b/voseq/create_dataset/utils.py index b8b5bac7..df863bc7 100644 --- a/voseq/create_dataset/utils.py +++ b/voseq/create_dataset/utils.py @@ -124,6 +124,14 @@ def from_seq_objs_to_fasta(self): partitions['codon3'].append(seq_str) length_partitions = 2 + if self.codon_positions == ['2nd', '3rd']: + seq_str = '>' + seq_record.id + '\n' + str(seq_record_seqs[0]) + partitions['codon2'].append(seq_str) + + seq_str = '>' + seq_record.id + '\n' + str(seq_record_seqs[1]) + partitions['codon3'].append(seq_str) + length_partitions = 2 + # We have three codon positions because they should go to different partitions if len(seq_record_seqs) == 3: seq_str = '>' + seq_record.id + '\n' + str(seq_record_seqs[0]) From 90d0f079d73d50e435d04256e494c5bdfbe4025b Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Thu, 12 Feb 2015 15:06:46 +0200 Subject: [PATCH 22/24] test dataset 1st, 2nd, 3rd, each position --- voseq/create_dataset/tests/tests_utils.py | 32 +++++++++++++++++++++++ voseq/create_dataset/utils.py | 7 +++++ 2 files changed, 39 insertions(+) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index a9bae31e..21ea21e9 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -576,3 +576,35 @@ def test_dataset_2nd_3rd_each(self): """ result = dataset_creator.dataset_str self.assertEqual('\n' + expected.strip(), result) + + def test_dataset_1st_2nd_3rd_each(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + del cleaned_data['positions'] + cleaned_data['positions'] = ['1st', '2nd', '3rd'] + cleaned_data['partition_by_positions'] = 'EACH' + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi_1st_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>CP100-11_Melitaea_diamina +?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>coi_2nd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +>CP100-11_Melitaea_diamina +TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>coi_3rd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>CP100-11_Melitaea_diamina +GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) diff --git a/voseq/create_dataset/utils.py b/voseq/create_dataset/utils.py index df863bc7..af750a9c 100644 --- a/voseq/create_dataset/utils.py +++ b/voseq/create_dataset/utils.py @@ -275,3 +275,10 @@ def get_sequence_based_on_codon_positions(self, gene_code, seq): return chain_and_flatten(second_position, third_position), else: return (second_position, third_position) + + if '1st' in self.codon_positions and '2nd' in self.codon_positions \ + and '3rd' in self.codon_positions: + if self.partition_by_positions == 'ONE': + return (chain_and_flatten(first_position, second_position, third_position)) + else: + return (first_position, second_position, third_position) From 1b64516b319a2d6e010090a4366506b33aa8032a Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Thu, 12 Feb 2015 15:07:17 +0200 Subject: [PATCH 23/24] test dataset ALL, 1st, 2nd, 3rd, each position --- voseq/create_dataset/tests/tests_utils.py | 32 +++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/voseq/create_dataset/tests/tests_utils.py b/voseq/create_dataset/tests/tests_utils.py index 21ea21e9..13009dae 100644 --- a/voseq/create_dataset/tests/tests_utils.py +++ b/voseq/create_dataset/tests/tests_utils.py @@ -605,6 +605,38 @@ def test_dataset_1st_2nd_3rd_each(self): ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +""" + result = dataset_creator.dataset_str + self.assertEqual(expected.strip(), result) + + def test_dataset_ALL_1st_2nd_3rd_each(self): + g1 = Genes.objects.get(gene_code='COI') + cleaned_data = self.cleaned_data + cleaned_data['gene_codes'] = [g1] + del cleaned_data['positions'] + cleaned_data['positions'] = ['ALL', '1st', '2nd', '3rd'] + cleaned_data['partition_by_positions'] = 'EACH' + + dataset_creator = CreateDataset(cleaned_data) + expected = """ +>coi_1st_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>CP100-11_Melitaea_diamina +?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>coi_2nd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? +>CP100-11_Melitaea_diamina +TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? +>coi_3rd_codon +-------------------- +>CP100-10_Papilionoidea_Melitaea_diamina +????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? +>CP100-11_Melitaea_diamina +GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) From 1b53c03be9dcbcc766ad0a02ac6f7335adb78804 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Thu, 12 Feb 2015 15:21:52 +0200 Subject: [PATCH 24/24] rm unused model imports --- voseq/blast_local/models.py | 3 --- voseq/blast_local_full/models.py | 3 --- voseq/blast_ncbi/models.py | 3 --- voseq/blast_new/models.py | 3 --- voseq/core/models.py | 3 --- voseq/create_dataset/models.py | 3 --- voseq/view_genes/models.py | 3 --- 7 files changed, 21 deletions(-) diff --git a/voseq/blast_local/models.py b/voseq/blast_local/models.py index 71a83623..e69de29b 100644 --- a/voseq/blast_local/models.py +++ b/voseq/blast_local/models.py @@ -1,3 +0,0 @@ -from django.db import models - -# Create your models here. diff --git a/voseq/blast_local_full/models.py b/voseq/blast_local_full/models.py index 71a83623..e69de29b 100644 --- a/voseq/blast_local_full/models.py +++ b/voseq/blast_local_full/models.py @@ -1,3 +0,0 @@ -from django.db import models - -# Create your models here. diff --git a/voseq/blast_ncbi/models.py b/voseq/blast_ncbi/models.py index 71a83623..e69de29b 100644 --- a/voseq/blast_ncbi/models.py +++ b/voseq/blast_ncbi/models.py @@ -1,3 +0,0 @@ -from django.db import models - -# Create your models here. diff --git a/voseq/blast_new/models.py b/voseq/blast_new/models.py index 71a83623..e69de29b 100644 --- a/voseq/blast_new/models.py +++ b/voseq/blast_new/models.py @@ -1,3 +0,0 @@ -from django.db import models - -# Create your models here. diff --git a/voseq/core/models.py b/voseq/core/models.py index 71a83623..e69de29b 100644 --- a/voseq/core/models.py +++ b/voseq/core/models.py @@ -1,3 +0,0 @@ -from django.db import models - -# Create your models here. diff --git a/voseq/create_dataset/models.py b/voseq/create_dataset/models.py index 71a83623..e69de29b 100644 --- a/voseq/create_dataset/models.py +++ b/voseq/create_dataset/models.py @@ -1,3 +0,0 @@ -from django.db import models - -# Create your models here. diff --git a/voseq/view_genes/models.py b/voseq/view_genes/models.py index 71a83623..e69de29b 100644 --- a/voseq/view_genes/models.py +++ b/voseq/view_genes/models.py @@ -1,3 +0,0 @@ -from django.db import models - -# Create your models here.