From 6733b269a20baf836ef413595105805518fff41e Mon Sep 17 00:00:00 2001 From: Paul Cantalupo Date: Tue, 6 Nov 2012 14:36:27 -0500 Subject: [PATCH 1/2] fixing bug 3375 and adding tests --- Bio/SeqIO/genbank.pm | 30 +- t/SeqIO/genbank.t | 21 +- t/data/NC_002058_multDBLINK_bug3375.gb | 479 +++++++++++++++++++++++++ 3 files changed, 519 insertions(+), 11 deletions(-) create mode 100644 t/data/NC_002058_multDBLINK_bug3375.gb diff --git a/Bio/SeqIO/genbank.pm b/Bio/SeqIO/genbank.pm index dcc6d3cee4..165fd0b140 100644 --- a/Bio/SeqIO/genbank.pm +++ b/Bio/SeqIO/genbank.pm @@ -587,20 +587,30 @@ sub next_seq { -version => $version, -database => $db || 'GenBank', -tagname => 'dblink')); - } elsif ( $dbsource =~ /(\S+)([\.:])\s*(\d+)/ ) { - my ($id, $db, $version); + } elsif ( $dbsource =~ /(\S+)([\.:])\s*(\S+)/ ) { + my ($db, $version); + my @ids = (); if ($2 eq ':') { - ($db, $id) = ($1, $3); + $db = $1; + # Genbank 192 release notes say this: "The second field can consist of + # multiple comma-separated identifiers, if a sequence record has + # multiple DBLINK cross-references of a given type." 
+ # For example: DBLINK Project:100,200,300" + @ids = split (/,/, $3); } else { - ($db, $id, $version) = ('GenBank', $1, $3); + ($db, $version) = ('GenBank', $3); + $ids[0] = $1; } - $annotation->add_Annotation('dblink', - Bio::Annotation::DBLink->new( - -primary_id => $id, - -version => $version, - -database => $db, - -tagname => 'dblink') + + foreach my $id (@ids) { + $annotation->add_Annotation('dblink', + Bio::Annotation::DBLink->new( + -primary_id => $id, + -version => $version, + -database => $db, + -tagname => 'dblink') ); + } } else { $self->warn("Unrecognized DBSOURCE data: $dbsource\n"); } diff --git a/t/SeqIO/genbank.t b/t/SeqIO/genbank.t index 4ef359ae58..0c1cac62b4 100644 --- a/t/SeqIO/genbank.t +++ b/t/SeqIO/genbank.t @@ -7,7 +7,7 @@ BEGIN { use lib '.'; use Bio::Root::Test; - test_begin(-tests => 274 ); + test_begin(-tests => 283 ); use_ok('Bio::SeqIO::genbank'); } @@ -601,3 +601,22 @@ $as = $ast->next_seq; ($cds) = grep { $_->primary_tag eq 'CDS' } $as->get_SeqFeatures(); @notes = $cds->get_tag_values('note'); is(scalar @notes, 2); + + +#bug 3375 +my $in = Bio::SeqIO->new(-format => 'genbank', + -file => test_input_file('NC_002058_multDBLINK_bug3375.gb')); +my $seq = $in->next_seq(); # should not throw a warning now +my @dblinks = $seq->annotation->get_Annotations('dblink'); # contains 5 dblink references +is($dblinks[0]->database, 'BioProject', 'bug3375 database is BioProject'); +is($dblinks[0]->primary_id, 'PRJNA15288', 'bug3375 primary_id is PRJNA15288'); + +is($dblinks[1]->database, 'Project'); +is($dblinks[1]->primary_id, '100'); +is($dblinks[3]->database, 'Project'); +is($dblinks[3]->primary_id, '300'); + +is($dblinks[4]->database, 'GenBank'); +is($dblinks[4]->primary_id, 'NC_002058'); +is($dblinks[4]->version, '3'); + diff --git a/t/data/NC_002058_multDBLINK_bug3375.gb b/t/data/NC_002058_multDBLINK_bug3375.gb new file mode 100644 index 0000000000..4e9e370d54 --- /dev/null +++ b/t/data/NC_002058_multDBLINK_bug3375.gb @@ -0,0 +1,479 @@ +LOCUS 
NC_002058 7440 bp ss-RNA linear VRL 08-DEC-2008 +DEFINITION Poliovirus, complete genome. +ACCESSION NC_002058 +VERSION NC_002058.3 GI:12408699 +DBLINK BioProject: PRJNA15288 +DBLINK Project:100,200,300 +DBLINK NC_002058.3 +KEYWORDS coat protein; complementary DNA; genome; polyprotein. +SOURCE Human enterovirus C + ORGANISM Human enterovirus C + Viruses; ssRNA positive-strand viruses, no DNA stage; + Picornavirales; Picornaviridae; Enterovirus. +REFERENCE 1 (sites) + AUTHORS Dorner,A.J., Dorner,L.F., Larsen,G.R., Wimmer,E. and Anderson,C.W. + TITLE Identification of the initiation site of poliovirus polyprotein + synthesis + JOURNAL J. Virol. 42 (3), 1017-1028 (1982) + PUBMED 6284987 +REFERENCE 2 (sites) + AUTHORS Emini,E.A., Elzinga,M. and Wimmer,E. + TITLE Carboxy-terminal analysis of poliovirus proteins: termination of + poliovirus RNA translation and location of unique poliovirus + polyprotein cleavage sites + JOURNAL J. Virol. 42 (1), 194-199 (1982) + PUBMED 6283138 +REFERENCE 3 (bases 1 to 7440) + AUTHORS Racaniello,V.R. and Baltimore,D. + TITLE Molecular cloning of poliovirus cDNA and determination of the + complete nucleotide sequence of the viral genome + JOURNAL Proc. Natl. Acad. Sci. U.S.A. 78 (8), 4887-4891 (1981) + PUBMED 6272282 +REFERENCE 4 (bases 1 to 7440) + AUTHORS Kitamura,N., Semler,B.L., Rothberg,P.G., Larsen,G.R., Adler,C.J., + Dorner,A.J., Emini,E.A., Hanecak,R., Lee,J.J., van der Werf,S., + Anderson,C.W. and Wimmer,E. + TITLE Primary structure, gene organization and polypeptide expression of + poliovirus RNA + JOURNAL Nature 291 (5816), 547-553 (1981) + PUBMED 6264310 +REFERENCE 5 (bases 5360 to 5527) + AUTHORS Kitamura,N., Adler,C.J., Rothberg,P.G., Martinko,J., Nathenson,S.G. + and Wimmer,E. + TITLE The genome-linked protein of picornaviruses. VII. Genetic mapping + of poliovirus VPg by protein and RNA sequence studies + JOURNAL Cell 21 (1), 295-302 (1980) + PUBMED 6250717 +REFERENCE 6 (bases 6383 to 7440) + AUTHORS Kitamura,N. 
and Wimmer,E. + TITLE Sequence of 1060 3'-terminal nucleotides of poliovirus RNA as + determined by a modification of the dideoxynucleotide method + JOURNAL Proc. Natl. Acad. Sci. U.S.A. 77 (6), 3196-3200 (1980) + PUBMED 6158042 +REFERENCE 7 (bases 1 to 7440) + CONSRTM NCBI Genome Project + TITLE Direct Submission + JOURNAL Submitted (01-AUG-2000) National Center for Biotechnology + Information, NIH, Bethesda, MD 20894, USA +COMMENT REVIEWED REFSEQ: This record has been curated by NCBI staff. The + reference sequence was derived from V01149. + On Jan 24, 2001 this sequence version replaced gi:12331600. + See also entries V01148 (POLIO1A, for another version of this + genome) and V01150 (POLIOS1, for the genome of the Sabin 1 strain). + See the entry V01148 (POLIO1A) for the sequence reported in [3]. + Mature peptides were added to the annotation by NCBI RefSeq Genomes + staff. + COMPLETENESS: full length. +FEATURES Location/Qualifiers + source 1..7440 + /organism="Human enterovirus C" + /mol_type="genomic RNA" + /strain="Human poliovirus 1 Mahoney" + /db_xref="taxon:138950" + conflict 284..288 + /note="CGCUC is GUU in [3]" + /citation=[4] + conflict 352 + /note="G is U in [3]" + /citation=[4] + conflict 387 + /note="GC is C in [3]" + /citation=[4] + conflict 488 + /note="G is GU in [3]" + /citation=[4] + gene 743..7372 + /locus_tag="PVgp1" + /db_xref="GeneID:919920" + CDS 743..7372 + /locus_tag="PVgp1" + /codon_start=1 + /product="polyprotein" + /protein_id="NP_041277.1" + /db_xref="GI:9627037" + /db_xref="GOA:P03300" + /db_xref="UniProtKB/Swiss-Prot:P03300" + /db_xref="GeneID:919920" + /translation="MGAQVSSQKVGAHENSNRAYGGSTINYTTINYYRDSASNAASKQ + DFSQDPSKFTEPIKDVLIKTAPMLNSPNIEACGYSDRVLQLTLGNSTITTQEAANSVV + AYGRWPEYLRDSEANPVDQPTEPDVAACRFYTLDTVSWTKESRGWWWKLPDALRDMGL + FGQNMYYHYLGRSGYTVHVQCNASKFHQGALGVFAVPEMCLAGDSNTTTMHTSYQNAN + PGEKGGTFTGTFTPDNNQTSPARRFCPVDYLLGNGTLLGNAFVFPHQIINLRTNNCAT + LVLPYVNSLSIDSMVKHNNWGIAILPLAPLNFASESSPEIPITLTIAPMCCEFNGLRN + 
ITLPRLQGLPVMNTPGSNQYLTADNFQSPCALPEFDVTPPIDIPGEVKNMMELAEIDT + MIPFDLSATKKNTMEMYRVRLSDKPHTDDPILCLSLSPASDPRLSHTMLGEILNYYTH + WAGSLKFTFLFCGFMMATGKLLVSYAPPGADPPKKRKEAMLGTHVIWDIGLQSSCTMV + VPWISNTTYRQTIDDSFTEGGYISVFYQTRIVVPLSTPREMDILGFVSACNDFSVRLL + RDTTHIEQKALAQGLGQMLESMIDNTVRETVGAATSRDALPNTEASGPTHSKEIPALT + AVETGATNPLVPSDTVQTRHVVQHRSRSESSIESFFARGACVTIMTVDNPASTTNKDK + LFAVWKITYKDTVQLRRKLEFFTYSRFDMELTFVVTANFTETNNGHALNQVYQIMYVP + PGAPVPEKWDDYTWQTSSNPSIFYTYGTAPARISVPYVGISNAYSHFYDGFSKVPLKD + QSAALGDSLYGAASLNDFGILAVRVVNDHNPTKVTSKIRVYLKPKHIRVWCPRPPRAV + AYYGPGVDYKDGTLTPLSTKDLTTYGFGHQNKAVYTAGYKICNYHLATQDDLQNAVNV + MWSRDLLVTESRAQGTDSIARCNCNAGVYYCESRRKYYPVSFVGPTFQYMEANNYYPA + RYQSHMLIGHGFASPGDCGGILRCHHGVIGIITAGGEGLVAFSDIRDLYAYEEEAMEQ + GITNYIESLGAAFGSGFTQQISDKITELTNMVTSTITEKLLKNLIKIISSLVIITRNY + EDTTTVLATLALLGCDASPWQWLRKKACDVLEIPYVIKQGDSWLKKFTEACNAAKGLE + WVSNKISKFIDWLKEKIIPQARDKLEFVTKLRQLEMLENQISTIHQSCPSQEHQEILF + NNVRWLSIQSKRFAPLYAVEAKRIQKLEHTINNYIQFKSKHRIEPVCLLVHGSPGTGK + SVATNLIARAIAERENTSTYSLPPDPSHFDGYKQQGVVIMDDLNQNPDGADMKLFCQM + VSTVEFIPPMASLEEKGILFTSNYVLASTNSSRISPPTVAHSDALARRFAFDMDIQVM + NEYSRDGKLNMAMATEMCKNCHQPANFKRCCPLVCGKAIQLMDKSSRVRYSIDQITTM + IINERNRRSNIGNCMEALFQGPLQYKDLKIDIKTSPPPECINDLLQAVDSQEVRDYCE + KKGWIVNITSQVQTERNINRAMTILQAVTTFAAVAGVVYVMYKLFAGHQGAYTGLPNK + KPNVPTIRTAKVQGPGFDYAVAMAKRNIVTATTSKGEFTMLGVHDNVAILPTHASPGE + SIVIDGKEVEILDAKALEDQAGTNLEITIITLKRNEKFRDIRPHIPTQITETNDGVLI + VNTSKYPNMYVPVGAVTEQGYLNLGGRQTARTLMYNFPTRAGQCGGVITCTGKVIGMH + VGGNGSHGFAAALKRSYFTQSQGEIQWMRPSKEVGYPIINAPSKTKLEPSAFHYVFEG + VKEPAVLTKNDPRLKTDFEEAIFSKYVGNKITEVDEYMKEAVDHYAGQLMSLDINTEQ + MCLEDAMYGTDGLEALDLSTSAGYPYVAMGKKKRDILNKQTRDTKEMQKLLDTYGINL + PLVTYVKDELRSKTKVEQGKSRLIEASSLNDSVAMRMAFGNLYAAFHKNPGVITGSAV + GCDPDLFWSKIPVLMEEKLFAFDYTGYDASLSPAWFEALKMVLEKIGFGDRVDYIDYL + NHSHHLYKNKTYCVKGGMPSGCSGTSIFNSMINNLIIRTLLLKTYKGIDLDHLKMIAY + GDDVIASYPHEVDASLLAQSGKDYGLTMTPADKSATFETVTWENVTFLKRFFRADEKY + PFLIHPVMPMKEIHESIRWTKDPRNTQDHVRSLCLLAWHNGEEEYNKFLAKIRSVPIG + RALLLPEYSTLYRRWLDSF" + mat_peptide 746..949 + 
/locus_tag="PVgp1" + /product="coat protein VP4" + /protein_id="NP_740468.1" + /db_xref="GI:25121840" + mat_peptide 950..1765 + /locus_tag="PVgp1" + /product="coat protein VP2" + /protein_id="NP_740469.1" + /db_xref="GI:25121841" + mat_peptide 1766..2479 + /locus_tag="PVgp1" + /product="coat protein VP3" + /protein_id="NP_740470.1" + /db_xref="GI:25121842" + mat_peptide 2480..3385 + /locus_tag="PVgp1" + /product="coat protein VP1" + /protein_id="NP_740471.1" + /db_xref="GI:25121843" + mat_peptide 3386..3832 + /locus_tag="PVgp1" + /product="Picornain 2A" + /note="Small chymotrypsin-like cysteine proteinase" + /protein_id="NP_740477.1" + /db_xref="GI:25121849" + mat_peptide 3833..4123 + /locus_tag="PVgp1" + /product="protein 2B" + /protein_id="NP_740472.1" + /db_xref="GI:25121844" + mat_peptide 4124..5110 + /locus_tag="PVgp1" + /product="protein 2C (NTPase)" + /note="Involved in initiation and elongation of RNA + synthesis, RNA encapsidation, virus uncoating and + guanidine resistance (Asn179-Gly mutation). Putative + helicase III." 
+ /protein_id="NP_740473.1" + /db_xref="GI:25121845" + mat_peptide 5111..5371 + /locus_tag="PVgp1" + /product="protein 3A" + /protein_id="NP_740474.1" + /db_xref="GI:25121846" + mat_peptide 5372..5437 + /locus_tag="PVgp1" + /product="genome linked protein VPg" + /protein_id="NP_740475.1" + /db_xref="GI:25121847" + mat_peptide 5438..5986 + /locus_tag="PVgp1" + /product="Picornain 3C" + /note="Chymotrypsin-like cystein proteinase 3C" + /protein_id="NP_740476.2" + /db_xref="GI:62871476" + mat_peptide 5987..7369 + /locus_tag="PVgp1" + /product="RNA-directed RNA-polymerase 3D" + /protein_id="NP_740478.2" + /db_xref="GI:62871477" + conflict 1467..1468 + /locus_tag="PVgp1" + /note="GG is G in [3]" + /citation=[4] + conflict 1529..1530 + /locus_tag="PVgp1" + /note="CC is C in [3]" + /citation=[4] + conflict 1534..1535 + /locus_tag="PVgp1" + /note="CC is C in [3]" + /citation=[4] + conflict 1547 + /locus_tag="PVgp1" + /note="C is U in [3]" + /citation=[4] + conflict 1579 + /locus_tag="PVgp1" + /note="C is A in [3]" + /citation=[4] + conflict 1601 + /locus_tag="PVgp1" + /note="A is C in [3]" + /citation=[4] + conflict 1668 + /locus_tag="PVgp1" + /note="C is U in [3]" + /citation=[4] + conflict 2001 + /locus_tag="PVgp1" + /note="A is C in [3]" + /citation=[4] + conflict 2004..2006 + /locus_tag="PVgp1" + /note="AUC is CCU in [3]" + /citation=[4] + conflict 2035 + /locus_tag="PVgp1" + /note="U is C in [3]" + /citation=[4] + conflict 2133 + /locus_tag="PVgp1" + /note="U is C in [3]" + /citation=[4] + conflict 2286 + /locus_tag="PVgp1" + /note="C is G in [3]" + /citation=[4] + conflict 2983 + /locus_tag="PVgp1" + /note="G is A in [3]" + /citation=[4] + conflict 3043 + /locus_tag="PVgp1" + /note="A is G in [3]" + /citation=[4] + conflict 3303..3304 + /locus_tag="PVgp1" + /note="GG is G in [3]" + /citation=[4] + conflict 3308 + /locus_tag="PVgp1" + /note="G is GC in [3]" + /citation=[4] + conflict 3657 + /locus_tag="PVgp1" + /note="C is U in [3]" + /citation=[4] + conflict 3696 + 
/locus_tag="PVgp1" + /note="C is A in [3]" + /citation=[4] + conflict 3766 + /locus_tag="PVgp1" + /note="C is A in [3]" + /citation=[4] + conflict 4158 + /locus_tag="PVgp1" + /note="C is CC in [3]" + /citation=[4] + conflict 4163..4164 + /locus_tag="PVgp1" + /note="GC is G in [3]" + /citation=[4] + conflict 4174 + /locus_tag="PVgp1" + /note="A is C in [3]" + /citation=[4] + conflict 5113 + /locus_tag="PVgp1" + /note="A is C in [3]" + /citation=[4] + conflict 5598 + /locus_tag="PVgp1" + /note="U is C in [3]" + /citation=[4] + conflict 5619..5623 + /locus_tag="PVgp1" + /note="CGCUC is UGUUU in [3]" + /citation=[4] + conflict 5645 + /locus_tag="PVgp1" + /note="C is U in [3]" + /citation=[4] + conflict 5786 + /locus_tag="PVgp1" + /note="G is C in [3]" + /citation=[4] + conflict 5903..5905 + /locus_tag="PVgp1" + /note="AAA is AA in [3]" + /citation=[4] + conflict 5927..5931 + /locus_tag="PVgp1" + /note="GGGAA is GGA in [3]" + /citation=[4] + conflict 5970..5971 + /locus_tag="PVgp1" + /note="AC is UA in [3]" + /citation=[4] + conflict 5997 + /locus_tag="PVgp1" + /note="A is C in [3]" + /citation=[4] + conflict 6019 + /locus_tag="PVgp1" + /note="A is C in [3]" + /citation=[4] + conflict 6021 + /locus_tag="PVgp1" + /note="U is C in [3]" + /citation=[4] + conflict 6261 + /locus_tag="PVgp1" + /note="C is U in [3]" + /citation=[4] + conflict 6845..6847 + /locus_tag="PVgp1" + /note="CCA is CA in [2]" + /citation=[5] + conflict 6978..6979 + /locus_tag="PVgp1" + /note="UU is UUU in [2]" + /citation=[5] + conflict 7258 + /locus_tag="PVgp1" + /note="U is UU in [2]" + /citation=[5] + conflict 7410 + /note="U is C in [3] and [2]" + /citation=[4] + /citation=[5] + conflict 7440 + /note="G is GG in [3] and [2]" + /citation=[4] + /citation=[5] +ORIGIN + 1 ttaaaacagc tctggggttg tacccacccc agaggcccac gtggcggcta gtactccggt + 61 attgcggtac ccttgtacgc ctgttttata ctcccttccc gtaacttaga cgcacaaaac + 121 caagttcaat agaagggggt acaaaccagt accaccacga acaagcactt ctgtttcccc + 181 ggtgatgtcg 
tatagactgc ttgcgtggtt gaaagcgacg gatccgttat ccgcttatgt + 241 acttcgagaa gcccagtacc acctcggaat cttcgatgcg ttgcgctcag cactcaaccc + 301 cagagtgtag cttaggctga tgagtctgga catccctcac cggtgacggt ggtccaggct + 361 gcgttggcgg cctacctatg gctaacgcca tgggacgcta gttgtgaaca aggtgtgaag + 421 agcctattga gctacataag aatcctccgg cccctgaatg cggctaatcc caacctcgga + 481 gcaggtggtc acaaaccagt gattggcctg tcgtaacgcg caagtccgtg gcggaaccga + 541 ctactttggg tgtccgtgtt tccttttatt ttattgtggc tgcttatggt gacaatcaca + 601 gattgttatc ataaagcgaa ttggattggc catccggtga aagtgagact cattatctat + 661 ctgtttgctg gatccgctcc attgagtgtg tttactctaa gtacaatttc aacagttatt + 721 tcaatcagac aattgtatca taatgggtgc tcaggtttca tcacagaaag tgggcgcaca + 781 tgaaaactca aatagagcgt atggtggttc taccattaat tacaccacca ttaattatta + 841 tagagattca gctagtaacg cggcttcgaa acaggacttc tctcaagacc cttccaagtt + 901 caccgagccc atcaaggatg tcctgataaa aacagcccca atgctaaact cgccaaacat + 961 agaggcttgc gggtatagcg atagagtact gcaattaaca ctgggaaact ccactataac + 1021 cacacaggag gcggctaatt cagtagtcgc ttatgggcgt tggcctgaat atctgaggga + 1081 cagcgaagcc aatccagtgg accagccgac agaaccagac gtcgctgcat gcaggtttta + 1141 tacgctagac accgtgtctt ggacgaaaga gtcgcgaggg tggtggtgga agttgcctga + 1201 tgcactgagg gacatgggac tctttgggca aaatatgtac taccactacc taggtaggtc + 1261 cgggtacacc gtgcatgtac agtgtaacgc ctccaaattc caccaggggg cactaggggt + 1321 attcgccgta ccagagatgt gtctggccgg ggatagcaac accactacca tgcacaccag + 1381 ctatcaaaat gccaatcctg gcgagaaagg aggcactttc acgggtacgt tcactcctga + 1441 caacaaccag acatcacctg cccgcaggtt ctgcccggtg gattacctcc ttggaaatgg + 1501 cacgttgttg gggaatgcct ttgtgttccc gcaccagata ataaacctac ggaccaacaa + 1561 ctgtgctaca ctggtactcc cttacgtgaa ctccctctcg atagatagta tggtaaagca + 1621 caataattgg ggaattgcaa tattaccatt ggccccatta aattttgcta gtgagtcctc + 1681 cccagagatt ccaatcacct tgaccatagc ccctatgtgc tgtgagttca atggattaag + 1741 aaacatcacc ctgccacgct tacagggcct gccggtcatg aacacccctg gtagcaatca + 1801 atatcttact gcagacaact tccagtcacc gtgtgcgctg 
cctgaatttg atgtgacccc + 1861 acctattgac atacccggtg aagtaaagaa catgatggaa ttggcagaaa tcgacaccat + 1921 gattcccttt gacttaagtg ccacaaaaaa gaacaccatg gaaatgtata gggttcggtt + 1981 aagtgacaaa ccacatacag acgatcccat actctgcctg tcactctctc cagcttcaga + 2041 tcctaggttg tcacatacta tgcttggaga aatcctaaat tactacacac actgggcagg + 2101 atccctgaag ttcacgtttc tgttctgtgg attcatgatg gcaactggca aactgttggt + 2161 gtcatacgcg cctcctggag ccgacccacc aaagaagcgt aaggaggcga tgttgggaac + 2221 acatgtgatc tgggacatag gactgcagtc ctcatgtact atggtagtgc catggattag + 2281 caacaccacg tatcggcaaa ccatagatga tagtttcacc gaaggcggat acatcagcgt + 2341 cttctaccaa actagaatag tcgtccctct ttcgacaccc agagagatgg acatccttgg + 2401 ttttgtgtca gcgtgtaatg acttcagcgt gcgcttgttg cgagatacca cacatataga + 2461 gcaaaaagcg ctagcacagg ggttaggtca gatgcttgaa agcatgattg acaacacagt + 2521 ccgtgaaacg gtgggggcgg caacatctag agacgctctc ccaaacactg aagccagtgg + 2581 accaacacac tccaaggaaa ttccggcact caccgcagtg gaaactgggg ccacaaatcc + 2641 actagtccct tctgatacag tgcaaaccag acatgttgta caacataggt caaggtcaga + 2701 gtctagcata gagtctttct tcgcgcgggg tgcatgcgtg accattatga ccgtggataa + 2761 cccagcttcc accacgaata aggataagct atttgcagtg tggaagatca cttataaaga + 2821 tactgtccag ttacggagga aattggagtt cttcacctat tctagatttg atatggaact + 2881 tacctttgtg gttactgcaa atttcactga gactaacaat gggcatgcct taaatcaagt + 2941 gtaccaaatt atgtacgtac caccaggcgc tccagtgccc gagaaatggg acgactacac + 3001 atggcaaacc tcatcaaatc catcaatctt ttacacctac ggaacagctc cagcccggat + 3061 ctcggtaccg tatgttggta tttcgaacgc ctattcacac ttttacgacg gtttttccaa + 3121 agtaccactg aaggaccagt cggcagcact aggtgactcc ctttatggtg cagcatctct + 3181 aaatgacttc ggtattttgg ctgttagagt agtcaatgat cacaacccga ccaaggtcac + 3241 ctccaaaatc agagtgtatc taaaacccaa acacatcaga gtctggtgcc cgcgtccacc + 3301 gagggcagtg gcgtactacg gccctggagt ggattacaag gatggtacgc ttacacccct + 3361 ctccaccaag gatctgacca catatggatt cggacaccaa aacaaagcgg tgtacactgc + 3421 aggttacaaa atttgcaact accacttggc cactcaggat gatttgcaaa acgcagtgaa + 3481 
cgtcatgtgg agtagagacc tcttagtcac agaatcaaga gcccagggca ccgattcaat + 3541 cgcaaggtgc aattgcaacg caggggtgta ctactgcgag tctagaagga aatactaccc + 3601 agtatccttc gttggcccaa cgttccagta catggaggct aataactatt acccagctag + 3661 gtaccagtcc catatgctca ttggccatgg attcgcatct ccaggggatt gtggtggcat + 3721 actcagatgt caccacgggg tgatagggat cattactgct ggtggcgaag ggttggttgc + 3781 attttcagac attagagact tgtatgccta cgaagaagaa gccatggaac aaggcatcac + 3841 caattacata gagtcacttg gggccgcatt tggaagtgga tttactcagc agattagcga + 3901 caaaataaca gagttgacca atatggtgac cagtaccatc actgaaaagc tacttaagaa + 3961 cttgatcaag atcatatcct cactagttat tataactagg aactatgaag acaccacaac + 4021 agtgctcgct accctggccc ttcttgggtg tgatgcttca ccatggcagt ggcttagaaa + 4081 gaaagcatgc gatgttctgg agatacctta tgtcatcaag caaggtgaca gttggttgaa + 4141 gaagtttact gaagcatgca acgcagctaa gggactggag tgggtgtcaa acaaaatctc + 4201 aaaattcatt gattggctca aggagaaaat tatcccacaa gctagagata agttggaatt + 4261 tgtaacaaaa cttagacaac tagaaatgct ggaaaaccaa atctcaacta tacaccaatc + 4321 atgccctagt caggaacacc aggaaattct attcaataat gtcagatggt tatccatcca + 4381 gtctaagagg tttgcccctc tttacgcagt ggaagccaaa agaatacaga aactagagca + 4441 tactattaac aactacatac agttcaagag caaacaccgt attgaaccag tatgtttgct + 4501 agtacatggc agccccggaa caggtaaatc tgtagcaacc aacctgattg ctagagccat + 4561 agctgaaaga gaaaacacgt ccacgtactc gctacccccg gatccatcac acttcgacgg + 4621 atacaaacaa cagggagtgg tgattatgga cgacctgaat caaaacccag atggtgcgga + 4681 catgaagctg ttctgtcaga tggtatcaac agtggagttt ataccaccca tggcatccct + 4741 ggaggagaaa ggaatcctgt ttacttcaaa ttacgttcta gcatccacaa actcaagcag + 4801 aatttccccc cccactgtgg cacacagtga tgcattagcc aggcgctttg cgttcgacat + 4861 ggacattcag gtcatgaatg agtattctag agatgggaaa ttgaacatgg ccatggctac + 4921 tgaaatgtgt aagaactgtc accaaccagc aaactttaag agatgctgtc ctttagtgtg + 4981 tggtaaggca attcaattaa tggacaaatc ttccagagtt agatacagta ttgaccagat + 5041 cactacaatg attatcaatg agagaaacag aagatccaac attggcaatt gtatggaggc + 5101 tttgtttcaa ggaccactcc 
agtataaaga cttgaaaatt gacatcaaga cgagtccccc + 5161 tcctgaatgt atcaatgact tgctccaagc agttgactcc caggaggtga gagattactg + 5221 tgagaagaag ggttggatag tcaacatcac cagccaggtt caaacagaaa ggaacatcaa + 5281 cagggcaatg acaattctac aagcggtgac aaccttcgcc gcagtggctg gagttgtcta + 5341 tgtcatgtat aaactgtttg ctggacacca gggagcatac actggtttac caaacaaaaa + 5401 acccaacgtg cccaccattc ggacagcaaa ggtacaagga ccagggttcg attacgcagt + 5461 ggctatggct aaaagaaaca ttgttacagc aactactagc aagggagagt tcactatgtt + 5521 aggagtccac gacaacgtgg ctattttacc aacccacgct tcacctggtg aaagcattgt + 5581 gatcgatggc aaagaagtgg agatcttgga tgccaaagcg ctcgaagatc aagcaggaac + 5641 caatcttgaa atcactataa tcactctaaa gagaaatgaa aagttcagag acattagacc + 5701 acatatacct actcaaatca ctgagacaaa tgatggagtc ttgatcgtga acactagcaa + 5761 gtaccccaat atgtatgttc ctgtcggtgc tgtgactgaa cagggatatc taaatctcgg + 5821 tgggcgccaa actgctcgta ctctaatgta caactttcca accagagcag gacagtgtgg + 5881 tggagtcatc acatgtactg ggaaagtcat cgggatgcat gttggtggga acggttcaca + 5941 cgggtttgca gcggccctga agcgatcata cttcactcag agtcaaggtg aaatccagtg + 6001 gatgagacct tcgaaggaag tgggatatcc aatcataaat gccccgtcca aaaccaagct + 6061 tgaacccagt gctttccact atgtgtttga aggggtgaag gaaccagcag tcctcactaa + 6121 aaacgatccc aggcttaaga cagactttga ggaggcaatt ttctccaagt acgtgggtaa + 6181 caaaattact gaagtggatg agtacatgaa agaggcagta gaccactatg ctggccagct + 6241 catgtcacta gacatcaaca cagaacaaat gtgcttggag gatgccatgt atggcactga + 6301 tggtctagaa gcacttgatt tgtccaccag tgctggctac ccttatgtag caatgggaaa + 6361 gaagaagaga gacatcttga acaaacaaac cagagacact aaggaaatgc aaaaactgct + 6421 cgacacatat ggaatcaacc tcccactggt gacttatgta aaggatgaac ttagatccaa + 6481 aacaaaggtt gagcagggga aatccagatt aattgaagct tctagtttga atgactcagt + 6541 ggcaatgaga atggcttttg ggaacctata tgctgctttt cacaaaaacc caggagtgat + 6601 aacaggttca gcagtggggt gcgatccaga tttgttttgg agcaaaattc cggtattgat + 6661 ggaagagaag ctgtttgctt ttgactacac agggtatgat gcatctctca gccctgcttg + 6721 gttcgaggca ctaaagatgg tgcttgagaa aatcggattc 
ggagacagag ttgactacat + 6781 cgactaccta aaccactcac accacctgta caagaataaa acatactgtg tcaagggcgg + 6841 tatgccatct ggctgctcag gcacttcaat ttttaactca atgattaaca acttgattat + 6901 caggacactc ttactgaaaa cctacaaggg catagattta gaccacctaa aaatgattgc + 6961 ctatggtgat gatgtaattg cttcctaccc ccatgaagtt gacgctagtc tcctagccca + 7021 atcaggaaaa gactatggac taactatgac tccagctgac aaatcagcta catttgaaac + 7081 agtcacatgg gagaatgtaa cattcttgaa gagattcttc agggcagacg agaaataccc + 7141 atttcttatt catccagtaa tgccaatgaa ggaaattcat gaatcaatta gatggactaa + 7201 agatcctagg aacactcagg atcacgttcg ctctctgtgc cttttagctt ggcacaatgg + 7261 cgaagaagaa tataacaaat tcctagctaa aatcaggagt gtgccaattg gaagagcttt + 7321 attgctccca gagtactcaa cattgtaccg ccgttggctt gactcatttt agtaacccta + 7381 cctcagtcga attggattgg gtcatactgt tgtaggggta aatttttctt taattcggag +// + From 65bb304e3eb89f7fcfda7659e291434514a67ee4 Mon Sep 17 00:00:00 2001 From: Paul Cantalupo Date: Tue, 6 Nov 2012 14:36:59 -0500 Subject: [PATCH 2/2] simplifying tests --- t/SeqIO/genbank.t | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/t/SeqIO/genbank.t b/t/SeqIO/genbank.t index 0c1cac62b4..d48aab451c 100644 --- a/t/SeqIO/genbank.t +++ b/t/SeqIO/genbank.t @@ -7,7 +7,7 @@ BEGIN { use lib '.'; use Bio::Root::Test; - test_begin(-tests => 283 ); + test_begin(-tests => 281 ); use_ok('Bio::SeqIO::genbank'); } @@ -608,15 +608,13 @@ my $in = Bio::SeqIO->new(-format => 'genbank', -file => test_input_file('NC_002058_multDBLINK_bug3375.gb')); my $seq = $in->next_seq(); # should not throw a warning now my @dblinks = $seq->annotation->get_Annotations('dblink'); # contains 5 dblink references +# testing DBLINK BioProject: PRJNA15288 is($dblinks[0]->database, 'BioProject', 'bug3375 database is BioProject'); is($dblinks[0]->primary_id, 'PRJNA15288', 'bug3375 primary_id is PRJNA15288'); - -is($dblinks[1]->database, 'Project'); -is($dblinks[1]->primary_id, '100'); +# testing DBLINK Project:100,200,300 is($dblinks[3]->database, 'Project');
is($dblinks[3]->primary_id, '300'); - +# testing DBLINK NC_002058.3 is($dblinks[4]->database, 'GenBank'); is($dblinks[4]->primary_id, 'NC_002058'); is($dblinks[4]->version, '3'); -