diff --git a/.gitignore b/.gitignore index 398b918333..e5a15934b6 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ Build Build.bat _build* +_Inline pm_to_blib* *.tar.gz .lwpcookies diff --git a/.travis.yml b/.travis.yml index ccd77559eb..233a26fa03 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,6 +26,8 @@ install: - "cpanm Bio::Phylo | tail -n 1" - "cpanm Test::Weaken | tail -n 1" - "cpanm Test::Memory::Cycle | tail -n 1" + #Test coverage from Coveralls + #- cpanm --quiet --notest Devel::Cover::Report::Coveralls #for some reason tests and deps aren't skipped here. Will have to look into it more... #git repos, seems to only work for simple checkouts, so pure perl only (TODO: look into before_script for more detail) - "git clone https://github.com/bioperl/Bio-Root.git; export PERL5LIB=$( pwd )/Bio-Root/lib:$PERL5LIB" @@ -34,6 +36,10 @@ install: script: - "./Build test" + #Devel::Cover coverage options are: statement, branch, condition, path, subroutine, pod, time, all and none + #- "./Build build && cover -test -report coveralls" #complete version coverage test + #- PERL5OPT=-MDevel::Cover=+ignore,prove,-coverage,statement,subroutine prove -lr t #limited version coverage test + #- cover -report coveralls #TODO - send emails to bioperl-guts-l notifications: diff --git a/AUTHORS b/AUTHORS index b6d8785a5f..c9724e2fe3 100644 --- a/AUTHORS +++ b/AUTHORS @@ -20,6 +20,8 @@ =item * Brian Osborne +=item * Francisco J. 
Ossandon + =item * Jason Stajich =item * Lincoln Stein diff --git a/Bio/DB/BioFetch.pm b/Bio/DB/BioFetch.pm index 55d8be35a8..bdf4b3582c 100644 --- a/Bio/DB/BioFetch.pm +++ b/Bio/DB/BioFetch.pm @@ -26,14 +26,14 @@ Bio::DB::BioFetch - Database object interface to BioFetch retrieval $bf = Bio::DB::BioFetch->new(); - $seq = $bf->get_Seq_by_id('BUM'); # EMBL or SWALL ID + $seq = $bf->get_Seq_by_id('HSFOS'); # EMBL or SWALL ID # change formats, storage procedures $bf = Bio::DB::BioFetch->new(-format => 'fasta', -retrievaltype => 'tempfile', -db => 'EMBL'); - $stream = $bf->get_Stream_by_id(['BUM','J00231']); + $stream = $bf->get_Stream_by_id(['HSFOS','J00231']); while (my $s = $stream->next_seq) { print $s->seq,"\n"; } @@ -135,7 +135,7 @@ BEGIN { fasta => 'fasta', namespace => 'uniprot', }, - 'uniprot' => { + 'uniprot' => { default => 'swiss', swissprot => 'swiss', fasta => 'fasta', diff --git a/Bio/DB/EMBL.pm b/Bio/DB/EMBL.pm index 4d187115fa..3b1fb3f1b2 100644 --- a/Bio/DB/EMBL.pm +++ b/Bio/DB/EMBL.pm @@ -23,7 +23,7 @@ Bio::DB::EMBL - Database object interface for EMBL entry retrieval $embl = Bio::DB::EMBL->new(); # remember that EMBL_ID does not equal GenBank_ID! - $seq = $embl->get_Seq_by_id('BUM'); # EMBL ID + $seq = $embl->get_Seq_by_id('HSFOS'); # EMBL ID print "cloneid is ", $seq->id, "\n"; # or changeing to accession number and Fasta format ... 
diff --git a/Bio/DB/Fasta.pm b/Bio/DB/Fasta.pm index 915e6bf09d..cb62f5b489 100644 --- a/Bio/DB/Fasta.pm +++ b/Bio/DB/Fasta.pm @@ -243,7 +243,6 @@ sub _calculate_offsets { return \%offsets; } - =head2 seq Title : seq, sequence, subseq @@ -289,8 +288,8 @@ sub subseq { seek($fh, $filestart,0); read($fh, $data, $filestop-$filestart+1); - $data =~ s/\n//g; - $data =~ s/\r//g; + + $data = Bio::DB::IndexedBase::_strip_crnl($data); if ($strand == -1) { # Reverse-complement the sequence @@ -332,8 +331,7 @@ sub header { read($fh, $data, $headerlen); # On Windows chomp remove '\n' but leaves '\r' # when reading '\r\n' in binary mode - $data =~ s/\n//g; - $data =~ s/\r//g; + $data = Bio::DB::IndexedBase::_strip_crnl($data); substr($data, 0, 1) = ''; return $data; } diff --git a/Bio/DB/Flat.pm b/Bio/DB/Flat.pm index a911ed5a66..f285227ff1 100644 --- a/Bio/DB/Flat.pm +++ b/Bio/DB/Flat.pm @@ -23,9 +23,9 @@ Bio::DB::Flat - Interface for indexed flat files -write_flag => 1); $db->build_index('/usr/share/embl/primate.embl', '/usr/share/embl/protists.embl'); - $seq = $db->get_Seq_by_id('BUM'); + $seq = $db->get_Seq_by_id('HSFOS'); @sequences = $db->get_Seq_by_acc('DIV' => 'primate'); - $raw = $db->fetch_raw('BUM'); + $raw = $db->fetch_raw('HSFOS'); =head1 DESCRIPTION diff --git a/Bio/DB/IndexedBase.pm b/Bio/DB/IndexedBase.pm index 52cd902c90..b8334e4ba6 100644 --- a/Bio/DB/IndexedBase.pm +++ b/Bio/DB/IndexedBase.pm @@ -241,7 +241,7 @@ methods. 
Internal methods are usually preceded with a _ package Bio::DB::IndexedBase; BEGIN { - @AnyDBM_File::ISA = qw(DB_File GDBM_File NDBM_File SDBM_File) + @AnyDBM_File::ISA = qw(DB_File GDBM_File NDBM_File SDBM_File) if(!$INC{'AnyDBM_File.pm'}); } @@ -268,6 +268,46 @@ use constant DIE_ON_MISSMATCHED_LINES => 1; # you can avoid dying if you want but you may get incorrect results +# Compiling the below regular expressions speeds up the Pure Perl +# seq/subseq() from Bio::DB::Fasta by about 7% from 7.76s to 7.22s +# over 32358 calls on Variant Effect Prediction data. +my $nl = qr/\n/; +my $cr = qr/\r/; + +# Remove carriage returns (\r) and newlines (\n) from a string. When +# called from subseq, this can take a signficiant portion of time, in +# Variant Effect Prediction. Therefore we compile the match portion. +sub _strip_crnl { + my $str = shift; + $str =~ s/$nl//g; + $str =~ s/$cr//g; + return $str; +} + +# C can do perfrom _strip_crnl much faster. But this requires the +# Inline::C module which we don't require people to have. So we make +# this optional by wrapping the C code in an eval. If the eval works, +# the Perl strip_crnl() function is overwritten. +eval q{ + use Inline C => <<'END_OF_C_CODE'; + /* Strip all new line (\n) and carriage return (\r) characters + from string str + */ + char* _strip_crnl(char* str) { + char *s; + char *s2 = str; + for (s = str; *s; *s++) { + if (*s != '\n' && *s != '\r') { + *s2++ = *s; + } + } + *s2 = '\0'; + return str; + } +END_OF_C_CODE +}; + + =head2 new Title : new @@ -682,6 +722,8 @@ sub _close_index { return 1; } +# Compiling the below regular expression speeds up _parse_compound_id +my $compound_id = qr/^ (.+?) (?:\:([\d_]+)(?:,|-|\.\.)([\d_]+))? (?:\/(.+))? $/x; sub _parse_compound_id { # Handle compound IDs: @@ -699,7 +741,7 @@ sub _parse_compound_id { if ( (not defined $start ) && (not defined $stop ) && (not defined $strand) && - ($id =~ /^ (.+?) (?:\:([\d_]+)(?:,|-|\.\.)([\d_]+))? (?:\/(.+))? 
$/x) ) { + ($id =~ m{$compound_id}) ) { # Start, stop and strand not provided and ID looks like a compound ID ($id, $start, $stop, $strand) = ($1, $2, $3, $4); } diff --git a/Bio/DB/Qual.pm b/Bio/DB/Qual.pm index 62c87e020f..675b470748 100644 --- a/Bio/DB/Qual.pm +++ b/Bio/DB/Qual.pm @@ -335,8 +335,7 @@ sub subqual { read($fh, $data, $filestop-$filestart+1); # Process quality score - $data =~ s/\n//g; - $data =~ s/\r//g; + Bio::DB::IndexedBase::_strip_crnl($data); my $subqual = 0; $subqual = 1 if ( $start || $stop ); my @data; @@ -379,9 +378,9 @@ sub header { seek($fh, $offset, 0); read($fh, $data, $headerlen); # On Windows chomp remove '\n' but leaves '\r' - # when reading '\r\n' in binary mode - $data =~ s/\n//g; - $data =~ s/\r//g; + # when reading '\r\n' in binary mode, + # _strip_crnl removes both + $data = Bio::DB::IndexedBase::_strip_crnl($data); substr($data, 0, 1) = ''; return $data; } diff --git a/Bio/DB/Registry.pm b/Bio/DB/Registry.pm index d982dea16b..e92f9a9523 100644 --- a/Bio/DB/Registry.pm +++ b/Bio/DB/Registry.pm @@ -122,7 +122,9 @@ sub _load_registry { my $self = shift; eval { $HOME = (getpwuid($>))[7]; } unless $HOME; if ($@) { - $self->warn("This Perl doesn't implement function getpwuid(), no \$HOME"); + # Windows can have Win32::LoginName to get the Username, so check if it works before giving up + ( defined &Win32::LoginName ) ? 
( $HOME = Win32::LoginName() ) + : $self->warn("This Perl doesn't implement function getpwuid(), no \$HOME"); } my @ini_files = $self->_get_ini_files(); diff --git a/Bio/DB/SeqFeature/Store/LoadHelper.pm b/Bio/DB/SeqFeature/Store/LoadHelper.pm index b852efa8ef..2a50bdd236 100644 --- a/Bio/DB/SeqFeature/Store/LoadHelper.pm +++ b/Bio/DB/SeqFeature/Store/LoadHelper.pm @@ -40,7 +40,7 @@ use File::Temp 'tempdir'; use File::Spec; use Fcntl qw(O_CREAT O_RDWR); -our $VERSION = '1.10'; +our $VERSION = '1.11'; my %DBHandles; diff --git a/Bio/DB/Taxonomy/flatfile.pm b/Bio/DB/Taxonomy/flatfile.pm index 3c1565d260..1573b8d735 100644 --- a/Bio/DB/Taxonomy/flatfile.pm +++ b/Bio/DB/Taxonomy/flatfile.pm @@ -99,6 +99,9 @@ $DEFAULT_PARENT_INDEX = 'parents'; $DB_BTREE->{'flags'} = R_DUP; # allow duplicate values in DB_File BTREEs +# 8192 bytes; this seems to work to keep OS X from complaining +$DB_HASH->{'bsize'} = 0x2000; + @DIVISIONS = ([qw(BCT Bacteria)], [qw(INV Invertebrates)], [qw(MAM Mammals)], diff --git a/Bio/SeqFeatureI.pm b/Bio/SeqFeatureI.pm index fec7c40bbb..c4a9991037 100644 --- a/Bio/SeqFeatureI.pm +++ b/Bio/SeqFeatureI.pm @@ -522,12 +522,13 @@ sub spliced_seq { my @locset = $self->location->each_Location; my @locs; if ( not $nosort ) { - @locs = map { $_->[0] } +# @locs = map { $_->[0] } # sort so that most negative is first basically to order # the features on the opposite strand 5'->3' on their strand # rather than they way most are input which is on the fwd strand - sort { $a->[1] <=> $b->[1] } # Yes Tim, Schwartzian transformation +# sort { $a->[1] <=> $b->[1] } # Yes Tim, Schwartzian transformation + my @proc_locs = map { $fstrand = $_->strand unless defined $fstrand; $mixed = 1 if defined $_->strand && $fstrand != $_->strand; @@ -538,6 +539,16 @@ sub spliced_seq { [ $_, $_->start * ($_->strand || 1) ]; } @locset; + my @sort_locs; + if ( $fstrand == 1 ) { + @sort_locs = sort { $a->[1] <=> $b->[1] } @proc_locs; # Yes Tim, Schwartzian transformation + }elsif ( 
$fstrand == -1 ){ + @sort_locs = sort { $b->[1] <=> $a->[1] } @proc_locs; # Yes Tim, Schwartzian transformation + } else { + @sort_locs = @proc_locs; + } + @locs = map { $_->[0] } @sort_locs; + if ( $mixed ) { $self->warn( "Mixed strand locations, spliced seq using the " . "input order rather than trying to sort"); diff --git a/Bio/SeqIO/embl.pm b/Bio/SeqIO/embl.pm index 1b438949ae..4dc96aaa2a 100644 --- a/Bio/SeqIO/embl.pm +++ b/Bio/SeqIO/embl.pm @@ -1463,7 +1463,7 @@ sub _write_line_EMBL_regex { CHUNK: while($line) { foreach my $pat ($regex, '[,;\.\/-]\s|'.$regex, '[,;\.\/-]|'.$regex) { - if ($line =~ m/^(.{0,$subl})($pat)(.*)/ ) { + if ($line =~ m/^(.{1,$subl})($pat)(.*)/ ) { my $l = $1.$2; $l =~ s/#/ /g # remove word wrap protection char '#' if $pre1 eq "RA "; diff --git a/Bio/SeqIO/genbank.pm b/Bio/SeqIO/genbank.pm index 1836933e3a..29d02c2fc4 100644 --- a/Bio/SeqIO/genbank.pm +++ b/Bio/SeqIO/genbank.pm @@ -499,7 +499,9 @@ sub next_seq { } } - # Comments + # Comments may be plain text or Structured Comments. + # Structured Comments are made up of tag/value pairs and have beginning + # and end delimiters like ##*-Data-START## and ##*-Data-END## elsif ($line =~ /^COMMENT\s+(\S.*)/) { if ($annotation) { my $comment = $1; @@ -507,8 +509,20 @@ sub next_seq { last if ($line =~ /^\S/); $comment .= $line; } - $comment =~ s/\n/ /g; $comment =~ s/ +/ /g; + # Structured Comment, do not remove returns in the tabular section + if ( my ( $text, $table )= $comment + =~ /([^#]*)(##\S+Data-START##.+?##\S+Data-END##)/is + ) { + $text =~ s/\n/ /g if $text; + $table =~ s/START##/START##\n/; + $table =~ s/^\s+//gm; + $comment = $text . "\n" . 
$table; + } + # Plain text, remove returns + else { + $comment =~ s/\n/ /g; + } $annotation->add_Annotation( 'comment', Bio::Annotation::Comment->new( diff --git a/Bio/Tools/Analysis/Protein/GOR4.pm b/Bio/Tools/Analysis/Protein/GOR4.pm index ddc87c011b..352a8b3fe5 100644 --- a/Bio/Tools/Analysis/Protein/GOR4.pm +++ b/Bio/Tools/Analysis/Protein/GOR4.pm @@ -153,12 +153,12 @@ use Bio::SeqIO; use HTTP::Request::Common qw(POST); use Bio::SeqFeature::Generic; use Bio::Seq::Meta::Array; - +$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0; use base qw(Bio::Tools::Analysis::SimpleAnalysisBase); use constant MIN_STRUC_LEN => 3; -my $URL = 'http://npsa-pbil.ibcp.fr/cgi-bin/secpred_gor4.pl'; +my $URL = 'https://npsa-prabi.ibcp.fr/cgi-bin/secpred_sopma.pl'; my $ANALYSIS_NAME = 'GOR4'; my $ANALYSIS_SPEC = {name => 'Gor4', type => 'Protein'}; my $INPUT_SPEC = [ @@ -366,11 +366,9 @@ sub _run { my $out = 'http://npsa-pbil.ibcp.fr/'.$next; my $req2 = HTTP::Request->new(GET=>$out); my $resp2 = $self->request($req2); - $self->status('COMPLETED') if $resp2 ne ''; + $self->status('COMPLETED') if $resp2 ne ''; $self->{'_result'} = $resp2->content; } - - 1; diff --git a/Bio/Tools/Analysis/Protein/HNN.pm b/Bio/Tools/Analysis/Protein/HNN.pm index ef9af040f9..936756a96e 100644 --- a/Bio/Tools/Analysis/Protein/HNN.pm +++ b/Bio/Tools/Analysis/Protein/HNN.pm @@ -154,7 +154,7 @@ use Bio::SeqIO; use HTTP::Request::Common qw (POST); use Bio::SeqFeature::Generic; use Bio::Seq::Meta::Array; - +$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0; use base qw(Bio::Tools::Analysis::SimpleAnalysisBase); @@ -195,9 +195,10 @@ sub _run { # delay repeated calls by default by 3 sec, set delay() to change $self->sleep; $self->status('TERMINATED_BY_ERROR'); - my $request = POST 'http://npsa-pbil.ibcp.fr/cgi-bin/secpred_hnn.pl', + my $request = POST 'https://npsa-prabi.ibcp.fr/cgi-bin/secpred_hnn.pl', Content_Type => 'form-data', - Content => [title => "", + Content => [ + title => "", notice => $self->seq->seq, ali_width => 70, ]; 
diff --git a/Bio/Tools/Analysis/Protein/Sopma.pm b/Bio/Tools/Analysis/Protein/Sopma.pm index 34c82b78b7..526554b66f 100644 --- a/Bio/Tools/Analysis/Protein/Sopma.pm +++ b/Bio/Tools/Analysis/Protein/Sopma.pm @@ -154,12 +154,12 @@ use Bio::SeqIO; use HTTP::Request::Common qw (POST); use Bio::SeqFeature::Generic; use Bio::Seq::Meta::Array; - +$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0; use base qw(Bio::Tools::Analysis::SimpleAnalysisBase); #extends array for 2struc. -my $URL = 'http://npsa-pbil.ibcp.fr/cgi-bin/secpred_sopma.pl'; +my $URL = 'https://npsa-prabi.ibcp.fr/cgi-bin/secpred_sopma.pl'; my $ANALYSIS_NAME= 'Sopma'; my $ANALYSIS_SPEC= {name => 'Sopma', type => 'Protein'}; my $INPUT_SPEC = [ @@ -450,7 +450,7 @@ sub _run { # delay repeated calls by default by 3 sec, set delay() to change $self->sleep; $self->status('TERMINATED_BY_ERROR'); - my $request = POST 'http://npsa-pbil.ibcp.fr/cgi-bin/secpred_sopma.pl', + my $request = POST 'https://npsa-prabi.ibcp.fr/cgi-bin/secpred_sopma.pl', Content_Type => 'form-data', Content => [title => "", notice => $self->seq->seq, @@ -465,6 +465,7 @@ sub _run { #### get text only version of results ## my ($next) = $text =~ /Prediction.*?=(.*?)>/; + return $self unless $next; my $out = "http://npsa-pbil.ibcp.fr/". "$next"; my $req2 = HTTP::Request->new(GET=>$out); my $resp2 = $self->request ($req2); diff --git a/Bio/Tools/CodonTable.pm b/Bio/Tools/CodonTable.pm index d008e311b4..25a68cf12b 100644 --- a/Bio/Tools/CodonTable.pm +++ b/Bio/Tools/CodonTable.pm @@ -127,13 +127,11 @@ only differences are in available initiator codons. 
NCBI Genetic Codes home page: + (Last update of the Genetic Codes: April 30, 2013) http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c -EBI Translation Table Viewer: - http://www.ebi.ac.uk/cgi-bin/mutations/trtables.cgi - -Amended ASN.1 version with ids 16 and 21 is at: - ftp://ftp.ebi.ac.uk/pub/databases/geneticcode/ +ASN.1 version with ids 1 to 25 is at: + ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt Thanks to Matteo diTomasso for the original Perl implementation of these tables. @@ -202,30 +200,33 @@ BEGIN { @NAMES = #id ( + 'Strict', #0, special option for ATG-only start 'Standard', #1 'Vertebrate Mitochondrial',#2 'Yeast Mitochondrial',# 3 - 'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma',#4 + 'Mold, Protozoan, and Coelenterate Mitochondrial and Mycoplasma/Spiroplasma',#4 'Invertebrate Mitochondrial',#5 'Ciliate, Dasycladacean and Hexamita Nuclear',# 6 '', '', - 'Echinoderm Mitochondrial',#9 + 'Echinoderm and Flatworm Mitochondrial',#9 'Euplotid Nuclear',#10 - '"Bacterial"',# 11 + 'Bacterial, Archaeal and Plant Plastid',# 11 'Alternative Yeast Nuclear',# 12 'Ascidian Mitochondrial',# 13 - 'Flatworm Mitochondrial',# 14 + 'Alternative Flatworm Mitochondrial',# 14 'Blepharisma Nuclear',# 15 'Chlorophycean Mitochondrial',# 16 '', '', '', '', 'Trematode Mitochondrial',# 21 'Scenedesmus obliquus Mitochondrial', #22 'Thraustochytrium Mitochondrial', #23 - 'Strict', #24, option for only ATG start + 'Pterobranchia Mitochondrial', #24 + 'Candidate Division SR1 and Gracilibacteria', #25 ); @TABLES = qw( + FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG @@ -245,7 +246,8 @@ BEGIN { FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG 
FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG + FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG + FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG ); # (bases used for these tables, for reference) @@ -255,6 +257,7 @@ BEGIN { @STARTS = qw( + -----------------------------------M---------------------------- ---M---------------M---------------M---------------------------- --------------------------------MMMM---------------M------------ ----------------------------------MM---------------------------- @@ -262,11 +265,11 @@ BEGIN { ---M----------------------------MMMM---------------M------------ -----------------------------------M---------------------------- '' '' - -----------------------------------M---------------------------- + -----------------------------------M---------------M------------ -----------------------------------M---------------------------- ---M---------------M------------MMMM---------------M------------ -------------------M---------------M---------------------------- - -----------------------------------M---------------------------- + ---M------------------------------MM---------------M------------ -----------------------------------M---------------------------- -----------------------------------M---------------------------- -----------------------------------M---------------------------- @@ -274,22 +277,23 @@ BEGIN { -----------------------------------M---------------M------------ -----------------------------------M---------------------------- --------------------------------M--M---------------M------------ - -----------------------------------M---------------------------- + ---M---------------M---------------M---------------M------------ + ---M-------------------------------M---------------M------------ ); my @nucs = qw(t c a g); my $x = 0; ($CODONS, $TRCOL) = ({}, {}); for my $i (@nucs) { - for my $j (@nucs) { - for my $k (@nucs) 
{ - my $codon = "$i$j$k"; - $CODONS->{$codon} = $x; - $TRCOL->{$x} = $codon; - $x++; + for my $j (@nucs) { + for my $k (@nucs) { + my $codon = "$i$j$k"; + $CODONS->{$codon} = $x; + $TRCOL->{$x} = $codon; + $x++; + } } } - } %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub(); %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup(); %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2); @@ -316,25 +320,26 @@ sub new { Title : id Usage : $obj->id(3); $id_integer = $obj->id(); Function: Sets or returns the id of the translation table. IDs are - integers from 1 to 15, excluding 7 and 8 which have been - removed as redundant. If an invalid ID is given the method - returns 0, false. + integers from 0 (special ATG-only start) to 25, excluding + 7-8 and 17-20 which have been removed. If an invalid ID is + given the method returns 1, the standard table. Example : - Returns : value of id, a scalar, 0 if not a valid + Returns : value of id, a scalar, warn and fall back to 1 (standard table) + if specified id is not valid Args : newvalue (optional) =cut sub id{ - my ($self,$value) = @_; - if( defined $value) { - if ( !(defined $TABLES[$value-1]) or $TABLES[$value-1] eq '') { - $self->warn("Not a valid codon table ID [$value] "); - $value = 0; - } - $self->{'id'} = $value; - } - return $self->{'id'}; + my ($self,$value) = @_; + if( defined $value) { + if ( not defined $TABLES[$value] or $TABLES[$value] eq '') { + $self->warn("Not a valid codon table ID [$value], using [1] instead "); + $value = 1; + } + $self->{'id'} = $value; + } + return $self->{'id'}; } =head2 name @@ -353,7 +358,7 @@ sub name{ my ($self) = @_; my ($id) = $self->{'id'}; - return $NAMES[$id-1]; + return $NAMES[$id]; } =head2 tables @@ -372,8 +377,8 @@ sub name{ sub tables{ my %tables; - for my $id (1 .. @NAMES) { - my $name = $NAMES[$id-1]; + for my $id (0 .. 
$#NAMES) { + my $name = $NAMES[$id]; $tables{$id} = $name if $name; } return \%tables; @@ -424,37 +429,38 @@ sub translate { if ($seq =~ /[^actg]/ ) { #ambiguous chars for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) { my $triplet = substr($seq, $i, CODONSIZE); - if( $triplet eq $CODONGAP ) { - $protein .= $GAP; - } elsif (exists $CODONS->{$triplet}) { - $protein .= substr($TABLES[$id-1], - $CODONS->{$triplet},1); - } else { - $protein .= $self->_translate_ambiguous_codon($triplet); + if( $triplet eq $CODONGAP ) { + $protein .= $GAP; + } elsif (exists $CODONS->{$triplet}) { + $protein .= substr($TABLES[$id], + $CODONS->{$triplet},1); + } else { + $protein .= $self->_translate_ambiguous_codon($triplet); + } } - } } else { # simple, strict translation - for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) { - my $triplet = substr($seq, $i, CODONSIZE); + for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) { + my $triplet = substr($seq, $i, CODONSIZE); if( $triplet eq $CODONGAP ) { - $protein .= $GAP; - } if (exists $CODONS->{$triplet}) { - $protein .= substr($TABLES[$id-1], $CODONS->{$triplet}, 1); - } else { + $protein .= $GAP; + } + if (exists $CODONS->{$triplet}) { + $protein .= substr($TABLES[$id], $CODONS->{$triplet}, 1); + } else { $protein .= 'X'; } } } if ($partial == 2 && $complete_codon) { # 2 overhanging nucleotides - my $triplet = substr($seq, ($partial -4)). "n"; - if( $triplet eq $CODONGAP ) { - $protein .= $GAP; - } elsif (exists $CODONS->{$triplet}) { - my $aa = substr($TABLES[$id-1], $CODONS->{$triplet},1); - $protein .= $aa; - } else { - $protein .= $self->_translate_ambiguous_codon($triplet, $partial); - } + my $triplet = substr($seq, ($partial -4)). 
"n"; + if( $triplet eq $CODONGAP ) { + $protein .= $GAP; + } elsif (exists $CODONS->{$triplet}) { + my $aa = substr($TABLES[$id], $CODONS->{$triplet},1); + $protein .= $aa; + } else { + $protein .= $self->_translate_ambiguous_codon($triplet, $partial); + } } return $protein; } @@ -467,23 +473,23 @@ sub _translate_ambiguous_codon { my @codons = $self->unambiguous_codons($triplet); my %aas =(); foreach my $codon (@codons) { - $aas{substr($TABLES[$id-1],$CODONS->{$codon},1)} = 1; + $aas{substr($TABLES[$id],$CODONS->{$codon},1)} = 1; } my $count = scalar keys %aas; if ( $count == 1 ) { - $aa = (keys %aas)[0]; + $aa = (keys %aas)[0]; } elsif ( $count == 2 ) { - if ($aas{'D'} and $aas{'N'}) { - $aa = 'B'; - } - elsif ($aas{'E'} and $aas{'Q'}) { - $aa = 'Z'; - } else { - $partial ? ($aa = '') : ($aa = 'X'); - } + if ($aas{'D'} and $aas{'N'}) { + $aa = 'B'; + } + elsif ($aas{'E'} and $aas{'Q'}) { + $aa = 'Z'; + } else { + $partial ? ($aa = '') : ($aa = 'X'); + } } else { - $partial ? ($aa = '') : ($aa = 'X'); + $partial ? 
($aa = '') : ($aa = 'X'); } return $aa; } @@ -520,7 +526,7 @@ sub translate_strict{ return 'X' unless defined $CODONS->{$value}; - return substr( $TABLES[$id-1], $CODONS->{$value}, 1 ); + return substr( $TABLES[$id], $CODONS->{$value}, 1 ); } =head2 revtranslate @@ -554,8 +560,9 @@ sub revtranslate { $value = ucfirst $value; $value = $THREELETTERSYMBOLS{$value}; } - if ( defined $value and $value =~ /$VALID_PROTEIN/ - and length($value) == 1 ) { + if ( defined $value and $value =~ /$VALID_PROTEIN/ + and length($value) == 1 + ) { my $id = $self->{'id'}; $value = uc $value; @@ -563,18 +570,18 @@ sub revtranslate { foreach my $aa (@aas) { #print $aa, " -2\n"; $aa = '\*' if $aa eq '*'; - while ($TABLES[$id-1] =~ m/$aa/g) { - my $p = pos $TABLES[$id-1]; - push (@codons, $TRCOL->{--$p}); - } + while ($TABLES[$id] =~ m/$aa/g) { + my $p = pos $TABLES[$id]; + push (@codons, $TRCOL->{--$p}); + } } } - if ($coding and uc ($coding) eq 'RNA') { - for my $i (0..$#codons) { - $codons[$i] =~ tr/t/u/; - } - } + if ($coding and uc ($coding) eq 'RNA') { + for my $i (0..$#codons) { + $codons[$i] =~ tr/t/u/; + } + } return @codons; } @@ -597,7 +604,6 @@ sub revtranslate { =cut sub reverse_translate_all { - my ($self, $obj, $cut, $threshold) = @_; ## check args are OK @@ -642,7 +648,6 @@ sub reverse_translate_all { } return $self->_make_iupac_string(\@data); - } =head2 reverse_translate_best @@ -689,7 +694,7 @@ sub reverse_translate_best { $self->throw("Input sequence contains invalid character: $aa"); } } - $str; + return $str; } =head2 is_start_codon @@ -742,7 +747,7 @@ sub _codon_is { my $id = $self->{'id'}; for my $c ( $self->unambiguous_codons($value) ) { - my $m = substr( $table->[$id-1], $CODONS->{$c}, 1 ); + my $m = substr( $table->[$id], $CODONS->{$c}, 1 ); return 0 unless $m eq $key; } return 1; @@ -818,21 +823,19 @@ sub _unambiquous_codons { sub add_table { my ($self, $name, $table, $starts) = @_; - $name ||= 'Custom'. 
scalar @NAMES + 1; - $starts ||= $STARTS[0]; + $name ||= 'Custom' . $#NAMES + 1; + $starts ||= $STARTS[1]; $self->throw('Suspect input!') unless length($table) == 64 and length($starts) == 64; - push @NAMES, $name; + push @NAMES, $name; push @TABLES, $table; push @STARTS, $starts; - return scalar @NAMES; - + return $#NAMES; } sub _make_iupac_string { - my ($self, $cod_ref) = @_; if(ref($cod_ref) ne 'ARRAY') { $self->throw(" I need a reference to a list of references to codons, ". @@ -855,7 +858,6 @@ sub _make_iupac_string { } } return $iupac_string; - } diff --git a/Bio/Tools/Run/RemoteBlast.pm b/Bio/Tools/Run/RemoteBlast.pm index 943ac4ddcb..23cf0e9b0f 100644 --- a/Bio/Tools/Run/RemoteBlast.pm +++ b/Bio/Tools/Run/RemoteBlast.pm @@ -161,6 +161,7 @@ use IO::String; use Bio::SearchIO; use LWP; use HTTP::Request::Common; +use Bio::Root::Version; use constant { NOT_FINISHED => 0, diff --git a/Build.PL b/Build.PL index 22786b5453..b171d21875 100644 --- a/Build.PL +++ b/Build.PL @@ -105,6 +105,9 @@ my %recommends = ( '/Bio::DB::*,Bio::Tools::Run::RemoteBlast,'. 'Bio::Tools::Analysis::Protein*,Bio::Tools::Analysis::DNA*'], + 'Inline::C' => [0.67, + 'Speeding up code like Fasta Bio::DB::Fasta'], + 'IO::Scalar' => [0, 'Deal with non-seekable filehandles/Bio::Tools::GuessSeqFormat'], @@ -191,11 +194,10 @@ my @drivers = available_drivers(); my $build = Bio::Root::Build->new( module_name => 'Bio', dist_name => 'BioPerl', - dist_version => '1.006924', + dist_version => '1.006925', dist_author => 'BioPerl Team ', dist_abstract => 'Bioinformatics Toolkit', license => 'perl', - no_index => {'x_dir' => [qw(examples/root/lib)]}, requires => { 'perl' => '5.6.1', 'IO::String' => 0, # why is this required? 
@@ -261,7 +263,6 @@ my $build = Bio::Root::Build->new( } }, dynamic_config => 1, - #create_makefile_pl => 'passthrough', recursive_test_files => 1, # Extra files needed for BioPerl modules @@ -273,24 +274,6 @@ my $build = Bio::Root::Build->new( my $accept = $build->args('accept'); -# how much do I hate this? Let me count the ways..... -#if (!$build->feature('EntrezGene')) { -# warn <feature('Bio::DB::GFF') || $build->feature('MySQL Tests') || $build->feature('Pg Tests') || $build->feature('SQLite Tests'); diff --git a/Changes b/Changes index 7cee667485..1bcf897016 100644 --- a/Changes +++ b/Changes @@ -17,6 +17,36 @@ Bugs and requested features list: CPAN releases are branched from 'master'. --------------------------------------------------------- +1.6.925 + + * WrapperBase quoted option values [majensen] + * Various documentation fixes and updates [bosborne] + + [Bug Fixes] + + * Fixes in Bio::Root::Build to deal with META.json/yml for CPAN indexing [cjfields] + * Bio::SeqFeature::Generic spliced_seq() bug fix [Eric Snyder, via bosborne] + * NeXML parser fixes [fjossandon] + * Bug fix for Bio::DB::SeqFeature memory adapter [lstein] + * Issue #70: CONTIG parsing in GenBank output fixed [fjossandon] + * Issue #76: Circular genome fixes with Bio::Location::Split [fjossandon] + * Issue #80: Fix lack of caching issue with Bio::DB::Taxonomy [fjossandon] + * Issue #81: Small updates to make sure possible memory leaks are detected [cjfields] + * Issue #84: EMBL format wrapping problem [nyamned] + * Issue #90: Missing entries for translation tables 24 and 25 [fjossandon] + * Issue #95: Speed up of Bio::DB::Fasta::subseq by using a compiled regex + or compiled C code (when Inline::C is installed) [rocky] + * Fix various Bio::Tools::Analysis remote server config problems [cjfields] + * Added several missing 'Data::Stag' and 'LWP::UserAgent' requirements [fjossandon] + * Added a workaround in Bio::DB::Registry to get Username in Windows [fjossandon] + * For HMMer report 
parsing, changed "$hsp->bits" to return 0 instead of undef + to be consistent with "$hit->bits" behaviour [fjossandon] + * Fixed a bug in HMMer3 parsing, where a homology line ending in CS or RF + amino acids made "next_seq" confused and broke the parser [fjossandon] + * Adjusted FTLocationFactory.pm to comply with current GenBank Feature Table + Definition, so now "join(complement(C..D),complement(A..B))" is equivalent + to "complement(join(A..B,C..D))" [fjossandon] + 1.6.924 [Significant changes] diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP index a0cad390b5..731f8c1484 100644 --- a/MANIFEST.SKIP +++ b/MANIFEST.SKIP @@ -70,4 +70,7 @@ # Avoid additional run-time generated things bioperl\.lisp -SeqFeature_(BDB|SQLite|Pg|mysql)\.t \ No newline at end of file +SeqFeature_(BDB|SQLite|Pg|mysql)\.t + +# Don't add these files to MANIFEST +examples/root/lib diff --git a/README.md b/README.md index 59bcb0c099..13914cb592 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ [![Build Status](https://travis-ci.org/bioperl/bioperl-live.svg?branch=master)](https://travis-ci.org/bioperl/bioperl-live) +[![Coverage Status](https://coveralls.io/repos/bioperl/bioperl-live/badge.png?branch=master)](https://coveralls.io/r/bioperl/bioperl-live?branch=master) # Getting Started diff --git a/examples/db/dbfetch b/examples/db/dbfetch index cb6d69ed56..b0dc7458ae 100755 --- a/examples/db/dbfetch +++ b/examples/db/dbfetch @@ -39,7 +39,7 @@ searches returning more than one entry per query term are not in these specs. In its default setup, type one or more EMBL accession numbers -(e.g. J00231), entry name (e.g. BUM) or sequence version into the +(e.g. J00231), entry name (e.g. HSFOS) or sequence version into the seach dialog to retieve hypertext linked enties. Note that for practical reasons only the first 50 identifiers @@ -276,7 +276,7 @@ sub print_prompt { $q->p("This page allows you to retrieve up to ". MAXIDS . 
" entries at the time from various up-to-date biological databases."), $q->p("For EMBL, enter an accession number (e.g. J00231) or entry name (e.g. - BUM) or a sequence version (e.g. J00231.1), or any combination of them + HSFOS) or a sequence version (e.g. J00231.1), or any combination of them separated by a non-word character into your browser's search dialog. SWALL examples are: fos_human, p53_human. For short Ensembl entries, try : AL122059, AL031002, AL031030 . diff --git a/scripts/DB/bp_biofetch_genbank_proxy.pl b/scripts/DB/bp_biofetch_genbank_proxy.pl index 05deac5289..1275c91d33 100644 --- a/scripts/DB/bp_biofetch_genbank_proxy.pl +++ b/scripts/DB/bp_biofetch_genbank_proxy.pl @@ -184,7 +184,7 @@ sub print_usage { Option : id Descr : unique database identifier(s) Type : required - Usage : db=J00231 | id=J00231+BUM + Usage : db=J00231 | id=J00231+HSFOS Arg : string The ID option should be able to process all UIDS in a database. It diff --git a/scripts/seq/bp_translate_seq.pl b/scripts/seq/bp_translate_seq.pl index 6b7194455e..98d91c3662 100644 --- a/scripts/seq/bp_translate_seq.pl +++ b/scripts/seq/bp_translate_seq.pl @@ -42,19 +42,24 @@ =head1 AUTHOR use Bio::SeqIO; use Getopt::Long; -my ($format) = 'fasta'; +my ($format,$outfile) = 'fasta'; GetOptions( - 'format:s' => \$format, - ); + 'f|format:s' => \$format, + 'o|out|outfile:s' => \$outfile, + ); my $oformat = 'fasta'; # this implicity uses the <> file stream -my $seqin = Bio::SeqIO->new( -format => $format, -file => shift); -my $seqout = Bio::SeqIO->new( -format => $oformat, -file => ">-" ); - - +my $seqin = Bio::SeqIO->new( -format => $format, -fh => \*ARGV); +my $seqout; +if( $outfile ) { + $seqout = Bio::SeqIO->new( -format => $oformat, -file => ">$outfile" ); +} else { +# defaults to writing to STDOUT + $seqout = Bio::SeqIO->new( -format => $oformat ); +} while( (my $seq = $seqin->next_seq()) ) { my $pseq = $seq->translate(); $seqout->write_seq($pseq); diff --git a/scripts/taxa/bp_taxid4species.pl 
b/scripts/taxa/bp_taxid4species.pl index 2fcce40ad4..eed36079c0 100644 --- a/scripts/taxa/bp_taxid4species.pl +++ b/scripts/taxa/bp_taxid4species.pl @@ -10,6 +10,7 @@ use strict; use warnings; use Getopt::Long; +use Data::Dumper; my $verbose = 0; my $plain = 0; my $help = 0; @@ -22,33 +23,59 @@ my $ua = new LWP::UserAgent(); -my $urlbase = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=taxonomy&term='; +my $urlbase = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/'; +my $esearch = 'esearch.fcgi?db=taxonomy&usehistory=y&term='; +my $esummary = 'esummary.fcgi?db=taxonomy&query_key=QUERYKEY&WebEnv=WEBENV'; my (@organisms) = @ARGV; die("must provide valid organism") unless @organisms; my $organismstr = join(" OR ", @organisms); $organismstr =~ s/\s/\+/g; -my $response = $ua->get($urlbase.$organismstr); +# Esearch +my $response = $ua->get($urlbase . $esearch . $organismstr); my $t = XML::Twig->new(); print $response->content,"\n"if($verbose); $t->parse($response->content); my $root = $t->root; -my $list = $root->first_child('IdList'); -my @data; -foreach my $child ($list->children('Id') ) { - push @data, $child->text; - if( $plain ) { print $child->text, "\n" } +my $querykey = $root->first_child('QueryKey')->text; +my $webenv = $root->first_child('WebEnv')->text; + +# Esummary +$esummary =~ s/QUERYKEY/$querykey/; +$esummary =~ s/WEBENV/$webenv/; +$response = $ua->get($urlbase . 
$esummary); +$t = XML::Twig->new(); +print $response->content,"\n"if($verbose); +$t->parse($response->content); +$root = $t->root; + +# Parse XML +my %taxinfo; +foreach my $docsum ($root->children) { + foreach my $item ($docsum->children('Item')) { + if ($item->{att}{Name} eq 'ScientificName') { + my $sciname = $item->text; + $taxinfo{lc $sciname}{sciname} = $sciname; + $taxinfo{lc $sciname}{tid} = $docsum->first_child_text('Id'); + last; + } + } } -unless( $plain ) { - $list = $root->first_child('TranslationStack'); - foreach my $set ($list->children('TermSet') ) { - foreach my $term ( $set->children('Term') ) { - print "\"",$term->text(), "\", ", shift @data, "\n"; - } + +# Output in same order as given on command line +foreach my $orgn (@organisms) { + if (exists $taxinfo{lc $orgn}) { + my $tid = $taxinfo{lc $orgn}{tid}; + + if ($plain) { print $tid, "\n"; } + else { print join(", ", "'$orgn'", $tid), "\n"; } } + else { print "'$orgn' not found\n"; } } + + =head1 NAME bp_taxid4species - simple script which returns the NCBI Taxonomic id for a requested species diff --git a/t/LocalDB/Flat.t b/t/LocalDB/Flat.t index 5b688275db..25174a6c6c 100644 --- a/t/LocalDB/Flat.t +++ b/t/LocalDB/Flat.t @@ -4,13 +4,13 @@ use strict; BEGIN { - use lib '.'; - use Bio::Root::Test; + use lib '.'; + use Bio::Root::Test; - test_begin(-tests => 25, - -requires_module => 'DB_File'); + test_begin(-tests => 25, + -requires_module => 'DB_File'); - use_ok('Bio::DB::Flat'); + use_ok('Bio::DB::Flat'); } my $verbose = test_debug(); @@ -21,10 +21,10 @@ my $tmpdir = test_output_dir(); my $db = Bio::DB::Flat->new(-directory => $tmpdir, -index => 'bdb', - -dbname => 'mydb', - -format => 'fasta', - -verbose => $verbose, - -write_flag => 1 ); + -dbname => 'mydb', + -format => 'fasta', + -verbose => $verbose, + -write_flag => 1 ); ok($db); my $dir = test_input_file('AAC12660.fa'); my $result = $db->build_index(glob($dir)); @@ -39,7 +39,7 @@ undef $db; $db = Bio::DB::Flat->new(-directory => $tmpdir, 
-index => 'bdb', -format => 'embl', - -dbname => 'myembl', + -dbname => 'myembl', -verbose => $verbose, -write_flag => 1 ); @@ -63,12 +63,12 @@ is($seq->length,192); undef $db; $db = Bio::DB::Flat->new(-directory => $tmpdir, - -index => 'binarysearch', - -format => 'fasta', - -dbname => 'mybinfa', - -verbose => $verbose, - -write_flag => 1 - ); + -index => 'binarysearch', + -format => 'fasta', + -dbname => 'mybinfa', + -verbose => $verbose, + -write_flag => 1 + ); $dir= test_input_file('dbfa', '1.fa'); $result = $db->build_index($dir); @@ -78,30 +78,32 @@ ok($seq); is($seq->length,808); undef $db; -$db = Bio::DB::Flat->new(-directory => $tmpdir, - -index => 'binarysearch', - -format => 'swiss', - -dbname => 'mybinswiss', - -verbose => $verbose, - -write_flag => 1 - ); -$dir= test_input_file('swiss.dat'); -$result = $db->build_index($dir); - -ok($result); -$seq = $db->get_Seq_by_id('ACON_CAEEL'); -ok($seq); -is($seq->length,788); - -$seq = $db->get_Seq_by_id('ACON_CAEEL'); -ok($seq && ref($seq)); - -undef $db; - +SKIP: { + test_skip(-tests => 4, -requires_module => 'Data::Stag'); + $db = Bio::DB::Flat->new(-directory => $tmpdir, + -index => 'binarysearch', + -format => 'swiss', + -dbname => 'mybinswiss', + -verbose => $verbose, + -write_flag => 1 + ); + $dir= test_input_file('swiss.dat'); + $result = $db->build_index($dir); + + ok($result); + $seq = $db->get_Seq_by_id('ACON_CAEEL'); + ok($seq); + is($seq->length,788); + + $seq = $db->get_Seq_by_id('ACON_CAEEL'); + ok($seq && ref($seq)); + + undef $db; +} $db = Bio::DB::Flat->new(-directory => $tmpdir, -index => 'binarysearch', -format => 'fasta', - -dbname => 'myfasta', + -dbname => 'myfasta', -verbose => $verbose, -write_flag => 1 ); @@ -116,7 +118,7 @@ undef $db; $db = Bio::DB::Flat->new(-directory => $tmpdir, -index => 'bdb', -format => 'fasta', - -dbname => 'mybfasta', + -dbname => 'mybfasta', -verbose => $verbose, -write_flag => 1 ); @@ -124,6 +126,6 @@ $dir = test_input_file('tmp.fst'); $result = 
$db->build_index(glob($dir)); ok($result); for my $id ( qw(TEST00001 TEST00002 TEST00003 TEST00004) ) { - $seq = $db->get_Seq_by_id($id); - is($seq->length,98); + $seq = $db->get_Seq_by_id($id); + is($seq->length,98); } diff --git a/t/LocalDB/Index/Index.t b/t/LocalDB/Index/Index.t index f63740d9a4..1eec2ad5fc 100644 --- a/t/LocalDB/Index/Index.t +++ b/t/LocalDB/Index/Index.t @@ -87,23 +87,26 @@ $ind->make_index(test_input_file('test.embl')); ok ( -e "Wibbl3" || -e "Wibbl3.pag" ); is ($ind->fetch('AL031232')->length, 4870); -$ind = Bio::Index::Swissprot->new(-filename => 'Wibbl4', - -write_flag => 1); -$ind->make_index(test_input_file('roa1.swiss')); -ok ( -e "Wibbl4" || -e "Wibbl4.pag" ); -$seq = $ind->fetch('ROA1_HUMAN'); -is ($seq->display_id(), 'ROA1_HUMAN'); -$seq = $ind->fetch('P09651'); -is ($seq->display_id(), 'ROA1_HUMAN'); - -# test id_parser -$ind = Bio::Index::Swissprot->new(-filename => 'Wibbl4', - -write_flag => 1); -$ind->id_parser(\&get_id); -$ind->make_index(test_input_file('roa1.swiss')); -ok ( -e "Wibbl4" || -e "Wibbl4.pag" ); -$seq = $ind->fetch('X12671'); -is ($seq->length,371); +SKIP: { + test_skip(-tests => 5, -requires_module => 'Data::Stag'); + $ind = Bio::Index::Swissprot->new(-filename => 'Wibbl4', + -write_flag => 1); + $ind->make_index(test_input_file('roa1.swiss')); + ok ( -e "Wibbl4" || -e "Wibbl4.pag" ); + $seq = $ind->fetch('ROA1_HUMAN'); + is ($seq->display_id(), 'ROA1_HUMAN'); + $seq = $ind->fetch('P09651'); + is ($seq->display_id(), 'ROA1_HUMAN'); + + # test id_parser + $ind = Bio::Index::Swissprot->new(-filename => 'Wibbl4', + -write_flag => 1); + $ind->id_parser(\&get_id); + $ind->make_index(test_input_file('roa1.swiss')); + ok ( -e "Wibbl4" || -e "Wibbl4.pag" ); + $seq = $ind->fetch('X12671'); + is ($seq->length,371); +} my $gb_ind = Bio::Index::GenBank->new(-filename => 'Wibbl5', diff --git a/t/Perl.t b/t/Perl.t index 3d46d1a6ed..38055b23bb 100644 --- a/t/Perl.t +++ b/t/Perl.t @@ -62,10 +62,11 @@ is $trans, 'IGLGTQFVCYM'; # 
these now run only with BIOPERLDEBUG set SKIP: { - test_skip(-tests => 12, -requires_networking => 1); + test_skip(-tests => 12, -requires_networking => 1, -requires_module => 'LWP::UserAgent'); # swissprot SKIP: { + test_skip(-tests => 2, -requires_module => 'Data::Stag'); eval { $seq_object = get_sequence('swissprot',"ROA1_HUMAN"); }; @@ -80,7 +81,7 @@ SKIP: { # embl SKIP: { eval { - $seq_object = get_sequence('embl',"BUM"); + $seq_object = get_sequence('embl',"HSFOS"); }; if ($@) { skip("problem connecting to EMBL:$@",2); diff --git a/t/RemoteDB/BioFetch.t b/t/RemoteDB/BioFetch.t index 19169108d8..9ba007c51d 100644 --- a/t/RemoteDB/BioFetch.t +++ b/t/RemoteDB/BioFetch.t @@ -63,6 +63,7 @@ SKIP: { SKIP: { #swissprot ok $db2 = Bio::DB::BioFetch->new(-db => 'swissprot'); + test_skip(-tests => 5, -requires_module => 'Data::Stag'); eval { $seq = $db2->get_Seq_by_id('YNB3_YEAST'); }; @@ -124,13 +125,13 @@ SKIP: { # unisave SKIP: { ok $db = Bio::DB::BioFetch->new(-db => 'unisave', - -verbose => $verbose); + -verbose => $verbose ? 
$verbose : -1); eval { - $seq = $db->get_Seq_by_acc('LAM1_MOUSE'); + $seq = $db->get_Seq_by_acc('P14733'); }; skip($dbwarn, 4) if $@; isa_ok($seq, 'Bio::SeqI'); - is($seq->display_id, 'LAM1_MOUSE'); + is($seq->display_id, 'LMNB1_MOUSE'); is($seq->accession, 'P14733'); - is($seq->length, 587); + is($seq->length, 588); } diff --git a/t/RemoteDB/EMBL.t b/t/RemoteDB/EMBL.t index a03992f5c4..7f196f2c20 100644 --- a/t/RemoteDB/EMBL.t +++ b/t/RemoteDB/EMBL.t @@ -5,13 +5,15 @@ use strict; BEGIN { use lib '.'; - use Bio::Root::Test; - - test_begin(-tests => 16, - -requires_modules => [qw(IO::String HTTP::Request::Common)], - -requires_networking => 1); - - use_ok('Bio::DB::EMBL'); + use Bio::Root::Test; + + test_begin(-tests => 16, + -requires_modules => [qw(IO::String + LWP::UserAgent + HTTP::Request::Common)], + -requires_networking => 1); + + use_ok('Bio::DB::EMBL'); } my $verbose = test_debug(); diff --git a/t/RemoteDB/HIV/HIV.t b/t/RemoteDB/HIV/HIV.t index b1327e5c2e..04ac6c86a6 100755 --- a/t/RemoteDB/HIV/HIV.t +++ b/t/RemoteDB/HIV/HIV.t @@ -7,8 +7,10 @@ BEGIN { use Bio::Root::Test; test_begin( -tests => 30, - -requires_modules => [qw( XML::Simple HTTP::Request::Common)], - ); + -requires_modules => [qw(LWP::UserAgent + XML::Simple + HTTP::Request::Common)], + ); use_ok('Bio::DB::HIV'); use_ok('Bio::DB::WebDBSeqI'); use_ok('Bio::DB::HIV::HIVAnnotProcessor'); diff --git a/t/RemoteDB/HIV/HIVQuery.t b/t/RemoteDB/HIV/HIVQuery.t index 476359b6ca..de27e52f30 100755 --- a/t/RemoteDB/HIV/HIVQuery.t +++ b/t/RemoteDB/HIV/HIVQuery.t @@ -7,7 +7,8 @@ BEGIN { use Bio::Root::Test; test_begin( -tests => 41, - -requires_modules => [qw( XML::Simple )] + -requires_modules => [qw(LWP::UserAgent + XML::Simple )] ); use_ok('Bio::DB::Query::HIVQuery'); use_ok('Bio::DB::HIV'); diff --git a/t/RemoteDB/MeSH.t b/t/RemoteDB/MeSH.t index 10517f6ca8..07910935c1 100755 --- a/t/RemoteDB/MeSH.t +++ b/t/RemoteDB/MeSH.t @@ -4,15 +4,15 @@ use strict; BEGIN { - use lib '.'; - use Bio::Root::Test; - - 
test_begin(-tests => 5, - -requires_modules => [qw(IO::String - LWP::UserAgent - HTTP::Request::Common)], - -requires_networking => 1); - + use lib '.'; + use Bio::Root::Test; + + test_begin(-tests => 5, + -requires_modules => [qw(IO::String + LWP::UserAgent + HTTP::Request::Common)], + -requires_networking => 1); + use_ok('Bio::DB::MeSH'); } @@ -27,6 +27,6 @@ SKIP: { is $t->each_twig(), 2; eval {$t = $mesh->get_exact_term("Sinus Thrombosis, Intracranial");}; skip "Couldn't connect to MeSH with Bio::DB::MeSH. Skipping those tests", 2 if $@; - is $t->description, "Formation or presence of a blood clot (THROMBUS) in the CRANIAL SINUSES, large endothelium-lined venous channels situated within the SKULL. Intracranial sinuses, also called cranial venous sinuses, include the superior sagittal, cavernous, lateral, petrous sinuses, and many others. Cranial sinus thrombosis can lead to severe HEADACHE; SEIZURE; and other neurological defects."; + like $t->description, qr/Thrombus/i; is $t->id, "D012851"; } diff --git a/t/RemoteDB/SeqRead_fail.t b/t/RemoteDB/SeqRead_fail.t index c540b0a2c5..bf10434e71 100644 --- a/t/RemoteDB/SeqRead_fail.t +++ b/t/RemoteDB/SeqRead_fail.t @@ -27,13 +27,19 @@ sub fetch { my ($id, $class) = @_; print "###################### $class ####################################\n" if $verbose; my $seq; - ok defined( my $gb = $class->new('-verbose'=>$verbose, - '-delay'=>0, - '-retrievaltype' => 'tempfile') ), "defined for $class"; + ok defined( my $gb = $class->new('-verbose' => $verbose, + '-delay' => 0, + '-retrievaltype' => 'tempfile') ), "defined for $class"; + + if ($class eq 'Bio::DB::SwissProt') { + test_skip(-tests => 1, -requires_module => 'Data::Stag'); + next if $@; + } + eval { $seq = $gb->get_Seq_by_id($id) }; if ($@ || !defined $seq) { - ok 1, "error or undef for $class"; - return; + ok 1, "error or undef for $class"; + return; } ok 0, "failure for $class"; } diff --git a/t/RemoteDB/SwissProt.t b/t/RemoteDB/SwissProt.t index 
b5f94e33f8..3c538b295f 100755 --- a/t/RemoteDB/SwissProt.t +++ b/t/RemoteDB/SwissProt.t @@ -10,7 +10,8 @@ BEGIN { test_begin(-tests => 23, -requires_modules => [qw(IO::String LWP::UserAgent - HTTP::Request::Common)], + HTTP::Request::Common + Data::Stag)], -requires_networking => 1); use_ok('Bio::DB::SwissProt'); diff --git a/t/RemoteDB/Taxonomy.t b/t/RemoteDB/Taxonomy.t index 5dbc41e5f2..a965e54365 100644 --- a/t/RemoteDB/Taxonomy.t +++ b/t/RemoteDB/Taxonomy.t @@ -9,8 +9,9 @@ BEGIN { test_begin( -tests => 202, - -requires_modules => [ 'DB_File', - 'XML::Twig' ] + -requires_modules => [qw(DB_File + LWP::UserAgent + XML::Twig )] ); use_ok('Bio::DB::Taxonomy'); diff --git a/t/Restriction/IO.t b/t/Restriction/IO.t index db6f5bc91d..409f2597fa 100644 --- a/t/Restriction/IO.t +++ b/t/Restriction/IO.t @@ -59,8 +59,9 @@ is $renzs->each_enzyme, 16; ok $out = Bio::Restriction::IO->new(-format=>'base'); SKIP: { - test_skip(-tests => 3, -requires_networking => 1); - #test_skip(-tests => 2, -requires_module => 'LWP::UserAgent'); + test_skip(-tests => 3, + -requires_module => 'LWP::UserAgent', + -requires_networking => 1); ok $in = Bio::Restriction::IO->new(-format=>'prototype', -current => 1); diff --git a/t/SeqFeature/Generic.t b/t/SeqFeature/Generic.t index 39e747bf29..c380917ad9 100644 --- a/t/SeqFeature/Generic.t +++ b/t/SeqFeature/Generic.t @@ -7,7 +7,7 @@ BEGIN { use lib '.'; use Bio::Root::Test; - test_begin(-tests => 362); + test_begin(-tests => 364); use_ok 'Bio::Seq'; use_ok 'Bio::SeqIO'; @@ -325,3 +325,12 @@ for my $sf (@split_sfs) { is $sf->end, $end, 'End'; is $sf->length, $length, 'Expected length'; } + +# spliced_seq() on the reverse strand, bug #88 (github) +$seq = Bio::SeqIO->new( -file => test_input_file('AF222649-rc.gbk') )->next_seq; +# All should start with "ATG" +for my $feat ( $seq->get_SeqFeatures('CDS') ) { + ok $feat->spliced_seq->seq =~ /^ATG/, "Reverse strand is spliced correctly"; +} + + diff --git a/t/SeqIO/Splicedseq.t b/t/SeqIO/Splicedseq.t 
index 9a321380f2..a42ed0d661 100644 --- a/t/SeqIO/Splicedseq.t +++ b/t/SeqIO/Splicedseq.t @@ -7,7 +7,7 @@ BEGIN { use lib '.'; use Bio::Root::Test; - test_begin(-tests => 26); + test_begin(-tests => 27); use_ok('Bio::Seq'); use_ok('Bio::SeqIO'); @@ -80,17 +80,20 @@ my $cut_feat = Bio::SeqFeature::Generic->new(-primary_tag => 'CDS', protein_id => 'YP_718205.1', } ); $seq_obj->add_SeqFeature($cut_feat); -is $cut_feat->seq->seq, 'TTTTTAA', 'cut by origin sequence'; -is $cut_feat->start, 16, 'cut by origin start using $feat->start'; -is $cut_feat->end, 2, 'cut by origin end using $feat->end'; -is $cut_feat->location->start, 16, 'cut by origin start using $feat->location->start'; -is $cut_feat->location->end, 2, 'cut by origin end using $feat->location->end'; +is $cut_feat->seq->seq, 'TTTTTAA', 'cut by origin sequence using $feat->seq'; +is $cut_feat->spliced_seq->seq, 'TTTTTAA', 'cut by origin sequence using $feat->spliced_seq'; +is $cut_feat->start, 16, 'cut by origin start using $feat->start'; +is $cut_feat->end, 2, 'cut by origin end using $feat->end'; +is $cut_feat->location->start, 16, 'cut by origin start using $feat->location->start'; +is $cut_feat->location->end, 2, 'cut by origin end using $feat->location->end'; SKIP: { - test_skip(-tests => 3, -requires_module => 'LWP::UserAgent', -requires_networking => 1); + test_skip(-tests => 3, + -requires_modules => [qw(Bio::DB::GenBank + LWP::UserAgent )], + -requires_networking => 1); my $db_in; eval { - use Bio::DB::GenBank; ok $db_in = Bio::DB::GenBank->new(); my $seq_obj = $db_in->get_Seq_by_id('AF032048.1'); }; diff --git a/t/SeqIO/genbank.t b/t/SeqIO/genbank.t index 3c37dfc629..db6443254d 100644 --- a/t/SeqIO/genbank.t +++ b/t/SeqIO/genbank.t @@ -1,12 +1,11 @@ # -*-Perl-*- Test Harness script for Bioperl -# $Id$ use strict; BEGIN { use lib '.'; use Bio::Root::Test; - test_begin(-tests => 296); + test_begin(-tests => 301); use_ok('Bio::SeqIO::genbank'); } @@ -693,3 +692,28 @@ is $features[0]->primary_tag, 'CDS', 
'Correct primary tag for feature'; @features = $seq->remove_SeqFeatures; is $#features, 9, 'Got 10 features'; +# Handle Structured Comments in COMMENT section +$seq = Bio::SeqIO->new(-format => 'genbank', + -file => test_input_file('KF527485.gbk') )->next_seq; +my $comment = ($seq->get_Annotations('comment') )[0]; +is($comment->as_text, "Comment: +##Assembly-Data-START## +Assembly Method :: Lasergene v. 10 +Sequencing Technology :: ABI37XL; Sanger dideoxy sequencing +##Assembly-Data-END##", +"Got correct Structured Comment"); + +$seq = Bio::SeqIO->new(-format => 'genbank', + -file => test_input_file('HM138502.gbk') )->next_seq; +$comment = ($seq->get_Annotations('comment') )[0]; +ok( $comment->as_text + =~ /^Comment: Swine influenza A \(H1N1\) virus isolated during human swine flu outbreak of 2009/, + "Got correct Structured Comment" +); +ok( $comment->as_text =~ /^##GISAID_EpiFlu\(TM\)Data-START##/m, + "Got correct Structured Comment" ); +ok( $comment->as_text =~ /^Subtype :: H1N1/m, + "Got correct Structured Comment" +); +ok( $comment->as_text =~ /^##GISAID_EpiFlu\(TM\)Data-END##/m, + "Got correct Structured Comment" ); diff --git a/t/SeqTools/CodonTable.t b/t/SeqTools/CodonTable.t index 4b4f06bf3f..11652a7c36 100644 --- a/t/SeqTools/CodonTable.t +++ b/t/SeqTools/CodonTable.t @@ -3,11 +3,11 @@ use strict; -BEGIN { +BEGIN { use lib '.'; use Bio::Root::Test; - test_begin(-tests => 71); + test_begin(-tests => 81); use_ok('Bio::Tools::CodonTable'); use_ok('Bio::CodonUsage::IO'); @@ -23,6 +23,17 @@ isa_ok $myCodonTable, 'Bio::Tools::CodonTable'; $myCodonTable = Bio::Tools::CodonTable->new(); is $myCodonTable->id(), 1; +# invalid table should produce a warn and set default table (1) +my $stderr = ''; +{ + # capture stderr output + local *STDERR; + open STDERR, '>', \$stderr; + $myCodonTable->id(99); +} +like $stderr, qr/Not a valid codon table ID/; +is $myCodonTable->id, 1; + # change codon table $myCodonTable->id(10); is $myCodonTable->id, 10; @@ -30,12 +41,12 @@ is 
$myCodonTable->name(), 'Euplotid Nuclear'; # enumerate tables as object method my $table = $myCodonTable->tables(); -cmp_ok (keys %{$table}, '>=', 17); # currently 17 known tables -is $table->{11}, q{"Bacterial"}; +cmp_ok (keys %{$table}, '>=', 19); # currently 19 known tables +is $table->{11}, 'Bacterial, Archaeal and Plant Plastid'; # enumerate tables as class method $table = Bio::Tools::CodonTable->tables; -cmp_ok (values %{$table}, '>=', 17); # currently 17 known tables +cmp_ok (values %{$table}, '>=', 19); # currently 19 known tables is $table->{23}, 'Thraustochytrium Mitochondrial'; # translate codons @@ -74,7 +85,7 @@ is $myCodonTable->translate('jj',1), ''; is $myCodonTable->translate('jjg'), 'X'; is $myCodonTable->translate('jjg',1), 'X'; -is $myCodonTable->translate('gt'), ''; +is $myCodonTable->translate('gt'), ''; is $myCodonTable->translate('gt',1), 'V'; is $myCodonTable->translate('g'), ''; @@ -89,20 +100,20 @@ ggkggyggsggvgghggdggbggxgtmgtrgtwgtkgtygtsgtvgthgtdgtbgtxtartaytcmtcrtcwt cktcytcstcvtchtcdtcbtcxtgyttrttytramgamggmgrracratrayytaytgytrsaasagsartaa; SEQ $seq =~ s/\s+//g; -@ii = grep { length == 3 } split /(.{3})/, $seq; +@ii = grep { length == 3 } split /(.{3})/, $seq; print join (' ', @ii), "\n" if( $DEBUG); my $prot = <translate($ii[$i]) ) { - $test = 0; + $test = 0; print $ii[$i], ": |", $res[$i], "| ne |", $myCodonTable->translate($ii[$i]), "| @ $i\n" if( $DEBUG); last ; @@ -110,12 +121,14 @@ for my $i (0..$#ii) { } ok $test; -# reverse translate amino acids +# reverse translate amino acids is $myCodonTable->revtranslate('U'), 0; is $myCodonTable->revtranslate('O'), 0; is $myCodonTable->revtranslate('J'), 9; is $myCodonTable->revtranslate('I'), 3; +my @RNA_codons = $myCodonTable->revtranslate('M', 'RNA'); +is $RNA_codons[0], 'aug'; # test RNA output @ii = qw(A l ACN Thr sER ter Glx); @res = ( @@ -144,10 +157,10 @@ $test = 1; } ok $test; -# boolean tests -$myCodonTable->id(1); +# boolean tests +$myCodonTable->id(1); # Standard table -ok 
$myCodonTable->is_start_codon('ATG'); +ok $myCodonTable->is_start_codon('ATG'); is $myCodonTable->is_start_codon('GGH'), 0; ok $myCodonTable->is_start_codon('HTG'); is $myCodonTable->is_start_codon('CCC'), 0; @@ -164,20 +177,17 @@ is $myCodonTable->is_unknown_codon('UAG'), 0; is $myCodonTable->translate_strict('ATG'), 'M'; - - # # adding a custom codon table # - my @custom_table = ( 'test1', 'FFLLSSSSYY**CC*WLLLL**PPHHQQR*RRIIIMT*TT*NKKSSRRV*VVAA*ADDEE*GGG' ); ok my $custct = $myCodonTable->add_table(@custom_table); -is $custct, 25; +is $custct, 26; is $myCodonTable->translate('atgaaraayacmacracwacka'), 'MKNTTTT'; ok $myCodonTable->id($custct); is $myCodonTable->translate('atgaaraayacmacracwacka'), 'MKXXTTT'; @@ -202,18 +212,19 @@ is $myCodonTable->reverse_translate_all($seq), 'GCBWSNNNNTTYCAYAARYTN'; # # test reverse_translate_best(), requires a Bio::CodonUsage::Table object -# +# -ok $seq = Bio::PrimarySeq->new(-seq =>'ACDEFGHIKLMNPQRSTVWY'); +ok $seq = Bio::PrimarySeq->new(-seq =>'ACDEFGHIKLMNPQRSTVWYX'); ok my $io = Bio::CodonUsage::IO->new(-file => test_input_file('MmCT')); ok my $cut = $io->next_data(); -is $myCodonTable->reverse_translate_best($seq,$cut), 'GCCTGCGACGAGTTCGGCCACATCAAGCTGATGAACCCCCAGCGCTCCACCGTGTGGTAC'; +is $myCodonTable->reverse_translate_best($seq,$cut), 'GCCTGCGACGAGTTCGGCCACATCAAGCTGATGAACCCCCAGCGCTCCACCGTGTGGTACNNN'; +is $myCodonTable->reverse_translate_all($seq, $cut, 15), 'GCNTGYGAYGARTTYGGVCAYATYAARCTSATGAAYCCNCARMGVWSYACHGTSTGGTAYNNN'; # # test 'Strict' table, requires a Bio::CodonUsage::Table object # -$myCodonTable = Bio::Tools::CodonTable->new(); +$myCodonTable = Bio::Tools::CodonTable->new(); # Default Standard table # boolean tests is $myCodonTable->is_start_codon('ATG'), 1; @@ -222,10 +233,22 @@ is $myCodonTable->is_start_codon('TTG'), 1; is $myCodonTable->is_start_codon('CTG'), 1; is $myCodonTable->is_start_codon('CCC'), 0; -$myCodonTable->id(24); +$myCodonTable->id(0); # Special 'Strict' table (ATG-only start) is 
$myCodonTable->is_start_codon('ATG'), 1; is $myCodonTable->is_start_codon('GTG'), 0; is $myCodonTable->is_start_codon('TTG'), 0; is $myCodonTable->is_start_codon('CTG'), 0; is $myCodonTable->is_start_codon('CCC'), 0; + +# Pterobranchia Mitochondrial codon table +$myCodonTable->id(24); +is $myCodonTable->is_start_codon('GTG'), 1; +is $myCodonTable->is_start_codon('CTG'), 1; +is $myCodonTable->translate_strict('TGA'), 'W'; + +# Candidate Division SR1 and Gracilibacteria codon table +$myCodonTable->id(25); +is $myCodonTable->is_start_codon('GTG'), 1; +is $myCodonTable->is_start_codon('CTG'), 0; +is $myCodonTable->translate_strict('TGA'), 'G'; diff --git a/t/Species.t b/t/Species.t index ce94e9ed8f..f5f8e64a5b 100644 --- a/t/Species.t +++ b/t/Species.t @@ -54,7 +54,9 @@ is $species->rank, 'species'; # We can make a species object from just an id an db handle SKIP: { - test_skip(-tests => 5, -requires_networking => 1); + test_skip(-tests => 5, + -requires_module => 'LWP::UserAgent', + -requires_networking => 1); $species = Bio::Species->new(-id => 51351); my $taxdb = Bio::DB::Taxonomy->new(-source => 'entrez'); diff --git a/t/Tools/Analysis/Protein/ELM.t b/t/Tools/Analysis/Protein/ELM.t index 279a5e663a..1cb53a9f17 100755 --- a/t/Tools/Analysis/Protein/ELM.t +++ b/t/Tools/Analysis/Protein/ELM.t @@ -2,7 +2,6 @@ # $Id$ use strict; - BEGIN { use lib '.'; use Bio::Root::Test; @@ -41,5 +40,6 @@ exit if $tool->status eq 'TERMINATED_BY_ERROR'; ok my $raw = $tool->result(''); print $raw if $verbose; ok my $parsed = $tool->result('parsed'); -is $parsed->{'CLV_NDR_NDR_1'}{'locus'}[0], '54-56'; + +is $parsed->{'CLV_NRD_NRD_1'}{'locus'}[0], '54-56'; ok my @res = $tool->result('Bio::SeqFeatureI'); diff --git a/t/Tools/Analysis/Protein/GOR4.t b/t/Tools/Analysis/Protein/GOR4.t index 3df9ab412f..2fa1e5b03f 100644 --- a/t/Tools/Analysis/Protein/GOR4.t +++ b/t/Tools/Analysis/Protein/GOR4.t @@ -4,15 +4,16 @@ use strict; BEGIN { - use lib '.'; + use lib '.'; use Bio::Root::Test; - 
test_begin(-tests => 13, - -requires_modules => [qw(IO::String LWP::UserAgent)], - -requires_networking => 1); - - use_ok("Bio::Seq"); - use_ok("Bio::Tools::Analysis::Protein::GOR4"); + test_begin(-tests => 0, + -requires_modules => [qw(IO::String + LWP::UserAgent)], + -requires_networking => 1); + + use_ok("Bio::Seq"); + use_ok("Bio::Tools::Analysis::Protein::GOR4"); } my $seq = Bio::Seq->new(-seq => 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS', @@ -21,22 +22,23 @@ ok my $tool = Bio::Tools::Analysis::Protein::GOR4->new(-seq=>$seq->primary_seq); SKIP: { ok $tool->run(); - skip "Skipping tests since we got terminated by a server error", 9 if $tool->status eq 'TERMINATED_BY_ERROR'; - ok my $raw = $tool->result(''); + skip "Skipping tests since we got terminated by a server error", 9 if $tool->status eq 'TERMINATED_BY_ERROR'; + ok my $raw = $tool->result(''); ok my $parsed = $tool->result('parsed'); - is $parsed->[0]{'coil'}, '999'; + + is $parsed->[0]{'coil'}, '56'; my @res = sort {$a->start <=> $b->start} $tool->result('Bio::SeqFeatureI'); if (scalar @res > 0) { - ok 1; + ok 1; } - else { - skip 'No results - could not connect to GOR4 server?', 6; + else { + skip 'No results - could not connect to GOR4 server?', 6; } - is $res[0]->start, 1; - is $res[0]->end, 43; + is $res[0]->start, 1; + is $res[0]->end, 7; ok my $meta = $tool->result('meta'); - test_skip(-tests => 2, -requires_module => 'Bio::Seq::Meta::Array'); - is $meta->named_submeta_text('GOR4_coil',1,2), '999 999'; - is $meta->seq, 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS'; + test_skip(-tests => 2, -requires_module => 'Bio::Seq::Meta::Array'); + is $meta->named_submeta_text('GOR4_coil',1,2), '56 195'; + is $meta->seq, 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS'; } diff --git a/t/Tools/Analysis/Protein/HNN.t b/t/Tools/Analysis/Protein/HNN.t index 108faf0224..03c4ed4287 100644 --- a/t/Tools/Analysis/Protein/HNN.t +++ b/t/Tools/Analysis/Protein/HNN.t @@ -4,15 +4,16 @@ use strict; BEGIN 
{ - use lib '.'; + use lib '.'; use Bio::Root::Test; - - test_begin(-tests => 14, - -requires_modules => [qw(IO::String LWP::UserAgent)], - -requires_networking => 1); - - use_ok("Bio::Seq"); - use_ok("Bio::Tools::Analysis::Protein::HNN"); + + test_begin(-tests => 0, + -requires_modules => [qw(IO::String + LWP::UserAgent)], + -requires_networking => 1); + + use_ok("Bio::Seq"); + use_ok("Bio::Tools::Analysis::Protein::HNN"); } my $seq = Bio::Seq->new(-seq => 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS', @@ -20,26 +21,26 @@ my $seq = Bio::Seq->new(-seq => 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQ ok my $tool = Bio::Tools::Analysis::Protein::HNN->new(-seq=>$seq->primary_seq); SKIP: { - ok $tool->run(); - skip "Skipping tests since we got terminated by a server error", 9 if $tool->status eq 'TERMINATED_BY_ERROR'; - ok my $raw = $tool->result(''); - ok my $parsed = $tool->result('parsed'); - is $parsed->[0]{'coil'}, '1000'; - my @res = $tool->result('Bio::SeqFeatureI'); - if (scalar @res > 0) { - ok 1; - } - else { - skip 'No results - could not connect to HNN server?', 6; - } - - ok my $meta = $tool->result('meta'); - ok my $seqobj = Bio::Seq->new(-primary_seq => $meta, display_id=>"a"); - for ( $tool->result('Bio::SeqFeatureI') ) { - ok $seqobj->add_SeqFeature($_); - } - - test_skip(-tests => 2, -requires_module => 'Bio::Seq::Meta::Array'); - is $meta->named_submeta_text('HNN_helix',1,2), '0 111'; - is $meta->seq, 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS'; + ok $tool->run(); + skip "Skipping tests since we got terminated by a server error", 9 if $tool->status eq 'TERMINATED_BY_ERROR'; + ok my $raw = $tool->result(''); + ok my $parsed = $tool->result('parsed'); + is $parsed->[0]{'coil'}, '1000'; + my @res = $tool->result('Bio::SeqFeatureI'); + if (scalar @res > 0) { + ok 1; + } + else { + skip 'No results - could not connect to HNN server?', 6; + } + + ok my $meta = $tool->result('meta'); + ok my $seqobj = Bio::Seq->new(-primary_seq => $meta, 
display_id=>"a"); + for ( $tool->result('Bio::SeqFeatureI') ) { + ok $seqobj->add_SeqFeature($_); + } + + test_skip(-tests => 2, -requires_module => 'Bio::Seq::Meta::Array'); + is $meta->named_submeta_text('HNN_helix',1,2), '0 111'; + is $meta->seq, 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS'; } diff --git a/t/Tools/Analysis/Protein/Sopma.t b/t/Tools/Analysis/Protein/Sopma.t index 5062e0c93a..54c8c7a720 100644 --- a/t/Tools/Analysis/Protein/Sopma.t +++ b/t/Tools/Analysis/Protein/Sopma.t @@ -4,14 +4,16 @@ use strict; BEGIN { - use lib '.'; + use lib '.'; use Bio::Root::Test; - - test_begin(-tests => 16, - -requires_modules => [qw(IO::String LWP::UserAgent)]); - - use_ok('Bio::PrimarySeq'); - use_ok('Bio::Tools::Analysis::Protein::Sopma'); + + test_begin(-tests => 0, + -requires_modules => [qw(IO::String + LWP::UserAgent)], + -requires_networking => 1); + + use_ok('Bio::PrimarySeq'); + use_ok('Bio::Tools::Analysis::Protein::Sopma'); } my $verbose = test_debug(); @@ -19,32 +21,31 @@ my $verbose = test_debug(); ok my $tool = Bio::WebAgent->new(-verbose =>$verbose); my $seq = Bio::PrimarySeq->new( - -seq => 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS', - -display_id => 'test2'); + -seq => 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS', + -display_id => 'test2' +); ok $tool = Bio::Tools::Analysis::Protein::Sopma->new( -seq=>$seq, - #-verbose => $verbose, + #-verbose => $verbose, -window_width => 15); SKIP: { - test_skip(-tests => 12, -requires_networking => 1); - - ok $tool->run(); - skip "Tool was terminated by some error: problem connecting to server?", 11 if $tool->status eq 'TERMINATED_BY_ERROR'; - - ok my $raw = $tool->result(''); - ok my $parsed = $tool->result('parsed'); - is ($parsed->[0]{'helix'}, '102'); - ok my @res = $tool->result('Bio::SeqFeatureI'); - ok my $meta = $tool->result('meta', "ww15"); - - ok $tool->window_width(21); - ok $tool->clear(); - ok $tool->run; - ok my $meta2 = $tool->result('meta', "ww21"); - - SKIP: { - 
test_skip(-tests => 2, -requires_module => 'Bio::Seq::Meta::Array'); - is $meta->named_submeta_text('Sopma_helix|ww15',1,2), '102 195'; - is $meta->seq, 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS'; - } + ok $tool->run(); + skip "Tool was terminated by some error: problem connecting to server?", 11 if $tool->status eq 'TERMINATED_BY_ERROR'; + + ok my $raw = $tool->result(''); + ok my $parsed = $tool->result('parsed'); + is ($parsed->[0]{'helix'}, '102'); + ok my @res = $tool->result('Bio::SeqFeatureI'); + ok my $meta = $tool->result('meta', "ww15"); + + ok $tool->window_width(21); + ok $tool->clear(); + ok $tool->run; + ok my $meta2 = $tool->result('meta', "ww21"); + + SKIP: { + test_skip(-tests => 2, -requires_module => 'Bio::Seq::Meta::Array'); + is $meta->named_submeta_text('Sopma_helix|ww15',1,2), '102 195'; + is $meta->seq, 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS'; + } } diff --git a/t/Tools/Run/RemoteBlast.t b/t/Tools/Run/RemoteBlast.t index ce0967710e..baed5b0c06 100644 --- a/t/Tools/Run/RemoteBlast.t +++ b/t/Tools/Run/RemoteBlast.t @@ -9,7 +9,6 @@ BEGIN { test_begin(-tests => 21, -requires_modules => [qw(IO::String - LWP LWP::UserAgent)], -requires_networking => 1); diff --git a/t/Tools/Run/RemoteBlast_rpsblast.t b/t/Tools/Run/RemoteBlast_rpsblast.t index f4f66832ae..9ae95c9494 100644 --- a/t/Tools/Run/RemoteBlast_rpsblast.t +++ b/t/Tools/Run/RemoteBlast_rpsblast.t @@ -16,7 +16,6 @@ BEGIN { test_begin(-tests => 7, -requires_modules => [qw(IO::String - LWP LWP::UserAgent)], -requires_networking => 1); @@ -30,7 +29,7 @@ ok( -e $inputfilename); my $remote_rpsblast = Bio::Tools::Run::RemoteBlast->new(-verbose => test_debug(), -prog => 'blastp', - -data => 'cdsearch/cdd', + -data => 'cdd', -readmethod => 'blasttable', -expect => '1e-10', ); @@ -76,6 +75,7 @@ SKIP: { $remote_rpsblast->remove_rid($rids[0]); my $count = 0; isa_ok($rc, 'Bio::SearchIO'); + while (my $result = $rc->next_result) { while ( my $hit = $result->next_hit ) { $count++; diff 
--git a/t/Tree/TreeIO/nexml.t b/t/Tree/TreeIO/nexml.t index 10eb1afdc7..5cbae48114 100644 --- a/t/Tree/TreeIO/nexml.t +++ b/t/Tree/TreeIO/nexml.t @@ -4,7 +4,7 @@ use strict; use Bio::Root::Test; test_begin( -tests => 46, - -requires_modules => [qw(Bio::Phylo)]); + -requires_modules => [qw(Bio::Phylo XML::Twig)]); use_ok( 'Bio::Tree::Tree' ); use_ok( 'Bio::TreeIO' ); diff --git a/t/Tree/TreeIO/nhx.t b/t/Tree/TreeIO/nhx.t index a95307c703..73ed689d6f 100644 --- a/t/Tree/TreeIO/nhx.t +++ b/t/Tree/TreeIO/nhx.t @@ -13,6 +13,8 @@ BEGIN { } my $verbose = 0; #test_debug(); +my $nl = qr/\n/; +my $cr = qr/\r/; my $treeio = Bio::TreeIO->new( -format => 'nhx', @@ -85,8 +87,8 @@ sub read_file { binmode $IN; $string = <$IN>; close $IN; - $string =~ s/\n//g; - $string =~ s/\r//g; # For files with Windows line-endings + $string =~ s/$nl//g; + $string =~ s/$cr//g; # For files with Windows line-endings #print STDERR "STR: $string\n"; return $string; } diff --git a/t/data/AF222649-rc.gbk b/t/data/AF222649-rc.gbk new file mode 100644 index 0000000000..8db5cdb217 --- /dev/null +++ b/t/data/AF222649-rc.gbk @@ -0,0 +1,91 @@ +LOCUS AF222649rc 1659 bp RNA linear VRL 10-JAN-2001 +DEFINITION Influenza A virus (A/Quail/Hong Kong/NT28/99(H9N2)) segment 3 PA + (PA) gene, partial cds. +ACCESSION AF222649rc +VERSION AF222649rc.1 +KEYWORDS . +SOURCE Influenza A virus (A/Quail/Hong Kong/NT28/99(H9N2)) + ORGANISM Influenza A virus (A/Quail/Hong Kong/NT28/99(H9N2)) + Viruses; ssRNA negative-strand viruses; Orthomyxoviridae; + Influenzavirus A. +REFERENCE 1 (bases 1 to 1659) + AUTHORS Guan,Y., Shortridge,K.F., Krauss,S., Chin,P.S., Dyrting,K.C., + Ellis,T.M., Webster,R.G. and Peiris,M. + TITLE H9N2 influenza viruses possessing H5N1-like internal genomes + continue to circulate in poultry in southeastern China + JOURNAL J. Virol. 74 (20), 9372-9380 (2000) + PUBMED 11000205 +REFERENCE 2 (bases 1 to 1659) + AUTHORS Guan,Y., Krauss,S., Webster,R.G. and Shortridge,K.F. 
+ TITLE Direct Submission + JOURNAL Submitted (10-JAN-2000) Virology and Molecular Biology, St. Jude + Children's Research Hospital, 332 N. Lauderdale, Memphis, TN 38105, + USA +FEATURES Location/Qualifiers + source complement(1..1659) + /organism="Influenza A virus (A/Quail/Hong + Kong/NT28/99(H9N2))" + /mol_type="genomic RNA" + /strain="A/Quail/Hong Kong/NT28/99(H9N2)" + /db_xref="taxon:109241" + /segment="3" + gene complement(1..1653) + /gene="PA" + CDS complement(1..1653) + /gene="PA" + /codon_start=1 + /product="PA" + /protein_id="AAG48207.1" + /db_xref="GI:12060686" + /translation="MEDFVRQCFSPMIVELAEKAMKEYGEDPKIETNKFAAICTHLEV + CFMYSDFHFIDERGESIIVEFGDPNVLLKHRFEIIEGRDRTMAWTVVNSICNTTGVEK + PKYLPDLYDYKENRFIEIGVTRREVHIYYLEKANKIKSERTHIHIFSFTGEEMATKAD + YTLDEESRARIKTRLFTIRQELASRGLWDSFRQSERGEETIEERFEITGTMRRLADQS + LPPNFSSLENFRAYVDGFEPNGCIEGKLSQMSKEVTARIEPFLKTTPRPLKLPDGPPC + SQRSKFLLMDALKLSIEDPSHEGEGIPLYDAIKCMKTFFGWKEPKIIKPHEKGINPNY + LLAWKQVLAELQDIENDEKIPKTKNMKKTSQLKWALGENMAPEKVDFEDCKDVGDLKQ + YDSDEPELKSLASWIQSEFNKACELTDSSWVELDEIGEDVAPIEHIASMRRNYFTAEV + SHCRATEYIMKGVYINTALLNASCAAMDDFQLIPMVSKCRTKEGRRKTNLYGFIIKGR + SHLRNDTDVVNFVSMEFSLTDPRLEPHKWEKYCVLEVGDMLLR" + misc_feature complement(4..1593) + /gene="PA" + /note="Influenza RNA-dependent RNA polymerase subunit PA; + Region: Flu_PA; pfam00603" + /db_xref="CDD:144261" + gene complement(join(1081..1653,894..1079)) + /gene="PA-X" + CDS complement(join(1081..1653,894..1079)) + /gene="PA-X" + /product="PA-X" + /codon_start=1 +BASE COUNT 374 a 412 c 312 g 561 t +ORIGIN + 1 tctcaggagc atgtccccta cttcaagaac acagtacttc tcccacttgt gtggctccaa + 61 tctcgggtct gtaagagaaa attccatgct cacaaagttt accacatcag tatcattcct + 121 caaatgggac cttcccttga taatgaaccc atacaggttt gttttccgtc tcccttcttt + 181 tgttctgcat ttgcttacca ttgggatcaa ttggaagtca tccatggctg cacaagatgc + 241 attgagcaac gctgtattga tatacactcc cttcattata tactcagtag cccggcaatg + 301 tgacacttct gctgtgaagt aattccttct catactcgca atgtgctcga ttggggcaac + 361 atcttctcct atttcatcaa 
gttctaccca gcttgaatcg gtcagttcac atgccttgtt + 421 aaattcactc tggatccaac ttgctagcga tttgagctct ggttcatcac tatcatattg + 481 tttcaggtca ccaacgtctt tgcagtcctc aaagtccact ttctccggtg ccatattctc + 541 acctaatgcc cattttaatt ggcttgtttt cttcatgttc ttcgtttttg ggatcttttc + 601 atcattttca atgtcctgga gctctgccag cacctgcttc caagctaaga gataatttgg + 661 gtttataccc ttctcatgtg gtttgataat tttgggctct ttccagccga aaaatgtttt + 721 catgcatttg atcgcatcat acagcggtat cccctctccc tcatgactcg ggtcctcaat + 781 gcttaatttc aaagcatcca tcagtaagaa ttttgacctt tgggaacagg gaggtccatc + 841 aggcaatttt agaggacgtg gtgttgtttt aagaaagggc tcaattctgg ccgttacctc + 901 ttttgacatc tgagaaagct tgccctcaat gcagccgttc ggttcgaatc catccacata + 961 ggctctaaag ttttcaaggc tggagaaatt cggtgggaga ctttggtcgg caagcctgcg + 1021 cattgttcct gtgatttcaa atctttcttc aattgtctct tcgcctctct cggactgacg + 1081 aaaggaatcc catagacccc tgctagccag ttcctgtctt atggtgaaca gcctagtttt + 1141 gatccttgcc ctgctctctt catcaagggt atagtccgcc ttggtggcca tttcttcccc + 1201 agtaaatgaa aagatgtgaa tgtgtgttct ctcggatttt atcttgttgg ccttttctag + 1261 atagtatatg tgcacttccc tccgtgtcac tccaatctca atgaatcgat tctccttgta + 1321 atcatacaga tctggaaggt atttaggttt ttcgactccc gtagtgttac agatgctatt + 1381 cactactgtc catgccattg ttcggtctct tccttcaatt atttcaaatc ggtgtttcaa + 1441 taacacattt ggatcaccaa attccacaat tattgattcg cctctttcgt caatgaagtg + 1501 gaaatctgaa tacatgaagc agacttccag atgagtgcat attgccgcaa acttgttcgt + 1561 ttcgattttc gggtcttccc cgtactcttt cattgccttt tccgcaagct cgacgatcat + 1621 tgggctgaag cattgtcgca caaagtcttc cattttgaa +// diff --git a/t/data/HM138502.gbk b/t/data/HM138502.gbk new file mode 100644 index 0000000000..3a1a8bf6eb --- /dev/null +++ b/t/data/HM138502.gbk @@ -0,0 +1,79 @@ +LOCUS HM138502 1410 bp cRNA linear VRL 22-APR-2010 +DEFINITION Influenza A virus (A/California/07/2009(H1N1)) segment 6 + neuraminidase (NA) gene, complete cds. +ACCESSION HM138502 +VERSION HM138502.1 GI:295002854 +DBLINK BioProject: PRJNA37813 +KEYWORDS . 
+SOURCE Influenza A virus (A/California/07/2009(H1N1)) + ORGANISM Influenza A virus (A/California/07/2009(H1N1)) + Viruses; ssRNA negative-strand viruses; Orthomyxoviridae; + Influenzavirus A. +REFERENCE 1 (bases 1 to 1410) + AUTHORS Starick,E. + TITLE Direct Submission + JOURNAL Submitted (21-APR-2010) Friedrich-Loeffler-Institut, Suedufer 10, + Greifswald-Insel Riems 17493, Germany +COMMENT Swine influenza A (H1N1) virus isolated during human swine flu + outbreak of 2009. + + ##GISAID_EpiFlu(TM)Data-START## + Isolate :: A/California/07/09 + Subtype :: H1N1 + Lineage :: swl + ##GISAID_EpiFlu(TM)Data-END## +FEATURES Location/Qualifiers + source 1..1410 + /organism="Influenza A virus (A/California/07/2009(H1N1))" + /mol_type="viral cRNA" + /strain="A/California/07/2009" + /serotype="H1N1" + /host="Homo sapiens" + /db_xref="taxon:641809" + /segment="6" + /country="USA" + /collection_date="09-Apr-2009" + /note="lineage: swl" + gene 1..1410 + /gene="NA" + CDS 1..1410 + /gene="NA" + /codon_start=1 + /product="neuraminidase" + /protein_id="ADF58339.1" + /db_xref="GI:295002855" + /translation="MNPNQKIITIGSVCMTIGMANLILQIGNIISIWISHSIQLGNQN + QIETCNQSVITYENNTWVNQTYVNISNTNFAAGQSVVSVKLAGNSSLCPVSGWAIYSK + DNSVRIGSKGDVFVIREPFISCSPLECRTFFLTQGALLNDKHSNGTIKDRSPYRTLMS + CPIGEVPSPYNSRFESVAWSASACHDGINWLTIGISGPDNGAVAVLKYNGIITDTIKS + WRNNILRTQESECACVNGSCFTVMTDGPSNGQASYKIFRIEKGKIVKSVEMNAPNYHY + EECSCYPDSSEITCVCRDNWHGSNRPWVSFNQNLEYQIGYICSGIFGDNPRPNDKTGS + CGPVSSNGANGVKGFSFKYGNGVWIGRTKSISSRNGFEMIWDPNGWTGTDNNFSIKQD + IVGINEWSGYSGSFVQHPELTGLDCIRPCFWVELIRGRPKENTIWTSGSSISFCGVNS + DTVGWSWPDGAELPFTIDK" +ORIGIN + 1 atgaatccaa accaaaagat aataaccatt ggttcggtct gtatgacaat tggaatggct + 61 aacttaatat tacaaattgg aaacataatc tcaatatgga ttagccactc aattcaactt + 121 gggaatcaaa atcagattga aacatgcaat caaagcgtca ttacttatga aaacaacact + 181 tgggtaaatc agacatatgt taacatcagc aacaccaact ttgctgctgg acagtcagtg + 241 gtttccgtga aattagcagg caattcctct ctctgccctg ttagtggatg ggctatatac + 301 agtaaagaca acagtgtaag 
aatcggttcc aagggggatg tgtttgtcat aagggaacca + 361 ttcatatcat gctccccctt ggaatgcaga accttcttct tgactcaagg ggccttgcta + 421 aatgacaaac attccaatgg aaccattaaa gacaggagcc catatcgaac cctaatgagc + 481 tgtcctattg gtgaagttcc ctctccatac aactcaagat ttgagtcagt cgcttggtca + 541 gcaagtgctt gtcatgatgg catcaattgg ctaacaattg gaatttctgg cccagacaat + 601 ggggcagtgg ctgtgttaaa gtacaacggc ataataacag acactatcaa gagttggaga + 661 aacaatatat tgagaacaca agagtctgaa tgtgcatgtg taaatggttc ttgctttact + 721 gtaatgaccg atggaccaag taatggacag gcctcataca agatcttcag aatagaaaag + 781 ggaaagatag tcaaatcagt cgaaatgaat gcccctaatt atcactatga ggaatgctcc + 841 tgttatcctg attctagtga aatcacatgt gtgtgcaggg ataactggca tggctcgaat + 901 cgaccgtggg tgtctttcaa ccagaatctg gaatatcaga taggatacat atgcagtggg + 961 attttcggag acaatccacg ccctaatgat aagacaggca gttgtggtcc agtatcgtct + 1021 aatggagcaa atggagtaaa agggttttca ttcaaatacg gcaatggtgt ttggataggg + 1081 agaactaaaa gcattagttc aagaaacggt tttgagatga tttgggatcc gaacggatgg + 1141 actgggacag acaataactt ctcaataaag caagatatcg taggaataaa tgagtggtca + 1201 ggatatagcg ggagttttgt tcagcatcca gaactaacag ggctggattg tataagacct + 1261 tgcttctggg ttgaactaat cagagggcga cccaaagaga acacaatctg gactagcggg + 1321 agcagcatat ccttttgtgg tgtaaacagt gacactgtgg gttggtcttg gccagacggt + 1381 gctgagttgc catttaccat tgacaagtaa +// diff --git a/t/data/KF527485.gbk b/t/data/KF527485.gbk new file mode 100644 index 0000000000..c64eb23638 --- /dev/null +++ b/t/data/KF527485.gbk @@ -0,0 +1,82 @@ +LOCUS KF527485 1444 bp cRNA linear VRL 02-OCT-2013 +DEFINITION Influenza A virus (A/California/07/2009(H1N1)) segment 5 + nucleocapsid protein (NP) gene, partial cds. +ACCESSION KF527485 +VERSION KF527485.1 GI:537446968 +KEYWORDS . +SOURCE Influenza A virus (A/California/07/2009(H1N1)) + ORGANISM Influenza A virus (A/California/07/2009(H1N1)) + Viruses; ssRNA negative-strand viruses; Orthomyxoviridae; + Influenzavirus A. +REFERENCE 1 (bases 1 to 1444) + AUTHORS Le,V.L., Courtney,C.L., Steel,J. and Compans,R.W. 
+ TITLE Closely Related Influenza Viruses Induce Contrasting Respiratory + Tract Immunopathology + JOURNAL PLoS ONE 8 (9), E76708 (2013) + PUBMED 24086762 + REMARK Publication Status: Online-Only +REFERENCE 2 (bases 1 to 1444) + AUTHORS Le,V.L., Courtney,C.L., Steel,J. and Compans,R.W. + TITLE Direct Submission + JOURNAL Submitted (07-AUG-2013) Microbiology and Immunology, Emory + University, 1518 Clifton Rd Suite 5000, Atlanta, GA 30322, USA +COMMENT ##Assembly-Data-START## + Assembly Method :: Lasergene v. 10 + Sequencing Technology :: ABI37XL; Sanger dideoxy sequencing + ##Assembly-Data-END## +FEATURES Location/Qualifiers + source 1..1444 + /organism="Influenza A virus (A/California/07/2009(H1N1))" + /mol_type="viral cRNA" + /strain="A/California/07/2009" + /serotype="H1N1" + /host="Homo sapiens" + /db_xref="taxon:641809" + /segment="5" + /country="USA" + /collection_date="09-Apr-2009" + /note="passage details: MDCK 6" + gene <1..>1444 + /gene="NP" + CDS <1..>1444 + /gene="NP" + /codon_start=2 + /product="nucleocapsid protein" + /protein_id="AGU69828.1" + /db_xref="GI:537446969" + /translation="ASQGTKRSYEQMETGGERQDATEIRASVGRMIGGIGRFYIQMCT + ELKLSDYDGRLIQNSITIERMVLSAFDERRNKYLEEHPSAGKDPKKTGGPIYRRVGGK + WMRELILYDKEEIRRVWRQANNGEDATAGLTHIMIWHSNLNDATYQRTRALVRTGMDP + RMCSLMQGSTLPRRSGAAGAAVKGVGTIAMELIRMIKRGINDRNFWRGENGRRTRVAY + ERMCNILKGKFQTAAQRAMMDQVRESRNPGNAEIEDLIFLARSALILRGSVAHKSCLP + ACVYGLAVASGHDFEREGYSLVGIDPFKLLQNSQVVSLMRPNENPAHKSQLVWMACHS + AAFEDLRVSSFIRGKKVIPRGKLSTRGVQIASNENVETMDSNTLELRSRYWAIRTRSG + GNTNQQKASAGQISVQPTFSVQRNLPFERATVMAAFSGNNEGRTSDMRTEVIRMMESA + KPEDLSFQGRGVFELSDEKATNPIVPSFDMS" +ORIGIN + 1 ggcgtctcaa ggcaccaaac gatcatatga acaaatggag actggtgggg agcgccagga + 61 tgccacagaa atcagagcat ctgtcggaag aatgattggt ggaatcggga gattctacat + 121 ccaaatgtgc actgaactca aactcagtga ttatgatgga cgactaatcc agaatagcat + 181 aacaatagag aggatggtgc tttctgcttt tgatgagaga agaaataaat acctagaaga + 241 gcatcccagt gctgggaagg accctaagaa aacaggagga cccatatata gaagagtagg + 301 
cggaaagtgg atgagagaac tcatccttta tgacaaagaa gaaataagga gagtttggcg + 361 ccaagcaaac aatggcgaag atgcaacagc aggtcttact catatcatga tttggcattc + 421 caacctgaat gatgccacat atcagagaac aagagcgctt gttcgcaccg gaatggatcc + 481 cagaatgtgc tctctaatgc aaggttcaac acttcccaga aggtctggtg ccgcaggtgc + 541 tgcggtgaaa ggagttggaa caatagcaat ggagttaatc agaatgatca aacgtggaat + 601 caatgaccga aatttctgga ggggtgaaaa tggacgaagg acaagggttg cttatgaaag + 661 aatgtgcaat atcctcaaag gaaaatttca aacagctgcc cagagggcaa tgatggatca + 721 agtaagagaa agtcgaaacc caggaaacgc tgagattgaa gacctcattt tcctggcacg + 781 gtcagcactc attctgaggg gatcagttgc acataaatcc tgcctgcctg cttgtgtgta + 841 tgggcttgca gtagcaagtg ggcatgactt tgaaagggaa gggtactcac tggtcgggat + 901 agacccattc aaattactcc aaaacagcca agtggtcagc ctgatgagac caaatgaaaa + 961 cccagctcac aagagtcaat tggtgtggat ggcatgccac tctgctgcat ttgaagattt + 1021 aagagtatca agtttcataa gaggaaagaa agtgattcca agaggaaagc tttccacaag + 1081 aggggtccag attgcttcaa atgagaatgt ggaaaccatg gactccaata ccctggaact + 1141 gagaagcaga tactgggcca taaggaccag gagtggagga aataccaatc aacaaaaggc + 1201 atccgcaggc cagatcagtg tgcagcctac attctcagtg cagcggaatc tcccttttga + 1261 aagagcaacc gttatggcag cattcagcgg gaacaatgaa ggacggacat ccgacatgcg + 1321 aacagaagtt ataagaatga tggaaagtgc aaagccagaa gatttgtcct tccaggggcg + 1381 gggagtcttc gagctctcgg acgaaaaggc aacgaacccg atcgtgcctt cctttgacat + 1441 gagt +// \ No newline at end of file