Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Bug 3415

  • Loading branch information...
commit 74288446602db5933c89f04e4718a5ddc5209d5d 1 parent 0e0ddd3
Brian Osborne (bosborne) authored
Showing with 166 additions and 142 deletions.
  1. +3 −3 Bio/AlignIO/meme.pm
  2. +163 −139 Bio/SearchIO/cross_match.pm
6 Bio/AlignIO/meme.pm
View
@@ -139,11 +139,11 @@ sub next_aln {
}
# The first regexp is for version 3, the second is for version 4
elsif ( $line =~ /^(\S+)\s+([+-]?)\s+(\d+)\s+
- \S+\s+[.ACTGNX\-]*\s+([ACTGNX\-]+)\s+
- ([.ACTGNX\-]*)/xi
+ \S+\s+[.A-Z\-]*\s+([A-Z\-]+)\s+
+ ([.A-Z\-]*)/xi
||
$line =~ /^(\S+)\s+([+-]?)\s+(\d+)\s+
- \S+\s+\.\s+([ACTGNX\-]+)/xi
+ \S+\s+\.\s+([A-Z\-]+)/xi
)
{
# Got a sequence line
302 Bio/SearchIO/cross_match.pm
View
@@ -22,16 +22,13 @@ Bio::SearchIO::cross_match - CrossMatch-specific subclass of Bio::SearchIO
-file => "$file.screen.out" )
while(my $r = $searchIO->next_result) {
while(my $hit = $r->next_hit) {
- while(my $hsp = $hit->next_hsp) {
+ while(my $hsp = $hit->next_hsp) {
#Do the processing here.
}
}
}
-# See Bio::SearchIO for information about working with Results.
-
-# See L<Bio::SearchIO>
-# for details about working with Bio::SearchIO.
+See L<Bio::SearchIO> for details about working with Bio::SearchIO.
=head1 DESCRIPTION
@@ -121,43 +118,51 @@ See L<Bio::Root::RootI>
=cut
sub next_result {
- my ($self) = @_;
- my $start = 0;
- while( defined ($_ = $self->_readline )) {
- return if($self->{'_end_document'});
- if(/^cross_match version\s+(.*?)$/) {
- $self->{_algorithm_version} = $1;
- } elsif(/^Maximal single base matches/) {
- $start = 1;
- } elsif(/^(\d+) matching entries/) {
- $self->{'_end_document'} = 1;
- return;
- } elsif(($start || $self->{'_result_count'}) && /^\s+(\d+)/xms) {
- $self->{'_result_count'}++;
- return $self->_parse($_);
- } elsif(! $self->{_parameters}) {
- if(/.*?\s+(\-.*?)$/) {
- my $p = $1;
- my @pp = split /\s+/, $p;
- for(my $i = 0; $i < @pp; $i ++) {
- if($pp[$i] =~ /^\-/) {
- if($pp[$i + 1] && $pp[$i + 1] !~ /^\-/) {
- $self->{_parameters}->{$pp[$i]} = $pp[$i + 1];
- $i ++;
- } else {
- $self->{_parameters}->{$pp[$i]} = "";
- }
- }
- }
- }
- } elsif(/^Query file(s):\s+(.*?)$/) {
- $self->{_query_name} = $1;
- } elsif(/^Subject file(s):\s+(.*?)$/) {
- $self->{_subject_name} = $2;
+ my ($self) = @_;
+ my $start = 0;
+ while ( defined( $_ = $self->_readline ) ) {
+ return if ( $self->{'_end_document'} );
+ if (/^cross_match version\s+(.*?)$/) {
+ $self->{_algorithm_version} = $1;
+ }
+ elsif (/^Maximal single base matches/) {
+ $start = 1;
+ }
+ elsif (/^(\d+) matching entries/) {
+ $self->{'_end_document'} = 1;
+ return;
+ }
+ elsif ( ( $start || $self->{'_result_count'} ) && /^\s*(\d+)/xms ) {
+ $self->{'_result_count'}++;
+ return $self->_parse($_);
+ }
+ elsif ( !$self->{_parameters} ) {
+ if (/.*?\s+(\-.*?)$/) {
+ my $p = $1;
+ my @pp = split /\s+/, $p;
+ for ( my $i = 0 ; $i < @pp ; $i++ ) {
+ if ( $pp[$i] =~ /^\-/ ) {
+ if ( $pp[ $i + 1 ] && $pp[ $i + 1 ] !~ /^\-/ ) {
+ $self->{_parameters}->{ $pp[$i] } = $pp[ $i + 1 ];
+ $i++;
+ }
+ else {
+ $self->{_parameters}->{ $pp[$i] } = "";
+ }
+ }
+ }
+ }
+ }
+ elsif (/^Query file(s):\s+(.*?)$/) {
+ $self->{_query_name} = $1;
+ }
+ elsif (/^Subject file(s):\s+(.*?)$/) {
+ $self->{_subject_name} = $2;
+ }
}
- }
}
+
=head2 _alignment
Title : _alignment
@@ -166,60 +171,62 @@ sub next_result {
=cut
sub _alignment {
- my $self = shift;
-
+ my $self = shift;
+
# C H_EO-aaa01PCR02 243 CCTCTGAATGGCTGAAGACCCCTCTGCCGAGGGAGGTTGGGGATTGTGGG 194
-#
+#
# 0284119_008.c1- 1 CCTCTGAATGGCTGAAGACCCCTCTGCCGAGGGAGGTTGGGGATTGTGGG 50
#
# C H_EO-aaa01PCR02 193 ACAAGGTCCCTTGGTGCTGATGGCCTGAAGGGGCCTGAGCTGTGGGCAGA 144
-#
+#
# 0284119_008.c1- 51 ACAAGGTCCCTTGGTGCTGATGGCCTGAAGGGGCCTGAGCTGTGGGCAGA 100
#
# C H_EO-aaa01PCR02 143 TGCAGTTTTCTGTGGGCTTGGGGAACCTCTCACGTTGCTGTGTCCTGGTG 94
-#
+#
# 0284119_008.c1- 101 TGCAGTTTTCTGTGGGCTTGGGGAACCTCTCACGTTGCTGTGTCCTGGTG 150
#
# C H_EO-aaa01PCR02 93 AGCAGCCCGACCAATAAACCTGCTTTTCTAAAAGGATCTGTGTTTGATTG 44
-#
+#
# 0284119_008.c1- 151 AGCAGCCCGACCAATAAACCTGCTTTTCTAAAAGGATCTGTGTTTGATTG 200
#
# C H_EO-aaa01PCR02 43 TATTCTCTGAAGGCAGTTACATAGGGTTACAGAGG 9
-#
+#
# 0284119_008.c1- 201 TATTCTCTGAAGGCAGTTACATAGGGTTACAGAGG 235
-
-
-
- #LSF: Should be the blank line. Otherwise error.
- my $blank = $self->_readline;
- unless($blank =~ /^\s*$/) {
- return;
- }
- my @data;
- my @pad;
- $count = 0;
- while( defined ($_ = $self->_readline )) {
- $count = 0 if($count >= 3);
- next if(/^$/);
- if(/^(C \S+.*?\d+ )(\S+) \d+$|^( \S+.*?\d+ )(\S+) \d+$$|^\s+$/) {
- $count ++;
- if($1 || $3) {
- $pad[$count] = $1 ? $1 : $3;
- push @{$data[$count]}, ($2 ? $2 : $4);
- } else {
- if(/\s{$pad[0],$pad[0]}(.*?)$/) {
- push @{$data[$count]}, $1;
- } else {
- $self->throw("Format error for the homology line [$_].");
- }
- }
- } else {
- last;
+
+ # LSF: The next line should be blank; if it is not, return without reading an alignment.
+ my $blank = $self->_readline;
+ unless ( $blank =~ /^\s*$/ ) {
+ return;
+ }
+ my @data;
+ my @pad;
+ $count = 0;
+ while ( defined( $_ = $self->_readline ) ) {
+ $count = 0 if ( $count >= 3 );
+ next if (/^$/);
+ if (/^(C \S+.*?\d+ )(\S+) \d+$|^( \S+.*?\d+ )(\S+) \d+$$|^\s+$/) {
+ $count++;
+ if ( $1 || $3 ) {
+ $pad[$count] = $1 ? $1 : $3;
+ push @{ $data[$count] }, ( $2 ? $2 : $4 );
+ }
+ else {
+ if (/\s{$pad[0],$pad[0]}(.*?)$/) {
+ push @{ $data[$count] }, $1;
+ }
+ else {
+ $self->throw("Format error for the homology line [$_].");
+ }
+ }
+ }
+ else {
+ last;
+ }
}
- }
- return @data;
+ return @data;
}
+
=head2 _parse
Title : _parse
@@ -228,70 +235,86 @@ sub _alignment {
=cut
sub _parse {
- my $self = shift;
- my $line = shift;
- my $is_alignment = 0;
- my($hit_seq, $homology_seq, $query_seq);
-# 32 5.13 0.00 0.00 H_DO-0065PCR0005792_034a.b1-1 327 365 (165) C 1111547847_forward (0) 39 1
+ my $self = shift;
+ my $line = shift;
+ my $is_alignment = 0;
+ my ( $hit_seq, $homology_seq, $query_seq );
+
+# 32 5.13 0.00 0.00 H_DO-0065PCR0005792_034a.b1-1 327 365 (165) C 1111547847_forward (0) 39 1
#OR
-#ALIGNMENT 32 5.13 0.00 0.00 H_DO-0065PCR0005792_034a.b1-1 327 365 (165) C 1111547847_forward (0) 39 1
- $line =~ s/^\s+|\s+$//g;
- my @r = split /\s+/, $line;
- if($r[0] eq "ALIGNMENT") {
- $is_alignment = 1;
- shift @r;
- ($hit_seq, $homology_seq, $query_seq) = $self->_alignment();
- }
- my $subject_seq_id;
- my $query_seq_id = $r[4];
- my $query_start = $r[5];
- my $query_end = $r[6];
- my $is_complement = 0;
- my $subject_start;
- my $subject_end;
- if($r[8] eq "C" && $r[9] !~ /^\(\d+\)$/) {
- $subject_seq_id = $r[9];
- $is_complement = 1;
- $subject_start = $r[11];
- $subject_end = $r[12];
- } else {
- $subject_seq_id = $r[8];
- $subject_start = $r[9];
- $subject_end = $r[10];
- }
- my $hit = Bio::Search::Hit::GenericHit->new(-name => $subject_seq_id,
- -hsps => [Bio::Search::HSP::GenericHSP->new(-query_name => $query_seq_id,
- -query_start => $query_start,
- -query_end => $query_end,
- -hit_name => $subject_seq_id,
- -hit_start => $subject_start,
- -hit_end => $subject_end,
- -query_length => 0,
- -hit_length => 0,
- -identical => $r[0],
- -conserved => $r[0],
- -query_seq => $query_seq ? (join "", @$query_seq) : "", #query sequence portion of the HSP
- -hit_seq => $hit_seq ? (join "", @$hit_seq) : "", #hit sequence portion of the HSP
- -homology_seq=> $homology_seq ? (join "", @$homology_seq) : "", #homology sequence for the HSP
- #LSF: Need the direction, just to fool the GenericHSP module.
- -algorithm => 'SW',)],
- );
- my $result = Bio::Search::Result::CrossMatchResult->new( -query_name => $self->{_query_name},
- -query_accession => '',
- -query_description => '',
- -query_length => 0,
- -database_name => $self->{_subject_name},
- -database_letters => 0,
- -database_entries => 0,
- -parameters => $self->{_parameters},
- -statistics => { },
- -algorithm => 'cross_match',
- -algorithm_version => $self->{_algorithm_version},
- );
- $result->add_hit($hit);
- return $result;
+#ALIGNMENT 32 5.13 0.00 0.00 H_DO-0065PCR0005792_034a.b1-1 327 365 (165) C 1111547847_forward (0) 39 1
+ $line =~ s/^\s+|\s+$//g;
+ my @r = split /\s+/, $line;
+ if ( $r[0] eq "ALIGNMENT" ) {
+ $is_alignment = 1;
+ shift @r;
+ ( $hit_seq, $homology_seq, $query_seq ) = $self->_alignment();
+ }
+ my $subject_seq_id;
+ my $query_seq_id = $r[4];
+ my $query_start = $r[5];
+ my $query_end = $r[6];
+ my $is_complement = 0;
+ my $subject_start;
+ my $subject_end;
+
+ if ( $r[8] eq "C" && $r[9] !~ /^\(\d+\)$/ ) {
+ $subject_seq_id = $r[9];
+ $is_complement = 1;
+ $subject_start = $r[11];
+ $subject_end = $r[12];
+ }
+ else {
+ $subject_seq_id = $r[8];
+ $subject_start = $r[9];
+ $subject_end = $r[10];
+ }
+ my $hit = Bio::Search::Hit::GenericHit->new(
+ -name => $subject_seq_id,
+ -hsps => [
+ Bio::Search::HSP::GenericHSP->new(
+ -query_name => $query_seq_id,
+ -query_start => $query_start,
+ -query_end => $query_end,
+ -hit_name => $subject_seq_id,
+ -hit_start => $subject_start,
+ -hit_end => $subject_end,
+ -query_length => 0,
+ -hit_length => 0,
+ -identical => $r[0],
+ -conserved => $r[0],
+ -query_seq => $query_seq
+ ? ( join "", @$query_seq )
+ : "", #query sequence portion of the HSP
+ -hit_seq => $hit_seq
+ ? ( join "", @$hit_seq )
+ : "", #hit sequence portion of the HSP
+ -homology_seq => $homology_seq
+ ? ( join "", @$homology_seq )
+ : "", #homology sequence for the HSP
+ #LSF: Need the direction, just to fool the GenericHSP module.
+ -algorithm => 'SW',
+ )
+ ],
+ );
+ my $result = Bio::Search::Result::CrossMatchResult->new(
+ -query_name => $self->{_query_name},
+ -query_accession => '',
+ -query_description => '',
+ -query_length => 0,
+ -database_name => $self->{_subject_name},
+ -database_letters => 0,
+ -database_entries => 0,
+ -parameters => $self->{_parameters},
+ -statistics => {},
+ -algorithm => 'cross_match',
+ -algorithm_version => $self->{_algorithm_version},
+ );
+ $result->add_hit($hit);
+ return $result;
}
+
=head2 result_count
Title : result_count
@@ -304,9 +327,10 @@ sub _parse {
=cut
sub result_count {
- my $self = shift;
- return $self->{'_result_count'};
+ my $self = shift;
+ return $self->{'_result_count'};
}
+
1;
#$Header$
Please sign in to comment.
Something went wrong with that request. Please try again.