diff --git a/lib/Bio/Root/IO.pm b/lib/Bio/Root/IO.pm index ce9c081..79bec62 100644 --- a/lib/Bio/Root/IO.pm +++ b/lib/Bio/Root/IO.pm @@ -521,21 +521,28 @@ sub _insert { =head2 _readline Title : _readline - Usage : $io->_readline(%args) - Function: Read a line of input and convert the line ending to "\n". - - By default, this method uses the value of $/, Perl's input record - separator, to detect the end of each line. Note that the current - implementation does not handle pushed back input correctly unless - the pushed back input ends with the value of $/. - - When the PerlIO::eol module is installed, you can use it to convert - line endings by setting $Bio::Root::IO::HAS_EOL to 1. - - Args : A hash of arguments: - -raw: 1 to do not convert line endings, i.e. keep them as "\r\n" - in Windows-formatted files. This option has no effect when - using $Bio::Root::IO::HAS_EOL = 1. + Usage : local $Bio::Root::IO::HAS_EOL = 1; + my $io = Bio::Root::IO->new(-file => 'data.txt'); + my $line = $io->_readline(); + $io->close; + Function: Read a line of input and normalize all end of line characters. + + End of line characters are typically "\n" on Linux platforms, "\r\n" + on Windows and "\r" on older Mac OS. By default, the _readline() + method uses the value of $/, Perl's input record separator, to + detect the end of each line. This means that you will not get the + expected lines if your input has Mac-formatted end of line characters. + Also, note that the current implementation does not handle pushed + back input correctly unless the pushed back input ends with the + value of $/. For each line parsed, its line ending, e.g. "\r\n" is + converted to "\n", unless you provide the -raw argument. + + Altogether it is easier to let the PerlIO::eol module automatically + detect the proper end of line character and normalize it to "\n". Do + so by setting $Bio::Root::IO::HAS_EOL to 1. 
+ + Args : -raw : Avoid converting end of line characters to "\n". This option + has no effect when using $Bio::Root::IO::HAS_EOL = 1. Returns : Line of input, or undef when there is nothing to read anymore =cut @@ -553,11 +560,9 @@ sub _readline { $line = <$fh>; } - # don't strip line endings if -raw is specified - # $line =~ s/\r\n/\n/g if( (!$param{-raw}) && (defined $line) ); - # Dave Howorth's fix if( !$HAS_EOL && !$param{-raw} && (defined $line) ) { - $line =~ s/\015\012/\012/g; # Change all CR/LF pairs to LF + # don't strip line endings if -raw or $HAS_EOL is specified + $line =~ s/\015\012/\012/g; # Change all CR/LF pairs to LF $line =~ tr/\015/\n/ unless $ONMAC; # Change all single CRs to NEWLINE } return $line; diff --git a/t/IO.t b/t/IO.t index 37ed27b..361a0f4 100644 --- a/t/IO.t +++ b/t/IO.t @@ -6,7 +6,7 @@ use warnings; BEGIN { use lib '.'; use Bio::Root::Test; - test_begin(-tests => 136); + test_begin(-tests => 148); use_ok('Bio::Root::IO'); } @@ -197,30 +197,36 @@ is_deeply \@content, ["insertion at line 1\n"]; { ok my $unix_rio = Bio::Root::IO->new(-file => test_input_file('U71225.gb.unix')); ok my $win_rio = Bio::Root::IO->new(-file => test_input_file('U71225.gb.win' )); + ok my $mac_rio = Bio::Root::IO->new(-file => test_input_file('U71225.gb.mac' )); my $expected = "LOCUS U71225 1164 bp DNA linear VRT 27-NOV-2001\n"; is $unix_rio->_readline, $expected; is $win_rio->_readline , $expected; + like $mac_rio->_readline, qr#^LOCUS.*//\n$#ms; + # line spans entire file because lines end with "\r" but $/ is "\n" $expected = "DEFINITION Desmognathus quadramaculatus 12S ribosomal RNA gene, partial\n"; is $unix_rio->_readline, $expected; is $win_rio->_readline , $expected; + is $mac_rio->_readline , undef; $expected = " sequence; tRNA-Val gene, complete sequence; and 16S ribosomal RNA\n"; is $unix_rio->_readline, $expected; is $win_rio->_readline , $expected; + is $mac_rio->_readline , undef; $expected = " gene, partial sequence, mitochondrial genes for 
mitochondrial RNAs.\n"; is $unix_rio->_readline, $expected; is $win_rio->_readline , $expected; + is $mac_rio->_readline , undef; $expected = "ACCESSION U71225\n"; is $unix_rio->_readline, $expected; is $win_rio->_readline , $expected; + is $mac_rio->_readline , undef; is $win_rio->_readline( -raw => 1) , "VERSION U71225.1 GI:2804359\r\n"; is $win_rio->_readline( -raw => 0) , "KEYWORDS .\n"; - } @@ -229,30 +235,39 @@ is_deeply \@content, ["insertion at line 1\n"]; ############################################## SKIP: { - test_skip(-tests => 14, -requires_module => 'PerlIO::eol'); + test_skip(-tests => 20, -requires_module => 'PerlIO::eol'); + local $Bio::Root::IO::HAS_EOL = 1; ok my $unix_rio = Bio::Root::IO->new(-file => test_input_file('U71225.gb.unix')); ok my $win_rio = Bio::Root::IO->new(-file => test_input_file('U71225.gb.win' )); + ok my $mac_rio = Bio::Root::IO->new(-file => test_input_file('U71225.gb.mac' )); + my $expected = "LOCUS U71225 1164 bp DNA linear VRT 27-NOV-2001\n"; is $unix_rio->_readline, $expected; is $win_rio->_readline , $expected; + is $mac_rio->_readline , $expected; $expected = "DEFINITION Desmognathus quadramaculatus 12S ribosomal RNA gene, partial\n"; is $unix_rio->_readline, $expected; is $win_rio->_readline , $expected; + is $mac_rio->_readline , $expected; $expected = " sequence; tRNA-Val gene, complete sequence; and 16S ribosomal RNA\n"; is $unix_rio->_readline, $expected; is $win_rio->_readline , $expected; + is $mac_rio->_readline , $expected; $expected = " gene, partial sequence, mitochondrial genes for mitochondrial RNAs.\n"; is $unix_rio->_readline, $expected; is $win_rio->_readline , $expected; + is $mac_rio->_readline , $expected; $expected = "ACCESSION U71225\n"; is $unix_rio->_readline, $expected; is $win_rio->_readline , $expected; + is $mac_rio->_readline , $expected; + # $HAS_EOL ignores -raw is $win_rio->_readline( -raw => 1) , "VERSION U71225.1 GI:2804359\n"; is $win_rio->_readline( -raw => 0) , "KEYWORDS .\n"; }