Skip to content

Commit

Permalink
Item13338: added stubs for Encode:: functions to make it easier to sw…
Browse files Browse the repository at this point in the history
…ap in alternatives. Cleaned up UTF8 errors testcase.
  • Loading branch information
Comment committed May 29, 2015
1 parent 1f3d3ed commit 2ab35f8
Show file tree
Hide file tree
Showing 26 changed files with 162 additions and 138 deletions.
39 changes: 12 additions & 27 deletions PlainFileStoreContrib/lib/Foswiki/Store/PlainFile.pm
Expand Up @@ -81,29 +81,14 @@ BEGIN {

# Interface to file operations.

*_decode = sub {
return $_[0] unless defined $_[0];
my $s = $_[0];
return Encode::decode( $Foswiki::cfg{Store}{Encoding} || 'utf-8',
$s, Encode::FB_HTMLCREF );
};
*_decode = \&Foswiki::Store::decode;

# readdir returns bytes
*_readdir = sub {
map { _decode($_) } readdir( $_[0] );
};

*_encode = sub {
return $_[0] unless utf8::is_utf8( $_[0] );
my $s = $_[0];
return Encode::encode(
$Foswiki::cfg{Store}{Encoding} || 'utf-8', $s,

# Throw an exception if the {Store}{Encoding}
# can't represent a unicode character
Encode::FB_CROAK
);
};
*_encode = \&Foswiki::Store::encode;

# The remaining file level functions work on wide chars,
# silently converting to utf-8. But we want to explicitly
Expand Down Expand Up @@ -365,7 +350,7 @@ sub testAttachment {
sub openAttachment {
my ( $this, $meta, $att, $mode, @opts ) = @_;
ASSERT($att) if DEBUG;
return _openStream( $meta, $att, $mode, @opts );
return _openBinaryStream( $meta, $att, $mode, @opts );
}

# Implement Foswiki::Store
Expand Down Expand Up @@ -650,7 +635,7 @@ sub atomicLockInfo {
my ( $this, $meta ) = @_;
my $filename = _getData($meta) . '.lock';
if ( _e $filename ) {
my $t = _readFile($filename);
my $t = _readTextFile($filename);
return split( /\s+/, $t, 2 );
}
return ( undef, undef );
Expand Down Expand Up @@ -899,7 +884,7 @@ sub getLease {
my $filename = _getData($meta) . '.lease';
my $lease;
if ( _e $filename ) {
my $t = _readFile($filename);
my $t = _readTextFile($filename);
$lease = { split( /\r?\n/, $t ) };
}
return $lease;
Expand Down Expand Up @@ -1108,7 +1093,7 @@ DONE
# No existing revs; create
# If this is a topic, correct the TOPICINFO
unless ($attachment) {
my $t = _readFile($latest);
my $t = _readTextFile($latest);

$t =~ s/^%META:TOPICINFO\{(.*)\}%$//m;
$t =
Expand Down Expand Up @@ -1158,7 +1143,7 @@ sub _latestIsNewer {
sub _readMetaFile {
my $mf = shift;
return () unless _e $mf;
return split( "\n", _readFile($mf), 2 );
return split( "\n", _readTextFile($mf), 2 );
}

sub _writeMetaFile {
Expand All @@ -1170,7 +1155,7 @@ sub _writeMetaFile {
sub _readChanges {
my ( $file, $web ) = @_;

my $all_lines = Foswiki::Sandbox::untaintUnchecked( _readFile($file) );
my $all_lines = Foswiki::Sandbox::untaintUnchecked( _readTextFile($file) );

# Look at the first line to deduce format
if ( $all_lines =~ m/^\[/s ) {
Expand Down Expand Up @@ -1307,7 +1292,7 @@ sub eachChange {
}

# Read an entire (text) file
sub _readFile {
sub _readTextFile {
my $name = shift;

my $IN_FILE;
Expand All @@ -1331,7 +1316,7 @@ sub _readFile {
}

# Open a stream onto a (binary) file
sub _openStream {
sub _openBinaryStream {
my ( $meta, $att, $mode, %opts ) = @_;
my $stream;

Expand Down Expand Up @@ -1515,15 +1500,15 @@ sub _getRevision {
if ( $nr && $version && $version <= $nr ) {
my $fn = _historyDir( $meta, $attachment ) . "/$version";
if ( _e $fn ) {
return ( _readFile($fn), $version == $nr );
return ( _readTextFile($fn), $version == $nr );
}
}
my $latest = _latestFile( $meta, $attachment );

return ( undef, 0 ) unless _e $latest;

# no version given, give latest (may not be checked in yet)
return ( _readFile($latest), 1 );
return ( _readTextFile($latest), 1 );
}

# Split a string on \n making sure we have all newlines. If the string
Expand Down
4 changes: 2 additions & 2 deletions RCSStoreContrib/lib/Foswiki/Store/Rcs/Handler.pm
Expand Up @@ -56,8 +56,8 @@ BEGIN {
import locale();
}

*_decode = \&Foswiki::Store::Rcs::Store::_decode;
*_encode = \&Foswiki::Store::Rcs::Store::_encode;
*_decode = \&Foswiki::Store::decode;
*_encode = \&Foswiki::Store::encode;
*_stat = \&Foswiki::Store::Rcs::Store::_stat;
*_unlink = \&Foswiki::Store::Rcs::Store::_unlink;
*_e = sub { -e _encode( $_[0] ) };
Expand Down
4 changes: 2 additions & 2 deletions RCSStoreContrib/lib/Foswiki/Store/Rcs/RcsLiteHandler.pm
Expand Up @@ -129,8 +129,8 @@ BEGIN {
import locale();
}

*_decode = \&Foswiki::Store::Rcs::Store::_decode;
*_encode = \&Foswiki::Store::Rcs::Store::_encode;
*_decode = \&Foswiki::Store::decode;
*_encode = \&Foswiki::Store::encode;
}

# implements Rcs::Handler
Expand Down
4 changes: 2 additions & 2 deletions RCSStoreContrib/lib/Foswiki/Store/Rcs/RcsWrapHandler.pm
Expand Up @@ -31,8 +31,8 @@ BEGIN {
import locale();
}

*_decode = \&Foswiki::Store::Rcs::Store::_decode;
*_encode = \&Foswiki::Store::Rcs::Store::_encode;
*_decode = \&Foswiki::Store::decode;
*_encode = \&Foswiki::Store::encode;
}

sub new {
Expand Down
20 changes: 2 additions & 18 deletions RCSStoreContrib/lib/Foswiki/Store/Rcs/Store.pm
Expand Up @@ -56,24 +56,8 @@ BEGIN {
if ($Foswiki::UNICODE) {
require Encode;

*_decode = sub {
return $_[0] unless defined $_[0];
my $s = $_[0];
return Encode::decode( $Foswiki::cfg{Store}{Encoding} || 'utf-8',
$s, Encode::FB_CROAK );
};

*_encode = sub {
return $_[0] unless defined $_[0];
my $s = $_[0];
return Encode::encode(
$Foswiki::cfg{Store}{Encoding} || 'utf-8', $s,

# Throw an exception if the {Store}{Encoding}
# can't represent a unicode character
Encode::FB_CROAK
);
};
*_decode = \&Foswiki::Store::decode;
*_encode = \&Foswiki::Store::encode;
*_stat = sub { stat( _encode( $_[0] ) ); };
*_unlink = sub { unlink( _encode( $_[0] ) ); };
}
Expand Down
22 changes: 11 additions & 11 deletions UnitTestContrib/test/unit/FoswikiStoreTestCase.pm
Expand Up @@ -104,7 +104,7 @@ sub fixture_groups {
}
}

sub _mkFiles {
sub _make_data {
my $this = shift;
my $FILE;
my $enc = $Foswiki::cfg{Store}{Encoding} || 'utf-8';
Expand All @@ -121,6 +121,14 @@ sub _mkFiles {
close($FILE);
}

sub open_data {
my ( $this, $k ) = @_;

my $fh;
open( $fh, '<', $this->{$k} );
return $fh;
}

sub utf8 {
my $this = shift;
$Foswiki::cfg{Site}{Locale} = 'en_US.utf-8';
Expand All @@ -133,7 +141,7 @@ sub utf8 {
$this->{t_datapath} = "$Foswiki::cfg{TempfileDir}/$this->{t_datafile}";
$this->{t_datafile2} = "پښتانهټبرونه.gif";
$this->{t_datapath2} = "$Foswiki::cfg{TempfileDir}/$this->{t_datafile2}";
$this->_mkFiles();
$this->_make_data();
}

sub iso8859 {
Expand All @@ -152,15 +160,7 @@ sub iso8859 {
$this->{t_topic} = "Test${n}Topic";
$this->{t_datafile} = "${n}1.gif";
$this->{t_datafile2} = "${n}2.gif";
$this->_mkFiles();
}

sub open_data {
my ( $this, $k ) = @_;

my $fh;
open( $fh, '<', $this->{$k} );
return $fh;
$this->_make_data();
}

1;
39 changes: 14 additions & 25 deletions core/data/TestCases/TestCaseUtf8Errors.txt
Expand Up @@ -6,11 +6,11 @@

-->

---++ Demo of *some* UTF8 errors
---++ Demo of some UTF8 challenges

View the topic with System.EditRowPlugin disabled, and the utf8 text is rendered
correctly. [[%TOPIC%?debugenableplugins=JQueryPlugin][Click here to reload this topic with only the JQueryPlugin enabled]].
*Note* this page uses Javascript to perform some check. The source is included inline below.

[[%TOPIC%?debugenableplugins=JQueryPlugin][Click here to reload this topic with only the JQueryPlugin enabled]].

%TOC%

Expand All @@ -20,7 +20,7 @@ correctly. [[%TOPIC%?debugenableplugins=JQueryPlugin][Click here to reload thi
| Q-plain | %PLTWL1%,%PUTWL1%,%PWTWL1%,%PTTWL1% | %QUERY{"preferences[name =~ 'TWL1'].value"}% | :FWTEST: |
| Q-json | ["%PLTWL1%","%PUTWL1%","%PWTWL1%","%PTTWL1%"] | %QUERY{"preferences[name =~ 'TWL1'].value" style="json"}% | :FWTEST: |

---++ Regex QUERY - fails for character classes and case comparisons too...
---++ Regex QUERY
| *Testname* | *Wanted* | *Actual* | *Result* |
| asc-re2: /AbCdefgh/ =~ /\wCde\w/ | 1 | %QUERY{"preferences[name='PWTWA1'].value =~ '\wCde\w'"}% | :FWTEST: |
| asc-re3: /AbCdefgh/ =~ /(?i)cDE/ | 1 | %QUERY{"preferences[name='PWTWA1'].value =~ '(?i)cDE'"}% | :FWTEST: |
Expand All @@ -29,19 +29,19 @@ correctly. [[%TOPIC%?debugenableplugins=JQueryPlugin][Click here to reload thi
| utf-re3: /ÌæĈąṁēńã/ =~ /(?i)ĉĄṀ/ | 1 | %QUERY{"preferences[name='PWTWL1'].value =~ '(?i)ĉĄṀ'"}% | :FWTEST: |
| utf-re4: /ÌæĈąṁēńã/ =~ /[<nop>[:upper:]]/ | 1 | %QUERY{"preferences[name='PWTWL1'].value =~ '[[:upper:]]'"}% | :FWTEST: |

---++ QUERY lc/uc - fails
---++ QUERY lc/uc
| *Testname* | *Wanted* | *Actual* | *Result* |
| WTWA1: lc(%PWTWA1%) | %PLTWA1% | %QUERY{"lc(preferences[name='PWTWA1'].value)"}% | :FWTEST: |
| WTWA1: uc(%PWTWA1%) | %PUTWA1% | %QUERY{"uc(preferences[name='PWTWA1'].value)"}% | :FWTEST: |
| WTWL1: lc(%PWTWL1%) | %PLTWL1% | %QUERY{"lc(preferences[name='PWTWL1'].value)"}% | :FWTEST: |
| WTWL1: uc(%PWTWL1%) | %PUTWL1% | %QUERY{"uc(preferences[name='PWTWL1'].value)"}% | :FWTEST: |

---++ SEARCH with regex containing character classes such \w
---++ SEARCH
With regex containing character classes such \w
| *Testname* | *Wanted* | *Actual* | *Result* |
| search for =\dxx= <br> _$pattern_ regex charclass<br> =\w*xx\w*= | abc1xxdef,áčž2xxøđß,ábc3xxdeš,qwe4xxuio | %SEARCH{ "\dxx" type="regex" nonoise="on" topic="%TOPIC%" multiple="on" separator="," format="$pattern(.*?(\w*xx\w*).*)" }% | :FWTEST: |
| Besides the =\w= regex error, I actually don't understand why found the string =<nop>d<nop>x<nop>x<nop>= too. The regex contais the =\d= as a character class. ||||
| search for =\dxx= <br> _$pattern_ regex charclass<br> =\w*xx\w*= | abc1xxdef,abc1xxdef,áčž2xxøđß,ábc3xxdeš,qwe4xxuio | %SEARCH{ "\dxx" type="regex" nonoise="on" topic="%TOPIC%" multiple="on" separator="," format="$pattern(.*?(\w*\dxx\w*).*)" }% | :FWTEST: |

---++ Calc ERRORS (many string operation - examples (here are more))
---++ CALC
| *Test* | *Wanted* | *Actual* | *Result* |
| %<nop>CALC{"$FIND(e, abcdefgh, 2)"}% | 5 | %CALC{"$FIND(e, abcdefgh, 2)"}% | :FWTEST: |
| %<nop>CALC{"$FIND(ó, śõñļóțķụ, 2)"}% | 5 | %CALC{"$FIND(ó, śõñļóțķụ, 2)"}% | :FWTEST: |
Expand All @@ -59,12 +59,11 @@ correctly. [[%TOPIC%?debugenableplugins=JQueryPlugin][Click here to reload thi
| %<nop>CALC{"$TRANSLATE(cumi,ci,čí)"}% | čumí | %CALC{"$TRANSLATE(cumi,ci,čí)"}% | :FWTEST: |
| %<nop>CALC{"$TRANSLATE(čumí,čí,ci)"}% | cumi | %CALC{"$TRANSLATE(čumí,čí,ci)"}% | :FWTEST: |

------
---++ Non critical (but annoying) errors
The table sorting doesn't sorts by the accented characters. For
this will be need to use *utf8 + locale* based sorting, what is
correctly available only from perl v5.20 (using an special pragma
usable from v.5.16).
---++ SPACEOUT
=%<nop>SPACEOUT{"UśđáṁAŝșūṁėṉďáCøṅŝêĉṫēťúŕDøḻóř" separator=", "}%=
%SPACEOUT{"UśđáṁAŝșūṁėṉďáCøṅŝêĉṫēťúŕDøḻóř" separator=", "}%

---++ TABLE and EDITTABLE

Try: sort by the *Dolořęṁ* column.

Expand All @@ -78,16 +77,6 @@ Try: sort by the *Dolořęṁ* column.
| 5 | | Ábc | John | | |
| *Nìsi* | *Aḻíqúid* | *Dolořęṁ* | *Fáčërę* | *Omņįs* | *Iṗsuṁ* |

The =%<nop>SPACEOUT{"SomeUtfText"}%= won't add spaces even if the Uppercases are ASCII.
=%<nop>SPACEOUT{"UśđáṁAŝșūṁėṉďáCøṅŝêĉṫēťúŕDøḻóř" separator=", "}%=
%SPACEOUT{"UśđáṁAŝșūṁėṉďáCøṅŝêĉṫēťúŕDøḻóř" separator=", "}%

---+ Strange Errors
---++ EDITTABLE
The following *NOT utf8* table using the %<nop>EDITTABLE% (copy
from the System.EditRowPlugin), screws the normal Utf8 topic text.
(also somewhat changes the width of the previous table).

%EDITTABLE{ format="| row, -1 | text, 20, init | select, 1, not started, starting, ongoing, completed | radio, 3,:-),:-I,:-( | date, 20 |" changerows="on" quietsave="on"}%
| *Nr* | *Project* | *State* | *Progress* | *Timestamp* |
| 1 | Sliced yoghourt | completed | :-) | 26 Jun 2002 |
Expand Down
36 changes: 31 additions & 5 deletions core/lib/Foswiki.pm
Expand Up @@ -147,6 +147,32 @@ sub _getLibDir {
return $foswikiLibDir;
}

# Character encoding/decoding stubs. Done so we can ovveride
# if necessary (e.g. on OSX we may want to monkey-patch in a
# NFC/NFD module)

=begin TML
---++ StaticMethod decode_utf8($octets) -> $unicode
Decode a binary string of octets known to be encoded using UTF-8 into
perl characters (unicode).
=cut

*decode_utf8 = \&Encode::decode_utf8;

=begin TML
---++ StaticMethod encode_utf8($unicode) -> $octets
Encode a perl character string into a binary string of octets
encoded using UTF-8.
=cut

*encode_utf8 = \&Encode::encode_utf8;

BEGIN {

# First thing we do; make sure we print unicode errors
Expand Down Expand Up @@ -793,7 +819,7 @@ BOGUS
else {
# Not available from the cache, or it has dirty areas
require Compress::Zlib;
$text = Compress::Zlib::memGzip( Encode::encode_utf8($text) );
$text = Compress::Zlib::memGzip( encode_utf8($text) );
}
$binary_body = 1;
}
Expand Down Expand Up @@ -867,7 +893,7 @@ sub satisfiedByCache {
$cache->renderDirtyAreas( \$text );

# dirty pages are cached in unicode
$text = Encode::encode_utf8($text);
$text = encode_utf8($text);
}
elsif ( $Foswiki::cfg{HttpCompress} ) {

Expand Down Expand Up @@ -2151,7 +2177,7 @@ sub new {
# bin/script?topic=WebPreferences;defaultweb=Sandbox
my $defaultweb = $query->param('defaultweb') || $Foswiki::cfg{UsersWebName};

my $webtopic = Encode::decode_utf8( $query->path_info() || '' );
my $webtopic = decode_utf8( $query->path_info() || '' );
my $topicOverride = '';
my $topic = $query->param('topic');
if ( defined $topic ) {
Expand Down Expand Up @@ -2928,7 +2954,7 @@ sub urlEncode {
my $text = shift;

# URLs work quite happily with %-encoded utf-8 characters
$text = Encode::encode_utf8($text);
$text = encode_utf8($text);
$text =~ s{([^0-9a-zA-Z-_.:~!*#/])}{sprintf('%%%02x',ord($1))}ge;

return $text;
Expand All @@ -2949,7 +2975,7 @@ sub urlDecode {
my $text = shift;

$text =~ s/%([\da-f]{2})/chr(hex($1))/gei;
$text = Encode::decode_utf8($text);
$text = decode_utf8($text);

return $text;
}
Expand Down

0 comments on commit 2ab35f8

Please sign in to comment.