diff --git a/PlainFileStoreContrib/lib/Foswiki/Store/PlainFile.pm b/PlainFileStoreContrib/lib/Foswiki/Store/PlainFile.pm index 5233dab841..812e285e74 100644 --- a/PlainFileStoreContrib/lib/Foswiki/Store/PlainFile.pm +++ b/PlainFileStoreContrib/lib/Foswiki/Store/PlainFile.pm @@ -81,29 +81,14 @@ BEGIN { # Interface to file operations. - *_decode = sub { - return $_[0] unless defined $_[0]; - my $s = $_[0]; - return Encode::decode( $Foswiki::cfg{Store}{Encoding} || 'utf-8', - $s, Encode::FB_HTMLCREF ); - }; + *_decode = \&Foswiki::Store::decode; # readdir returns bytes *_readdir = sub { map { _decode($_) } readdir( $_[0] ); }; - *_encode = sub { - return $_[0] unless utf8::is_utf8( $_[0] ); - my $s = $_[0]; - return Encode::encode( - $Foswiki::cfg{Store}{Encoding} || 'utf-8', $s, - - # Throw an exception if the {Store}{Encoding} - # can't represent a unicode character - Encode::FB_CROAK - ); - }; + *_encode = \&Foswiki::Store::encode; # The remaining file level functions work on wide chars, # silently converting to utf-8. But we want to explicitly @@ -365,7 +350,7 @@ sub testAttachment { sub openAttachment { my ( $this, $meta, $att, $mode, @opts ) = @_; ASSERT($att) if DEBUG; - return _openStream( $meta, $att, $mode, @opts ); + return _openBinaryStream( $meta, $att, $mode, @opts ); } # Implement Foswiki::Store @@ -650,7 +635,7 @@ sub atomicLockInfo { my ( $this, $meta ) = @_; my $filename = _getData($meta) . '.lock'; if ( _e $filename ) { - my $t = _readFile($filename); + my $t = _readTextFile($filename); return split( /\s+/, $t, 2 ); } return ( undef, undef ); @@ -899,7 +884,7 @@ sub getLease { my $filename = _getData($meta) . 
'.lease'; my $lease; if ( _e $filename ) { - my $t = _readFile($filename); + my $t = _readTextFile($filename); $lease = { split( /\r?\n/, $t ) }; } return $lease; @@ -1108,7 +1093,7 @@ DONE # No existing revs; create # If this is a topic, correct the TOPICINFO unless ($attachment) { - my $t = _readFile($latest); + my $t = _readTextFile($latest); $t =~ s/^%META:TOPICINFO\{(.*)\}%$//m; $t = @@ -1158,7 +1143,7 @@ sub _latestIsNewer { sub _readMetaFile { my $mf = shift; return () unless _e $mf; - return split( "\n", _readFile($mf), 2 ); + return split( "\n", _readTextFile($mf), 2 ); } sub _writeMetaFile { @@ -1170,7 +1155,7 @@ sub _writeMetaFile { sub _readChanges { my ( $file, $web ) = @_; - my $all_lines = Foswiki::Sandbox::untaintUnchecked( _readFile($file) ); + my $all_lines = Foswiki::Sandbox::untaintUnchecked( _readTextFile($file) ); # Look at the first line to deduce format if ( $all_lines =~ m/^\[/s ) { @@ -1307,7 +1292,7 @@ sub eachChange { } # Read an entire (text) file -sub _readFile { +sub _readTextFile { my $name = shift; my $IN_FILE; @@ -1331,7 +1316,7 @@ sub _readFile { } # Open a stream onto a (binary) file -sub _openStream { +sub _openBinaryStream { my ( $meta, $att, $mode, %opts ) = @_; my $stream; @@ -1515,7 +1500,7 @@ sub _getRevision { if ( $nr && $version && $version <= $nr ) { my $fn = _historyDir( $meta, $attachment ) . "/$version"; if ( _e $fn ) { - return ( _readFile($fn), $version == $nr ); + return ( _readTextFile($fn), $version == $nr ); } } my $latest = _latestFile( $meta, $attachment ); @@ -1523,7 +1508,7 @@ sub _getRevision { return ( undef, 0 ) unless _e $latest; # no version given, give latest (may not be checked in yet) - return ( _readFile($latest), 1 ); + return ( _readTextFile($latest), 1 ); } # Split a string on \n making sure we have all newlines. 
If the string diff --git a/RCSStoreContrib/lib/Foswiki/Store/Rcs/Handler.pm b/RCSStoreContrib/lib/Foswiki/Store/Rcs/Handler.pm index bc8f7e8efe..4340f77981 100644 --- a/RCSStoreContrib/lib/Foswiki/Store/Rcs/Handler.pm +++ b/RCSStoreContrib/lib/Foswiki/Store/Rcs/Handler.pm @@ -56,8 +56,8 @@ BEGIN { import locale(); } - *_decode = \&Foswiki::Store::Rcs::Store::_decode; - *_encode = \&Foswiki::Store::Rcs::Store::_encode; + *_decode = \&Foswiki::Store::decode; + *_encode = \&Foswiki::Store::encode; *_stat = \&Foswiki::Store::Rcs::Store::_stat; *_unlink = \&Foswiki::Store::Rcs::Store::_unlink; *_e = sub { -e _encode( $_[0] ) }; diff --git a/RCSStoreContrib/lib/Foswiki/Store/Rcs/RcsLiteHandler.pm b/RCSStoreContrib/lib/Foswiki/Store/Rcs/RcsLiteHandler.pm index c4f78d8e65..23fef1dcb5 100644 --- a/RCSStoreContrib/lib/Foswiki/Store/Rcs/RcsLiteHandler.pm +++ b/RCSStoreContrib/lib/Foswiki/Store/Rcs/RcsLiteHandler.pm @@ -129,8 +129,8 @@ BEGIN { import locale(); } - *_decode = \&Foswiki::Store::Rcs::Store::_decode; - *_encode = \&Foswiki::Store::Rcs::Store::_encode; + *_decode = \&Foswiki::Store::decode; + *_encode = \&Foswiki::Store::encode; } # implements Rcs::Handler diff --git a/RCSStoreContrib/lib/Foswiki/Store/Rcs/RcsWrapHandler.pm b/RCSStoreContrib/lib/Foswiki/Store/Rcs/RcsWrapHandler.pm index 2b5c984568..19897bc034 100644 --- a/RCSStoreContrib/lib/Foswiki/Store/Rcs/RcsWrapHandler.pm +++ b/RCSStoreContrib/lib/Foswiki/Store/Rcs/RcsWrapHandler.pm @@ -31,8 +31,8 @@ BEGIN { import locale(); } - *_decode = \&Foswiki::Store::Rcs::Store::_decode; - *_encode = \&Foswiki::Store::Rcs::Store::_encode; + *_decode = \&Foswiki::Store::decode; + *_encode = \&Foswiki::Store::encode; } sub new { diff --git a/RCSStoreContrib/lib/Foswiki/Store/Rcs/Store.pm b/RCSStoreContrib/lib/Foswiki/Store/Rcs/Store.pm index c4cf25fdea..22580990ae 100644 --- a/RCSStoreContrib/lib/Foswiki/Store/Rcs/Store.pm +++ b/RCSStoreContrib/lib/Foswiki/Store/Rcs/Store.pm @@ -56,24 +56,8 @@ BEGIN { if 
($Foswiki::UNICODE) { require Encode; - *_decode = sub { - return $_[0] unless defined $_[0]; - my $s = $_[0]; - return Encode::decode( $Foswiki::cfg{Store}{Encoding} || 'utf-8', - $s, Encode::FB_CROAK ); - }; - - *_encode = sub { - return $_[0] unless defined $_[0]; - my $s = $_[0]; - return Encode::encode( - $Foswiki::cfg{Store}{Encoding} || 'utf-8', $s, - - # Throw an exception if the {Store}{Encoding} - # can't represent a unicode character - Encode::FB_CROAK - ); - }; + *_decode = \&Foswiki::Store::decode; + *_encode = \&Foswiki::Store::encode; *_stat = sub { stat( _encode( $_[0] ) ); }; *_unlink = sub { unlink( _encode( $_[0] ) ); }; } diff --git a/UnitTestContrib/test/unit/FoswikiStoreTestCase.pm b/UnitTestContrib/test/unit/FoswikiStoreTestCase.pm index 28d0123078..451b4d85f5 100644 --- a/UnitTestContrib/test/unit/FoswikiStoreTestCase.pm +++ b/UnitTestContrib/test/unit/FoswikiStoreTestCase.pm @@ -104,7 +104,7 @@ sub fixture_groups { } } -sub _mkFiles { +sub _make_data { my $this = shift; my $FILE; my $enc = $Foswiki::cfg{Store}{Encoding} || 'utf-8'; @@ -121,6 +121,14 @@ sub _mkFiles { close($FILE); } +sub open_data { + my ( $this, $k ) = @_; + + my $fh; + open( $fh, '<', $this->{$k} ); + return $fh; +} + sub utf8 { my $this = shift; $Foswiki::cfg{Site}{Locale} = 'en_US.utf-8'; @@ -133,7 +141,7 @@ sub utf8 { $this->{t_datapath} = "$Foswiki::cfg{TempfileDir}/$this->{t_datafile}"; $this->{t_datafile2} = "پښتانهټبرونه.gif"; $this->{t_datapath2} = "$Foswiki::cfg{TempfileDir}/$this->{t_datafile2}"; - $this->_mkFiles(); + $this->_make_data(); } sub iso8859 { @@ -152,15 +160,7 @@ sub iso8859 { $this->{t_topic} = "Test${n}Topic"; $this->{t_datafile} = "${n}1.gif"; $this->{t_datafile2} = "${n}2.gif"; - $this->_mkFiles(); -} - -sub open_data { - my ( $this, $k ) = @_; - - my $fh; - open( $fh, '<', $this->{$k} ); - return $fh; + $this->_make_data(); } 1; diff --git a/core/data/TestCases/TestCaseUtf8Errors.txt b/core/data/TestCases/TestCaseUtf8Errors.txt index 
c5e3fa5440..863678a7f3 100644 --- a/core/data/TestCases/TestCaseUtf8Errors.txt +++ b/core/data/TestCases/TestCaseUtf8Errors.txt @@ -6,11 +6,11 @@ --> ----++ Demo of *some* UTF8 errors +---++ Demo of some UTF8 challenges -View the topic with System.EditRowPlugin disabled, and the utf8 text is rendered -correctly. [[%TOPIC%?debugenableplugins=JQueryPlugin][Click here to reload this topic with only the JQueryPlugin enabled]]. +*Note* this page uses Javascript to perform some checks. The source is included inline below. +[[%TOPIC%?debugenableplugins=JQueryPlugin][Click here to reload this topic with only the JQueryPlugin enabled]]. %TOC% @@ -20,7 +20,7 @@ correctly. [[%TOPIC%?debugenableplugins=JQueryPlugin][Click here to reload thi | Q-plain | %PLTWL1%,%PUTWL1%,%PWTWL1%,%PTTWL1% | %QUERY{"preferences[name =~ 'TWL1'].value"}% | :FWTEST: | | Q-json | ["%PLTWL1%","%PUTWL1%","%PWTWL1%","%PTTWL1%"] | %QUERY{"preferences[name =~ 'TWL1'].value" style="json"}% | :FWTEST: | ----++ Regex QUERY - fails for character classes and case comparisons too... +---++ Regex QUERY | *Testname* | *Wanted* | *Actual* | *Result* | | asc-re2: /AbCdefgh/ =~ /\wCde\w/ | 1 | %QUERY{"preferences[name='PWTWA1'].value =~ '\wCde\w'"}% | :FWTEST: | | asc-re3: /AbCdefgh/ =~ /(?i)cDE/ | 1 | %QUERY{"preferences[name='PWTWA1'].value =~ '(?i)cDE'"}% | :FWTEST: | @@ -29,19 +29,19 @@ correctly. 
[[%TOPIC%?debugenableplugins=JQueryPlugin][Click here to reload thi | utf-re3: /ÌæĈąṁēńã/ =~ /(?i)ĉĄṀ/ | 1 | %QUERY{"preferences[name='PWTWL1'].value =~ '(?i)ĉĄṀ'"}% | :FWTEST: | | utf-re4: /ÌæĈąṁēńã/ =~ /[[:upper:]]/ | 1 | %QUERY{"preferences[name='PWTWL1'].value =~ '[[:upper:]]'"}% | :FWTEST: | ----++ QUERY lc/uc - fails +---++ QUERY lc/uc | *Testname* | *Wanted* | *Actual* | *Result* | | WTWA1: lc(%PWTWA1%) | %PLTWA1% | %QUERY{"lc(preferences[name='PWTWA1'].value)"}% | :FWTEST: | | WTWA1: uc(%PWTWA1%) | %PUTWA1% | %QUERY{"uc(preferences[name='PWTWA1'].value)"}% | :FWTEST: | | WTWL1: lc(%PWTWL1%) | %PLTWL1% | %QUERY{"lc(preferences[name='PWTWL1'].value)"}% | :FWTEST: | | WTWL1: uc(%PWTWL1%) | %PUTWL1% | %QUERY{"uc(preferences[name='PWTWL1'].value)"}% | :FWTEST: | ----++ SEARCH with regex containing character classes such \w +---++ SEARCH +With regex containing character classes such \w | *Testname* | *Wanted* | *Actual* | *Result* | -| search for =\dxx=
_$pattern_ regex charclass
=\w*xx\w*= | abc1xxdef,áčž2xxøđß,ábc3xxdeš,qwe4xxuio | %SEARCH{ "\dxx" type="regex" nonoise="on" topic="%TOPIC%" multiple="on" separator="," format="$pattern(.*?(\w*xx\w*).*)" }% | :FWTEST: | -| Besides the =\w= regex error, I actually don't understand why found the string =dxx= too. The regex contais the =\d= as a character class. |||| +| search for =\dxx=
_$pattern_ regex charclass
=\w*xx\w*= | abc1xxdef,abc1xxdef,áčž2xxøđß,ábc3xxdeš,qwe4xxuio | %SEARCH{ "\dxx" type="regex" nonoise="on" topic="%TOPIC%" multiple="on" separator="," format="$pattern(.*?(\w*\dxx\w*).*)" }% | :FWTEST: | ----++ Calc ERRORS (many string operation - examples (here are more)) +---++ CALC | *Test* | *Wanted* | *Actual* | *Result* | | %CALC{"$FIND(e, abcdefgh, 2)"}% | 5 | %CALC{"$FIND(e, abcdefgh, 2)"}% | :FWTEST: | | %CALC{"$FIND(ó, śõñļóțķụ, 2)"}% | 5 | %CALC{"$FIND(ó, śõñļóțķụ, 2)"}% | :FWTEST: | @@ -59,12 +59,11 @@ correctly. [[%TOPIC%?debugenableplugins=JQueryPlugin][Click here to reload thi | %CALC{"$TRANSLATE(cumi,ci,čí)"}% | čumí | %CALC{"$TRANSLATE(cumi,ci,čí)"}% | :FWTEST: | | %CALC{"$TRANSLATE(čumí,čí,ci)"}% | cumi | %CALC{"$TRANSLATE(čumí,čí,ci)"}% | :FWTEST: | ------- ----++ Non critical (but annoying) errors -The table sorting doesn't sorts by the accented characters. For -this will be need to use *utf8 + locale* based sorting, what is -correctly available only from perl v5.20 (using an special pragma -usable from v.5.16). +---++ SPACEOUT +=%SPACEOUT{"UśđáṁAŝșūṁėṉďáCøṅŝêĉṫēťúŕDøḻóř" separator=", "}%= +%SPACEOUT{"UśđáṁAŝșūṁėṉďáCøṅŝêĉṫēťúŕDøḻóř" separator=", "}% + +---++ TABLE and EDITTABLE Try: sort by the *Dolořęṁ* column. @@ -78,16 +77,6 @@ Try: sort by the *Dolořęṁ* column. | 5 | | Ábc | John | | | | *Nìsi* | *Aḻíqúid* | *Dolořęṁ* | *Fáčërę* | *Omņįs* | *Iṗsuṁ* | -The =%SPACEOUT{"SomeUtfText"}%= won't add spaces even if the Uppercases are ASCII. -=%SPACEOUT{"UśđáṁAŝșūṁėṉďáCøṅŝêĉṫēťúŕDøḻóř" separator=", "}%= -%SPACEOUT{"UśđáṁAŝșūṁėṉďáCøṅŝêĉṫēťúŕDøḻóř" separator=", "}% - ----+ Strange Errors ----++ EDITTABLE -The following *NOT utf8* table using the %EDITTABLE% (copy -from the System.EditRowPlugin), screws the normal Utf8 topic text. -(also somewhat changes the width of the previous table). 
- %EDITTABLE{ format="| row, -1 | text, 20, init | select, 1, not started, starting, ongoing, completed | radio, 3,:-),:-I,:-( | date, 20 |" changerows="on" quietsave="on"}% | *Nr* | *Project* | *State* | *Progress* | *Timestamp* | | 1 | Sliced yoghourt | completed | :-) | 26 Jun 2002 | diff --git a/core/lib/Foswiki.pm b/core/lib/Foswiki.pm index 2a02c5856c..a99a647995 100644 --- a/core/lib/Foswiki.pm +++ b/core/lib/Foswiki.pm @@ -147,6 +147,32 @@ sub _getLibDir { return $foswikiLibDir; } +# Character encoding/decoding stubs. Done so we can override +# if necessary (e.g. on OSX we may want to monkey-patch in a +# NFC/NFD module) + +=begin TML + +---++ StaticMethod decode_utf8($octets) -> $unicode + +Decode a binary string of octets known to be encoded using UTF-8 into +perl characters (unicode). + +=cut + +*decode_utf8 = \&Encode::decode_utf8; + +=begin TML + +---++ StaticMethod encode_utf8($unicode) -> $octets + +Encode a perl character string into a binary string of octets +encoded using UTF-8. 
+ +=cut + +*encode_utf8 = \&Encode::encode_utf8; + BEGIN { # First thing we do; make sure we print unicode errors @@ -793,7 +819,7 @@ BOGUS else { # Not available from the cache, or it has dirty areas require Compress::Zlib; - $text = Compress::Zlib::memGzip( Encode::encode_utf8($text) ); + $text = Compress::Zlib::memGzip( encode_utf8($text) ); } $binary_body = 1; } @@ -867,7 +893,7 @@ sub satisfiedByCache { $cache->renderDirtyAreas( \$text ); # dirty pages are cached in unicode - $text = Encode::encode_utf8($text); + $text = encode_utf8($text); } elsif ( $Foswiki::cfg{HttpCompress} ) { @@ -2151,7 +2177,7 @@ sub new { # bin/script?topic=WebPreferences;defaultweb=Sandbox my $defaultweb = $query->param('defaultweb') || $Foswiki::cfg{UsersWebName}; - my $webtopic = Encode::decode_utf8( $query->path_info() || '' ); + my $webtopic = decode_utf8( $query->path_info() || '' ); my $topicOverride = ''; my $topic = $query->param('topic'); if ( defined $topic ) { @@ -2928,7 +2954,7 @@ sub urlEncode { my $text = shift; # URLs work quite happily with %-encoded utf-8 characters - $text = Encode::encode_utf8($text); + $text = encode_utf8($text); $text =~ s{([^0-9a-zA-Z-_.:~!*#/])}{sprintf('%%%02x',ord($1))}ge; return $text; @@ -2949,7 +2975,7 @@ sub urlDecode { my $text = shift; $text =~ s/%([\da-f]{2})/chr(hex($1))/gei; - $text = Encode::decode_utf8($text); + $text = decode_utf8($text); return $text; } diff --git a/core/lib/Foswiki.spec b/core/lib/Foswiki.spec index 0994b30807..b8d6f5873e 100644 --- a/core/lib/Foswiki.spec +++ b/core/lib/Foswiki.spec @@ -1369,21 +1369,17 @@ $Foswiki::cfg{Store}{RememberChangesFor} = 2678400; # **SELECTCLASS Foswiki::Store::SearchAlgorithms::* LABEL="Search Algorithm" ** # This is the algorithm used to perform plain text (not query) searches. # Foswiki has two built-in search algorithms, both of which are designed to -# work with the default flat-file database. +# work with the default flat-file databases (RcsWrap, RcsLite, and PlainFile). 
# * The default 'Forking' algorithm, which forks a subprocess that # runs a 'grep' command, is recommended for Linux/Unix. # Forking may also work OK on Windows if you keep the directory path # to Foswiki very short. # * The 'PurePerl' algorithm, which is written in Perl and -# usually only used for native Windows installations where forking +# usually used on native Windows installations where forking # is not stable, due to limitations in the length of command lines. # On Linux/Unix you will be just fine with the 'Forking' implementation. # However if you find searches run very slowly, you may want to try a # different algorithm, which may work better on your configuration. -# For example, there is an alternative algorithm available from -# [[http://foswiki.org/Extensions/NativeSearchContrib][NativeSearchContrib]], -# that usually gives better performance with mod_perl and Speedy CGI, but -# requires root access to install. # # Other store implementations and indexing search engines (for example, # [[http://foswiki.org/Extensions/KinoSearchContrib][KinoSearchContrib]]) diff --git a/core/lib/Foswiki/Engine.pm b/core/lib/Foswiki/Engine.pm index bbca732b70..c2c478c9cc 100644 --- a/core/lib/Foswiki/Engine.pm +++ b/core/lib/Foswiki/Engine.pm @@ -192,12 +192,12 @@ sub prepareQueryParameters { if ( defined $value ) { $value =~ tr/+/ /; $value =~ s/%([0-9A-F]{2})/chr(hex($1))/gei; - $value = Encode::decode_utf8($value); + $value = Foswiki::decode_utf8($value); } if ( defined $param ) { $param =~ tr/+/ /; $param =~ s/%([0-9A-F]{2})/chr(hex($1))/gei; - $param = Encode::decode_utf8($param); + $param = Foswiki::decode_utf8($param); push( @{ $params{$param} }, $value ); push( @plist, $param ); } diff --git a/core/lib/Foswiki/Engine/CGI.pm b/core/lib/Foswiki/Engine/CGI.pm index caa06fe051..aebbfa5560 100644 --- a/core/lib/Foswiki/Engine/CGI.pm +++ b/core/lib/Foswiki/Engine/CGI.pm @@ -232,11 +232,12 @@ sub prepareBodyParameters { return unless $ENV{CONTENT_LENGTH}; my @plist = 
$this->{cgi}->multi_param(); foreach my $pname (@plist) { - my $upname = Encode::decode_utf8($pname); + my $upname = Foswiki::decode_utf8($pname); my @values; if ($Foswiki::UNICODE) { @values = - map { Encode::decode_utf8($_) } $this->{cgi}->multi_param($pname); + map { Foswiki::decode_utf8($_) } + $this->{cgi}->multi_param($pname); } else { @values = $this->{cgi}->multi_param($pname); @@ -256,7 +257,7 @@ sub prepareUploads { my %uploads; foreach my $key ( keys %{ $this->{uploads} } ) { my $fname = $this->{cgi}->param($key); - my $ufname = Encode::decode_utf8($fname); + my $ufname = Foswiki::decode_utf8($fname); $uploads{$ufname} = new Foswiki::Request::Upload( headers => $this->{cgi}->uploadInfo($fname), tmpname => $this->{cgi}->tmpFileName($fname), diff --git a/core/lib/Foswiki/I18N.pm b/core/lib/Foswiki/I18N.pm index d1a32059a6..ff3daa641e 100644 --- a/core/lib/Foswiki/I18N.pm +++ b/core/lib/Foswiki/I18N.pm @@ -332,7 +332,7 @@ sub _discover_languages { #use the cache, if available if ( open LANGUAGE, '<', "$Foswiki::cfg{WorkingDir}/languages.cache" ) { $cache_open = 1; - foreach my $line ( map { Encode::decode( 'utf-8', $_ ) } ) { + foreach my $line ( map { Foswiki::decode_utf8($_) } ) { my ( $key, $name ) = split( '=', $line ); # Filter on enabled languages @@ -351,7 +351,7 @@ sub _discover_languages { foreach my $tag ( available_languages() ) { my $h = Foswiki::I18N->get_handle($tag); my $name = eval { $h->maketext("_language_name") } or next; - print LANGUAGE Encode::encode_utf8("$tag=$name\n") if $cache_open; + print LANGUAGE Foswiki::encode_utf8("$tag=$name\n") if $cache_open; # Filter on enabled languages next diff --git a/core/lib/Foswiki/LoginManager.pm b/core/lib/Foswiki/LoginManager.pm index 3d7038f00d..cd27766c6c 100644 --- a/core/lib/Foswiki/LoginManager.pm +++ b/core/lib/Foswiki/LoginManager.pm @@ -632,7 +632,7 @@ sub redirectToLoggedOutUrl { ); #TODO: consider if we should risk passing on the urlparams on logout - my $path_info = 
Encode::decode_utf8( $session->{request}->path_info() ); + my $path_info = Foswiki::decode_utf8( $session->{request}->path_info() ); if ( my $topic = $session->{request}->param('topic') ) { #we should at least respect the ?topic= request my $topicRequest = Foswiki::Sandbox::untaintUnchecked($topic); diff --git a/core/lib/Foswiki/Net.pm b/core/lib/Foswiki/Net.pm index ffe07f4b86..691867b0e7 100644 --- a/core/lib/Foswiki/Net.pm +++ b/core/lib/Foswiki/Net.pm @@ -258,7 +258,7 @@ sub getExternalResource { if ( defined $options{content} ) { # Force body encoding to octets - $options{content} = Encode::encode_utf8( $options{content} ); + $options{content} = Foswiki::encode_utf8( $options{content} ); $req .= 'Content-length: ' . length( $options{content} ) . "\r\n"; } diff --git a/core/lib/Foswiki/PageCache.pm b/core/lib/Foswiki/PageCache.pm index 6a303c7ac0..d0e8f48bb1 100644 --- a/core/lib/Foswiki/PageCache.pm +++ b/core/lib/Foswiki/PageCache.pm @@ -243,7 +243,7 @@ sub cachePage { $data =~ s/([\t ]?)[ \t]*<\/?(nop|noautolink)\/?>/$1/gis; # clean pages are stored utf8-encoded, whether plaintext or zip - $data = Encode::encode_utf8($data); + $data = Foswiki::encode_utf8($data); if ( $Foswiki::cfg{HttpCompress} ) { # Cache compressed page diff --git a/core/lib/Foswiki/PageCache/DBI.pm b/core/lib/Foswiki/PageCache/DBI.pm index 82658f1c76..487cd000ad 100644 --- a/core/lib/Foswiki/PageCache/DBI.pm +++ b/core/lib/Foswiki/PageCache/DBI.pm @@ -137,7 +137,7 @@ sub setPageVariation { $variation->{md5} = Digest::MD5::md5_hex( - Encode::encode_utf8( $web . $topic . $variationKey ) ) + Foswiki::encode_utf8( $web . $topic . $variationKey ) ) unless defined $variation->{md5}; #writeDebug("INSERT topic $webTopic, variation=$variationKey"); @@ -288,7 +288,7 @@ sub deletePage { #writeDebug( "DELETE page $webTopic variation=" . $variationKey ); my $md5 = Digest::MD5::md5_hex( - Encode::encode_utf8( $web . $topic . $variationKey ) ); + Foswiki::encode_utf8( $web . $topic . 
$variationKey ) ); my $fileName = Foswiki::Sandbox::normalizeFileName( $this->{cacheDir} . '/' . $md5 ); diff --git a/core/lib/Foswiki/Request.pm b/core/lib/Foswiki/Request.pm index 56abbe448b..4d780ce066 100644 --- a/core/lib/Foswiki/Request.pm +++ b/core/lib/Foswiki/Request.pm @@ -487,7 +487,7 @@ name $name from utf8 to unicode. sub unicode_param { my ( $this, $key ) = @_; - return Encode::decode_utf8( $this->param($key) ); + return Foswiki::decode_utf8( $this->param($key) ); } =begin TML diff --git a/core/lib/Foswiki/Response.pm b/core/lib/Foswiki/Response.pm index 5511edd185..2bcbbc06fb 100644 --- a/core/lib/Foswiki/Response.pm +++ b/core/lib/Foswiki/Response.pm @@ -426,7 +426,7 @@ Add text content to the end of the body. Content may be unicode. sub print { my $this = shift; $this->{body} = '' unless defined $this->{body}; - $this->body( $this->{body} . Encode::encode_utf8( join( '', @_ ) ) ); + $this->body( $this->{body} . Foswiki::encode_utf8( join( '', @_ ) ) ); } =begin TML diff --git a/core/lib/Foswiki/Search.pm b/core/lib/Foswiki/Search.pm index fb7fdea46c..7b2361e0b9 100644 --- a/core/lib/Foswiki/Search.pm +++ b/core/lib/Foswiki/Search.pm @@ -341,7 +341,7 @@ sub searchWeb { # need to tell the Meta::query pager settings so it can optimise require Digest::MD5; my $string_id = $params{_RAW} || 'we had better not go there'; - $string_id = Encode::encode_utf8($string_id); + $string_id = Foswiki::encode_utf8($string_id); my $paging_ID = 'SEARCH' . 
Digest::MD5::md5_hex($string_id); $params{pager_urlparam_id} = $paging_ID; diff --git a/core/lib/Foswiki/Store.pm b/core/lib/Foswiki/Store.pm index 8921ee61a4..434fd7564d 100644 --- a/core/lib/Foswiki/Store.pm +++ b/core/lib/Foswiki/Store.pm @@ -235,6 +235,43 @@ sub getAttachmentURL { return $url; } +=begin TML + +---++ StaticMethod decode($octets) -> $unicode + +Utility function to decode a binary string of octets read from +the store and known to be encoded using the +currently selected {Store}{Encoding} (or UTF-8 if none is set) +into perl characters (unicode). May die if $octets contains +an invalid byte sequence for the encoding. + +=cut + +sub decode { + return $_[0] unless defined $_[0]; + my $s = $_[0]; + return Encode::decode( $Foswiki::cfg{Store}{Encoding} || 'utf-8', + $s, Encode::FB_CROAK ); +} + +=begin TML + +---++ StaticMethod encode($unicode) -> $octets + +Utility function to encode a perl character string into +a string of octets encoded using the currently selected +{Store}{Encoding} (or UTF-8 if none is set). May die if +=$unicode= cannot be represented in the {Store}{Encoding}. + +=cut + +sub encode { + return $_[0] unless defined $_[0]; + my $s = $_[0]; + return Encode::encode( $Foswiki::cfg{Store}{Encoding} || 'utf-8', + $s, Encode::FB_CROAK ); +} + 1; __END__ # Comment out the above two lines (1; __END__) during development of a @@ -928,7 +965,7 @@ sub removeSpuriousLeases { __END__ Foswiki - The Free and Open Source Wiki, http://foswiki.org/ -Copyright (C) 2008-2010 Foswiki Contributors. Foswiki Contributors +Copyright (C) 2008-2015 Foswiki Contributors. Foswiki Contributors are listed in the AUTHORS file in the root of this distribution. NOTE: Please extend that file, not this notice. 
diff --git a/core/lib/Foswiki/Store/SearchAlgorithms/Forking.pm b/core/lib/Foswiki/Store/SearchAlgorithms/Forking.pm index e287dc3bb4..c664646667 100644 --- a/core/lib/Foswiki/Store/SearchAlgorithms/Forking.pm +++ b/core/lib/Foswiki/Store/SearchAlgorithms/Forking.pm @@ -165,8 +165,7 @@ sub _search { # throw Error::Simple( # "$program Grep for '$searchString' returned error") } - $matches .= - Encode::decode( $Foswiki::cfg{Store}{Encoding} || 'utf-8', $m ); + $matches .= Foswiki::Store::decode($m); } } my %seen; diff --git a/core/lib/Foswiki/Store/SearchAlgorithms/PurePerl.pm b/core/lib/Foswiki/Store/SearchAlgorithms/PurePerl.pm index bb739e626f..7b35711aad 100644 --- a/core/lib/Foswiki/Store/SearchAlgorithms/PurePerl.pm +++ b/core/lib/Foswiki/Store/SearchAlgorithms/PurePerl.pm @@ -95,12 +95,17 @@ sub _search { #TODO: need to BM if this is faster than doing it via an object in the MetaCache. my $file; - if ( open( $file, '<', "$sDir/$topic.txt" ) ) { + my $enc = $Foswiki::cfg{Store}{Encoding} || 'utf-8'; + if ( + open( + $file, "<:encoding($enc)", + Foswiki::Store::encode("$sDir/$topic.txt") + ) + ) + { while ( my $line = <$file> ) { if ( &$doMatch($line) ) { chomp($line); - Encode::decode( $Foswiki::cfg{Store}{Encoding} || 'utf-8', - $line, Encode::FB_PERLQQ ); push( @{ $seen{$webtopic} }, $line ); if ( $options->{files_without_match} ) { close($file); diff --git a/core/lib/Foswiki/UI.pm b/core/lib/Foswiki/UI.pm index 1552e58c4c..92004fcb29 100644 --- a/core/lib/Foswiki/UI.pm +++ b/core/lib/Foswiki/UI.pm @@ -449,7 +449,7 @@ sub _execute { $html .= CGI::h1( {}, 'Bad Request' ); $html .= CGI::p( {}, $e->{reason} ); $html .= CGI::end_html(); - $res->print( Encode::encode_utf8($html) ); + $res->print( Foswiki::encode_utf8($html) ); } $Foswiki::engine->finalizeError( $res, $session->{request} ); } @@ -467,7 +467,7 @@ sub _execute { if (DEBUG) { # output the full message and stacktrace to the browser - $res->print( Encode::encode_utf8( $e->stringify() ) ); + $res->print( 
Foswiki::encode_utf8( $e->stringify() ) ); } else { my $mess = $e->stringify(); @@ -483,7 +483,7 @@ sub _execute { # cut out pathnames from public announcement $mess =~ s#/[\w./]+#path#g unless DEBUG; $text .= $mess; - $res->print( Encode::encode_utf8($text) ); + $res->print( Foswiki::encode_utf8($text) ); } } otherwise { @@ -495,7 +495,7 @@ sub _execute { $res->print("Unspecified internal error\n\n"); if (DEBUG) { eval "require Data::Dumper"; - $res->print( Encode::encode_utf8( Data::Dumper::Dumper( \$e ) ) ); + $res->print( Data::Dumper::Dumper( \$e ) ); } }; $session->finish() if $session; diff --git a/core/lib/Foswiki/UI/Viewfile.pm b/core/lib/Foswiki/UI/Viewfile.pm index 393e280a18..098736b596 100644 --- a/core/lib/Foswiki/UI/Viewfile.pm +++ b/core/lib/Foswiki/UI/Viewfile.pm @@ -76,7 +76,7 @@ sub viewfile { # This is a standard path extended by the attachment name e.g. # /Web/Topic/Attachment.gif - $pathInfo = Encode::decode_utf8( $query->path_info() ); + $pathInfo = Foswiki::decode_utf8( $query->path_info() ); } # If we have path_info but no ?filename= diff --git a/core/lib/Foswiki/Users/HtPasswdUser.pm b/core/lib/Foswiki/Users/HtPasswdUser.pm index 59014eb7d0..9b54521f3c 100644 --- a/core/lib/Foswiki/Users/HtPasswdUser.pm +++ b/core/lib/Foswiki/Users/HtPasswdUser.pm @@ -534,7 +534,7 @@ sub encrypt { } my $encodedPassword = '{SHA}' - . Digest::SHA::sha1_base64( Encode::encode_utf8($passwd) ) . '='; + . Digest::SHA::sha1_base64( Foswiki::encode_utf8($passwd) ) . '='; # don't use chomp, it relies on $/ $encodedPassword =~ s/\s+$//; @@ -554,8 +554,8 @@ sub encrypt { $saltchars[ int( rand( $#saltchars + 1 ) ) ] . 
$saltchars[ int( rand( $#saltchars + 1 ) ) ]; } - return crypt( Encode::encode_utf8($passwd), - Encode::encode_utf8( substr( $salt, 0, 2 ) ) ); + return crypt( Foswiki::encode_utf8($passwd), + Foswiki::encode_utf8( substr( $salt, 0, 2 ) ) ); } elsif ( $enc eq 'md5' || $enc eq 'htdigest-md5' ) { @@ -563,7 +563,7 @@ sub encrypt { # SMELL: what does this do if we are using a htpasswd file? my $realm = $entry->{realm} || $Foswiki::cfg{AuthRealm}; my $toEncode = "$login:$realm:$passwd"; - return Digest::MD5::md5_hex( Encode::encode_utf8($toEncode) ); + return Digest::MD5::md5_hex( Foswiki::encode_utf8($toEncode) ); } elsif ( $enc eq 'apache-md5' ) { @@ -591,8 +591,9 @@ sub encrypt { ]; } } - return Crypt::PasswdMD5::apache_md5_crypt( Encode::encode_utf8($passwd), - Encode::encode_utf8( substr( $salt, 0, 14 ) ) ); + return Crypt::PasswdMD5::apache_md5_crypt( + Foswiki::encode_utf8($passwd), + Foswiki::encode_utf8( substr( $salt, 0, 14 ) ) ); } elsif ( $enc eq 'crypt-md5' ) { my $salt; @@ -617,12 +618,12 @@ sub encrypt { # crypt is not cross-plaform, so use Crypt::PasswdMD5 if it's available if ( $this->{APR} ) { return Crypt::PasswdMD5::unix_md5_crypt( - Encode::encode_utf8($passwd), - Encode::encode_utf8( substr( $salt, 0, 11 ) ) ); + Foswiki::encode_utf8($passwd), + Foswiki::encode_utf8( substr( $salt, 0, 11 ) ) ); } else { - return crypt( Encode::encode_utf8($passwd), - Encode::encode_utf8( substr( $salt, 0, 11 ) ) ); + return crypt( Foswiki::encode_utf8($passwd), + Foswiki::encode_utf8( substr( $salt, 0, 11 ) ) ); } } @@ -654,12 +655,13 @@ sub encrypt { } $salt = Crypt::Eksblowfish::Bcrypt::en_base64( - Encode::encode_utf8($salt) ); + Foswiki::encode_utf8($salt) ); $salt = '$2a$08$' . 
$salt; } $salt = substr( $salt, 0, 29 ); - return Crypt::Eksblowfish::Bcrypt::bcrypt( Encode::encode_utf8($passwd), - Encode::encode_utf8($salt) ); + return Crypt::Eksblowfish::Bcrypt::bcrypt( + Foswiki::encode_utf8($passwd), + Foswiki::encode_utf8($salt) ); } die 'Unsupported password encoding ' . $enc; } diff --git a/core/lib/Foswiki/Validation.pm b/core/lib/Foswiki/Validation.pm index 12cd3d4b38..5441c3278d 100644 --- a/core/lib/Foswiki/Validation.pm +++ b/core/lib/Foswiki/Validation.pm @@ -117,7 +117,7 @@ sub generateValidationKey { # Use scalar keys %$actions to ensure we generate a unique token # for each form on a page. my $nonce = Digest::MD5::md5_hex( - Encode::encode_utf8( + Foswiki::encode_utf8( $context . $cgis->id() . scalar( keys %$actions ) ) );