Permalink
Browse files

Item9973: Merge over work from trunk @ Foswikirev:10344

This also incorporates work from Item10171. We still have problems
with attachments using international charsets but the situation has
improved, especially if you're on utf8. Will continue Item9973 in a new
task.


git-svn-id: http://svn.foswiki.org/branches/Release01x01@10486 0b4bb1d4-4e5a-0410-9cc4-b2b747904278
  • Loading branch information...
PaulHarvey PaulHarvey
PaulHarvey authored and PaulHarvey committed Jan 6, 2011
1 parent 3aabcc6 commit 39cf6785c11ed47609a17d49717191e44948e88e
@@ -187,8 +187,9 @@ Many thanks to the following sponsors for supporting this work:
| Version: | %$VERSION% |
| Release: | %$RELEASE% |
| Change History: | |
| ? | Foswikitask:Item10048: Try to use Macros in the src URLs of images with titles |
| ? | Foswikitask:Item2174: Protect line breaks within paragraphs in TML |
| 26 Nov 2010 | Foswikitask:Item2174: Protect line breaks within paragraphs in TML <br/> \
Foswikitask:Item10048: Try to use Macros in the src URLs of images with titles <br/> \
Foswikitask:Item9973: Fix attachments REST handler to deal with topics named with international characters |
| 04 Oct 2010 | Foswikitask:Item1391: Protect div and span tags with =style= attributes |
| 28 Jun 2010 | Foswikitask:Item761, Foswikitask:Item2311, Foswikitask:Item5990, Foswikitask:Item9170: Fix conversion between character encodings. Any characters may be entered in the WYSIWYG editor, regardless of the site's encoding. Where possible, they are converted to the site encoding, otherwise they become entities. <br />\
Foswikitask:Item2254: Fix cursor-movement problems on Mozilla browsers (introduced by Foswikitask:Item1798)<br/>\
@@ -113,23 +113,26 @@ sub convert {
# Convert (safe) named entities back to the
# site charset. Numeric entities are mapped straight to the
# corresponding code point unless their value overflows.
# HTML::Entities::_decode_entities converts numeric entities
# HTML::Entities::_decode_entities converts numeric entities
# to Unicode codepoints, so first convert the text to Unicode
# characters
if ( WC::encoding() =~ /^utf-?8/ ) {
# text is already UTF-8, so just decode
$text = Encode::decode_utf8($text);
}
else {
# convert to unicode codepoints
$text = Encode::decode(WC::encoding(), $text);
$text = Encode::decode( WC::encoding(), $text );
}
# $text is now Unicode characters
#print STDERR "unicoded [". debugEncode($text). "]\n\n";
# Make sure that & < > ' and " remain encoded, because the parser depends
# on it. The safe-entities does not include the corresponding named
# entities, so convert numeric entities for these characters to the named
# entities, so convert numeric entities for these characters to the named
# entity.
$text =~ s/\&\#38;/\&amp;/go;
$text =~ s/\&\#x26;/\&amp;/goi;
@@ -141,9 +144,11 @@ sub convert {
$text =~ s/\&\#x27;/\&apos;/goi;
$text =~ s/\&\#34;/\&quot;/go;
$text =~ s/\&\#x22;/\&quot;/goi;
$text =~ s/\&\#160;/\&nbsp;/goi;
require HTML::Entities;
HTML::Entities::_decode_entities( $text, WC::safeEntities() );
#print STDERR "decodedent[". debugEncode($text). "]\n\n";
# HTML::Entities::_decode_entities is NOT aware of the site charset
@@ -152,16 +157,19 @@ sub convert {
# site character set cannot represent them.
# Convert them back to entities:
WC::convertNotRepresentabletoEntity($text);
#print STDERR "notrep2ent[". debugEncode($text). "]\n\n";
# $text is now Unicode characters that are representable
# in the site charset. Convert to the site charset:
if ( WC::encoding() =~ /^utf-?8/ ) {
# nothing to do, already in unicode
}
else {
$text = Encode::encode(WC::encoding(), $text);
$text = Encode::encode( WC::encoding(), $text );
}
#print STDERR "sitechrset[". debugEncode($text). "]\n\n";
# get rid of nasties
@@ -182,6 +190,7 @@ sub convert {
# If the site charset is UTF8, we need to recode
if ( WC::encoding() =~ /^utf-?8/ ) {
$text = Encode::encode_utf8($text);
#print STDERR "re-encoded[". debugEncode($text). "]\n\n";
}
@@ -59,6 +59,7 @@ sub generate {
unless ( $options & $WC::KEEP_ENTITIES ) {
$t =~ s/&($text_entities_re);/chr($text_entities{$1})/ego;
$t =~ s/&nbsp;/$WC::NBSP/g;
$t =~ s/&#160;/$WC::NBSP/g;
}
return ( 0, $t );
}
@@ -33,12 +33,12 @@ use Foswiki::Plugins::WysiwygPlugin::Constants;
use Foswiki::Plugins::WysiwygPlugin::HTML2TML::WC;
my %jqueryChiliClass = map { $_ => 1 }
qw( cplusplus csharp css bash delphi html java js
lotusscript php-f php sql tml );
qw( cplusplus csharp css bash delphi html java js
lotusscript php-f php sql tml );
my %tml2htmlClass = map { $_ => 1 }
qw( WYSIWYG_PROTECTED WYSIWYG_STICKY TMLverbatim WYSIWYG_LINK
TMLhtml WYSIWYG_HIDDENWHITESPACE );
qw( WYSIWYG_PROTECTED WYSIWYG_STICKY TMLverbatim WYSIWYG_LINK
TMLhtml WYSIWYG_HIDDENWHITESPACE );
=pod
@@ -77,7 +77,7 @@ sub stringify {
foreach my $attr ( sort keys %{ $this->{attrs} } ) {
$r .= " " . $attr . "='" . $this->{attrs}->{$attr} . "'";
}
$r .= ' /' if $WC::SELFCLOSING{ lc($this->{tag}) };
$r .= ' /' if $WC::SELFCLOSING{ lc( $this->{tag} ) };
$r .= '>';
}
if ($shallow) {
@@ -90,7 +90,7 @@ sub stringify {
$kid = $kid->{next};
}
}
if ( $this->{tag} and not $WC::SELFCLOSING{ lc($this->{tag}) } ) {
if ( $this->{tag} and not $WC::SELFCLOSING{ lc( $this->{tag} ) } ) {
$r .= '</' . $this->{tag} . '>';
}
return $r;
@@ -346,6 +346,7 @@ s/$WC::CHECKw(($WC::PON|$WC::POFF)?[$WC::CHECKn$WC::CHECKs$WC::NBSP $WC::NBBR])/
unless ($protect) {
$tml =~ s/<br( \/)?>\n/\n/g;
}
#print STDERR WC::debugEncode($before);
#print STDERR " -> '",WC::debugEncode($tml),"'\n";
$text .= $tml;
@@ -494,7 +495,7 @@ sub generate {
}
if ( $this->hasClass('TMLhtml') ) {
return $this->_defaultTag($options & ~$WC::VERY_CLEAN);
return $this->_defaultTag( $options & ~$WC::VERY_CLEAN );
}
my $tag = $this->{tag};
@@ -618,26 +619,27 @@ sub _htmlParams {
# Sort the attributes when converting back to TML
# so that the conversion is deterministic
ATTR: for my $k ( sort keys %$attrs ) {
ATTR: for my $k ( sort keys %$attrs ) {
next ATTR unless $k;
my $v = $attrs->{$k};
if ( $k eq 'class' ) {
my @classes;
$v =~ s/^\s*(.*?)\s*$/$1/;
CLASS: for my $class (split /\s+/, $v) {
CLASS: for my $class ( split /\s+/, $v ) {
next CLASS unless $class =~ /\S/;
next CLASS if $tml2htmlClass{$class};
# if cleaning aggressively, remove class attributes
# except for the JQuery "Chili" classes
next CLASS if ( $options & $WC::VERY_CLEAN
next CLASS
if ( $options & $WC::VERY_CLEAN
and not $jqueryChiliClass{$class} );
push @classes, $class;
}
next ATTR unless @classes;
$v = join(' ', @classes);
$v = join( ' ', @classes );
}
my $q = $v =~ /"/ ? "'" : '"';
push( @params, $k . '=' . $q . $v . $q );
@@ -952,6 +954,7 @@ sub _isConvertableTableRow {
}
}
$text =~ s/&nbsp;/$WC::NBSP/g;
$text =~ s/&#160;/$WC::NBSP/g;
#if (--$ignoreCols > 0) {
# # colspanned
@@ -1124,6 +1127,7 @@ sub _emphasis {
# Remove whitespace from either side of the contents, retaining the
# whitespace
$contents =~ s/&nbsp;/$WC::NBSP/go;
$contents =~ s/&#160;/$WC::NBSP/go;
$contents =~ /^($WC::WS)(.*?)($WC::WS)$/;
my ( $pre, $post ) = ( $1, $3 );
$contents = $2;
@@ -1515,13 +1519,15 @@ sub _handleHR {
return ( $f, '<hr />' . $kids ) if ( $options & $WC::NO_BLOCK_TML );
my $dashes = 3;
if ( $this->{attrs}->{style} and
$this->{attrs}->{style} =~ s/\bnumdashes\s*:\s*(\d+)\b// ) {
if ( $this->{attrs}->{style}
and $this->{attrs}->{style} =~ s/\bnumdashes\s*:\s*(\d+)\b// )
{
$dashes = $1;
$dashes = 3 if $dashes < 3;
$dashes = 160 if $dashes > 160; # Filter out probably-bad data
$dashes = 160 if $dashes > 160; # Filter out probably-bad data
}
return ( $f | $WC::BLOCK_TML, $WC::CHECKn . ('-' x $dashes) . $WC::CHECKn . $kids );
return ( $f | $WC::BLOCK_TML,
$WC::CHECKn . ( '-' x $dashes ) . $WC::CHECKn . $kids );
}
# Handler for the "html" tag: delegates straight to _flatten, passing
# the caller's argument list through unchanged and returning whatever
# _flatten returns.
sub _handleHTML {
    return _flatten(@_);
}
@@ -1687,39 +1693,55 @@ sub _handleSPAN {
}
if ( _removeClass( \%atts, 'WYSIWYG_HIDDENWHITESPACE' ) ) {
# This regular expression ensures the encoded whitespace is valid.
# The limit on the number of digits will ensure that the numbers are reasonable.
if ( $atts{style} and $atts{style} =~ s/\bencoded\s*:\s*(['"])((?:b|n|t\d{1,2}|s\d{1,3})+)\1;?// ) {
# This regular expression ensures the encoded whitespace is valid.
# The limit on the number of digits will ensure that the numbers are reasonable.
if ( $atts{style}
and $atts{style} =~
s/\bencoded\s*:\s*(['"])((?:b|n|t\d{1,2}|s\d{1,3})+)\1;?// )
{
my $whitespace = $2;
#print STDERR "'$whitespace' -> ";
$whitespace =~ s/b/\\/g;
$whitespace =~ s/n/$WC::NBBR/g;
$whitespace =~ s/t(\d+)/'\t' x $1/ge;
$whitespace =~ s/s(\d+)/$WC::NBSP x $1/ge;
#print STDERR "'$whitespace'\n";
#require Data::Dumper;
my ( $f, $kids ) = $this->_flatten($options | $WC::KEEP_WS | $WC::KEEP_ENTITIES);
#die Data::Dumper::Dumper($kids);
my ( $f, $kids ) =
$this->_flatten( $options | $WC::KEEP_WS | $WC::KEEP_ENTITIES );
#die Data::Dumper::Dumper($kids);
if ( $kids eq ' ' ) {
# The space was not changed
# So restore the encoded whitespace
return ( $f, $whitespace );
}
elsif ( length($kids) == 0 ) {
# The user deleted the space
# So return blank
return ( 0, '' );
}
#else {die "'".ord($kids)."'";}if(1){}
elsif ( 0 and ($kids eq '&nbsp;' or $kids eq chr(160)) ) { # SMELL: Firefox-specific
# This was probably inserted by Firefox after the user deleted the space.
# So return blank
elsif ( 0
and
( $kids eq '&nbsp;' or $kids eq '&#160;' or $kids eq chr(160) )
)
{ # SMELL: Firefox-specific
# This was probably inserted by Firefox after the user deleted the space.
# So return blank
return ( 0, '' );
}
else {
# The user entered some new text
# Return the combination.
# Assume that a leading space corresponds to the encoded whitespace
# The user entered some new text
# Return the combination.
# Assume that a leading space corresponds to the encoded whitespace
$kids =~ s/^ //;
return ( $f, $whitespace . $kids );
}
Oops, something went wrong.

0 comments on commit 39cf678

Please sign in to comment.