From 78e94326b16414e5159bec211223e2ec9b96e531 Mon Sep 17 00:00:00 2001 From: GeorgeClark Date: Thu, 23 Jun 2011 21:41:34 +0000 Subject: [PATCH] Item10905: Permit spaces in link query string Add unit tests for legacy mailto links with and without query strings. Fix auto test that expected ?query strings to work in legacy links. Add {AntiSpam}{EntityEncode} with default enabled. Email addresses will have all non-alphanumeric characters encoded to HTML hex entities. Revert the previous fix to this task that removed legacy support for [[mailto Link Text]] style links. Add exception for recognizing space delimited square bracket links. This format was deprecated in 2005. If a ? is detected in the URL part of the legacy format, then any spaces are assumed to be part of the query string. [[mailto:you@here.com?subject=test subject]] Before this code, the mailto link would be generated with link text of "subject". After this commit, subject becomes part of the query string and the entire url becomes the link text. git-svn-id: http://svn.foswiki.org/trunk@12040 0b4bb1d4-4e5a-0410-9cc4-b2b747904278 --- UnitTestContrib/test/unit/FormattingTests.pm | 79 ++++++++++++++++++- .../data/TestCases/TestCaseAutoFormatting.txt | 4 +- core/lib/Foswiki.spec | 17 ++-- core/lib/Foswiki/Render.pm | 35 +++++--- 4 files changed, 117 insertions(+), 18 deletions(-) diff --git a/UnitTestContrib/test/unit/FormattingTests.pm b/UnitTestContrib/test/unit/FormattingTests.pm index 635417efc1..91249f8286 100644 --- a/UnitTestContrib/test/unit/FormattingTests.pm +++ b/UnitTestContrib/test/unit/FormattingTests.pm @@ -38,18 +38,23 @@ sub set_up { $topicObject->save(); $Foswiki::cfg{AntiSpam}{RobotsAreWelcome} = 1; $Foswiki::cfg{AntiSpam}{EmailPadding} = 'STUFFED'; + $Foswiki::cfg{AntiSpam}{EntityEncode} = 1; $Foswiki::cfg{AllowInlineScript} = 1; } # This formats the text up to immediately before s are removed, so we # can see the nops. 
sub do_test { - my ( $this, $expected, $actual ) = @_; + my ( $this, $expected, $actual, $noHtml ) = @_; my $session = $this->{session}; $this->{test_topicObject}->expandMacros($actual); $actual = $this->{test_topicObject}->renderTML($actual); - $this->assert_html_equals( $expected, $actual ); + if ($noHtml) { + $this->assert_equals( $expected, $actual ); + } else { + $this->assert_html_equals( $expected, $actual ); + } } # current topic WikiWord @@ -229,6 +234,64 @@ ACTUAL $this->do_test( $expected, $actual ); } +# [[mailtoUrl Alt TextAlt]] +sub test_squabbedMailtoUrlAltTextOldUndocumentedUse { + my $this = shift; + my $expected = <Alt TextAlt +EXPECTED + + my $actual = <do_test( $expected, $actual, 1); +} + +# [[mailtoUrl?with params]] +sub test_squabbedMailtoUrlWithSpaces { + my $this = shift; + my $expected = <mailto:user@exampleSTUFFED.com?subject=asdf; asdf&body=asdf +EXPECTED + + my $actual = <do_test( $expected, $actual, 1); +} + +# [[mailtoUrl?with params][Link text]] +sub test_squabbedMailtoUrlWithSpacesLinkText { + my $this = shift; + my $expected = <Link text +EXPECTED + + my $actual = <do_test( $expected, $actual, 1); +} + +# [[mailtoUrl?with parms]] +# - The only entities that should be encoded are & and spaces +sub test_squabbedMailtoUrlWithSpacesNotEncoded { + my $this = shift; + $Foswiki::cfg{AntiSpam}{EntityEncode} = 0; + my $expected = <mailto:user\@exampleSTUFFED.com?subject=asdf; asdf&body=asdf +EXPECTED + + my $actual = <do_test( $expected, $actual, 1); +} + # [[Web.WikiWord]] sub test_squabbedWebWikiword_params { my $this = shift; @@ -869,6 +932,18 @@ ACTUAL $this->do_test( $expected, $actual ); } +sub test_externalLinkWithSpacedQuery { + my $this = shift; + my $expected = <topic +EXPECTED + + my $actual = <do_test( $expected, $actual ); +} + sub test_internalLinkWithSpacedUrl { my $this = shift; my $expected = <Subject_only Subject only Subject only +Embedded Spaces ---+++ Actual [[mailto:a@z.com Mail]] -[[mailto:?subject=Hi Hi]] 
+[[mailto:?subject=Hi][Hi]] a@b.com [[mailto:a@b.com][Mail]] [[mailto:a@b.com][Mail link]] [[mailto:?subject=Hi][Subject_only]] [[mailto:?subject=Hi][Subject only]] Subject only +[[mailto:?subject=With spaces&body=Spaces too][Embedded Spaces]] ---++ Wiki Word Links diff --git a/core/lib/Foswiki.spec b/core/lib/Foswiki.spec index 6834a29479..9329ef3f1c 100644 --- a/core/lib/Foswiki.spec +++ b/core/lib/Foswiki.spec @@ -573,7 +573,7 @@ $Foswiki::cfg{SafeEnvPath} = ''; # and %QUERY{}%. Extensions can push into this array to extend the set. This is done as # a filter in because while the bulk of configuration items are quite innocent, # it's better to be a bit paranoid. -$Foswiki::cfg{AccessibleCFG} = [ '{ScriptSuffix}', '{LoginManager}', '{AuthScripts}', '{LoginNameFilterIn}', '{AdminUserLogin}', '{AdminUserWikiName}', '{SuperAdminGroup}', '{UsersTopicName}', '{AuthRealm}', '{MinPasswordLength}', '{Register}{AllowLoginName}', '{Register}{EnableNewUserRegistration}', '{Register}{NeedVerification}', '{Register}{RegistrationAgentWikiName}', '{AllowInlineScript}', '{DenyDotDotInclude}', '{UploadFilter}', '{NameFilter}', '{AccessibleCFG}', '{AntiSpam}{EmailPadding}', '{AntiSpam}{HideUserDetails}', '{AntiSpam}{RobotsAreWelcome}', '{Stats}{TopViews}', '{Stats}{TopContrib}', '{Stats}{TopicName}', '{UserInterfaceInternationalisation}', '{UseLocale}', '{Site}{Locale}', '{Site}{CharSet}', '{DisplayTimeValues}', '{DefaultDateFormat}', '{Site}{LocaleRegexes}', '{UpperNational}', '{LowerNational}', '{PluralToSingular}', '{EnableHierarchicalWebs}', '{WebMasterEmail}', '{WebMasterName}', '{NotifyTopicName}', '{SystemWebName}', '{TrashWebName}', '{SitePrefsTopicName}', '{LocalSitePreferences}', '{HomeTopicName}', '{WebPrefsTopicName}', '{UsersWebName}', '{TemplatePath}', '{LinkProtocolPattern}', '{NumberOfRevisions}', '{MaxRevisionsInADiff}', '{ReplaceIfEditedAgainWithin}', '{LeaseLength}', '{LeaseLengthLessForceful}', '{Plugins}{WebSearchPath}', '{PluginsOrder}', '{Cache}{Enabled}', 
'{Validation}{Method}', '{Register}{DisablePasswordConfirmation}' ]; +$Foswiki::cfg{AccessibleCFG} = [ '{ScriptSuffix}', '{LoginManager}', '{AuthScripts}', '{LoginNameFilterIn}', '{AdminUserLogin}', '{AdminUserWikiName}', '{SuperAdminGroup}', '{UsersTopicName}', '{AuthRealm}', '{MinPasswordLength}', '{Register}{AllowLoginName}', '{Register}{EnableNewUserRegistration}', '{Register}{NeedVerification}', '{Register}{RegistrationAgentWikiName}', '{AllowInlineScript}', '{DenyDotDotInclude}', '{UploadFilter}', '{NameFilter}', '{AccessibleCFG}', '{AntiSpam}{EmailPadding}', '{AntiSpam}{EntityEncode}','{AntiSpam}{HideUserDetails}', '{AntiSpam}{RobotsAreWelcome}', '{Stats}{TopViews}', '{Stats}{TopContrib}', '{Stats}{TopicName}', '{UserInterfaceInternationalisation}', '{UseLocale}', '{Site}{Locale}', '{Site}{CharSet}', '{DisplayTimeValues}', '{DefaultDateFormat}', '{Site}{LocaleRegexes}', '{UpperNational}', '{LowerNational}', '{PluralToSingular}', '{EnableHierarchicalWebs}', '{WebMasterEmail}', '{WebMasterName}', '{NotifyTopicName}', '{SystemWebName}', '{TrashWebName}', '{SitePrefsTopicName}', '{LocalSitePreferences}', '{HomeTopicName}', '{WebPrefsTopicName}', '{UsersWebName}', '{TemplatePath}', '{LinkProtocolPattern}', '{NumberOfRevisions}', '{MaxRevisionsInADiff}', '{ReplaceIfEditedAgainWithin}', '{LeaseLength}', '{LeaseLengthLessForceful}', '{Plugins}{WebSearchPath}', '{PluginsOrder}', '{Cache}{Enabled}', '{Validation}{Method}', '{Register}{DisablePasswordConfirmation}' ]; # **BOOLEAN** # Allow %INCLUDE of URLs. This is disabled by default, because it is possible @@ -683,15 +683,14 @@ $Foswiki::cfg{AccessibleENV} = '^(HTTP_\w+|REMOTE_\w+|SERVER_\w+|REQUEST_\w+|MOD # AntiWikiSpamPlugin

# **STRING 50** -# Text added to email addresses to prevent spambots from grabbing +# Text added to e-mail addresses to prevent spambots from grabbing # addresses e.g. set to 'NOSPAM' to get fred@user.co.ru -# rendered as fred@user.co.NOSPAM.ru +# rendered as fred@user.coNOSPAM.ru $Foswiki::cfg{AntiSpam}{EmailPadding} = ''; # **BOOLEAN** # Normally Foswiki stores the user's sensitive information (such as their e-mail -# address) in a database out of public view. It also obfuscates e-mail -# addresses displayed in the browser. This is to help prevent e-mail +# address) in a database out of public view. This is to help prevent e-mail # spam and identity fraud.
# If that is not a risk for you (e.g. you are behind a firewall) and you # are happy for e-mails to be made public to all Foswiki users, @@ -700,6 +699,14 @@ $Foswiki::cfg{AntiSpam}{EmailPadding} = ''; # %USERINFO is ignored. $Foswiki::cfg{AntiSpam}{HideUserDetails} = $TRUE; +# **BOOLEAN** +# By default Foswiki will also manipulate e-mail addresses to reduce the harvesting +# of e-mail addresses. Foswiki will encode all non-alphanumeric characters to their +# HTML entity equivalent, e.g. @ becomes &#64;. This is not completely effective; +# however, it can prevent some primitive spambots from seeing the addresses. +# More advanced bots will still collect addresses. +$Foswiki::cfg{AntiSpam}{EntityEncode} = $TRUE; + # **BOOLEAN** # By default, Foswiki doesn't do anything to stop robots, such as those used # by search engines, from visiting "normal view" pages. diff --git a/core/lib/Foswiki/Render.pm b/core/lib/Foswiki/Render.pm index 5206458d05..e51c308b9b 100644 --- a/core/lib/Foswiki/Render.pm +++ b/core/lib/Foswiki/Render.pm @@ -43,6 +43,9 @@ our $TRMARK = "is\1all\1th"; # General purpose marker used to mark escapes inthe text; for example, we # use it to mark hoisted blocks, such as verbatim blocks. our $REMARKER = "\0"; +# Optional End marker for escapes where the default end character ; also +# must be removed. Used for email anti-spam encoding. 
+our $REEND = "\1"; # Default format for a link to a non-existant topic use constant DEFAULT_NEWLINKFORMAT => <<'NLF'; @@ -771,15 +774,21 @@ sub _handleSquareBracketedLink { # Explicit external [[http://$link]] or [[http://$link][$text]] # or explicit absolute [[/$link]] or [[/$link][$text]] - - if ( !defined($text) && $link =~ /^(\S+)\s+(.*)$/ && $link !~ /^mailto:/ ) { - - # Legacy case of '[[URL anchor display text]]' link - # implicit untaint is OK as we are just recycling topic content - $link = $1; - $text = _escapeAutoLinks($2); + if ( !defined($text) && $link =~ /^(\S+)\s+(.*)$/ ) { + + my $candidateLink = $1; + my $candidateText = $2; + # If the URL portion contains a ? indicating query parameters then + # the spaces are possibly embedded in the query string, so don't + # use the legacy format. + if ( $candidateLink !~ m/\?/ ) { + + # Legacy case of '[[URL anchor display text]]' link + # implicit untaint is OK as we are just recycling topic content + $link = $candidateLink; + $text = _escapeAutoLinks($candidateText); + } } - return _externalLink( $this, $link, $text ); } @@ -862,13 +871,19 @@ sub _externalLink { /$1$Foswiki::cfg{AntiSpam}{EmailPadding}$2/x; } } - if ( $Foswiki::cfg{AntiSpam}{HideUserDetails} ) { + if ( $Foswiki::cfg{AntiSpam}{EntityEncode} ) { # Much harder obfuscation scheme. For link text we only encode '@' # See also http://develop.twiki.org/~twiki4/cgi-bin/view/Bugs/Item2928 # and http://develop.twiki.org/~twiki4/cgi-bin/view/Bugs/Item3430 # before touching this - $url =~ s/(\W)/'&#'.ord($1).';'/ge; + # Note: & is already encoded, so don't encode any entities + # See http://foswiki.org/Tasks/Item10905 + $url =~ s/&(\w+);/$REMARKER$1$REEND/g; # "&abc;" + $url =~ s/&(#x?[0-9a-f]+);/$REMARKER$1$REEND/gi; # "&#123;" + $url =~ s/([^\w$REMARKER$REEND])/'&#'.ord($1).';'/ge; + $url =~ s/$REMARKER(#x?[0-9a-f]+)$REEND/&$1;/goi; + $url =~ s/$REMARKER(\w+)$REEND/&$1;/go; if ($text) { $text =~ s/\@/'&#'.ord('@').';'/ge; }