From 4a13ca0b6e79fab0a8be951928b05ede1df05148 Mon Sep 17 00:00:00 2001
From: Michael Smith
Date: Fri, 28 Mar 2014 09:50:50 -0600
Subject: [PATCH] Handle encoding in HTMLEntities

Teach the HTMLEntities goodie to encode text as well as decode entities.

The trigger now has two alternatives: the existing decode form (which also
accepts an "htmldecode" prefix) and a new "html", "htmlentity" or
"htmlencode" prefix followed by 1-50 characters of text to encode. Exactly
one of the two capture groups is defined for any match, and the handler
uses that to pick the direction.

Encoding returns as soon as the text is converted, so the per-character
decimal/hexadecimal/charinfo analysis only runs for decoded entities. A
conversion that changes nothing (an entity that cannot be decoded, or text
with nothing to escape) produces no answer.

The HTML answer for a decoded entity now carries the entity itself rather
than the raw decoded character.
---
Reviewer notes (not part of the commit message):
 * Line structure and HTML entities in this patch were restored by hand,
   and the handler hunk was edited during review. The blob ids on the
   "index" lines are those of the original submission.

 lib/DDG/Goodie/HTMLEntities.pm | 31 +++++++++++++++++++++++--------
 t/HTMLEntities.t               |  9 ++++++---
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/lib/DDG/Goodie/HTMLEntities.pm b/lib/DDG/Goodie/HTMLEntities.pm
index b0a3e1823a7..46c92315f9c 100644
--- a/lib/DDG/Goodie/HTMLEntities.pm
+++ b/lib/DDG/Goodie/HTMLEntities.pm
@@ -9,7 +9,10 @@ zci answer_type => 'html_entity';
 zci is_cached => 1;
 
 
-triggers query_nowhitespace => qr/^(?:html|entity|htmlentity)?(&#?\w+;?)$/i;
+triggers query_nowhitespace => qr/^(?:
+    (?:html|entity|htmlentity|htmldecode)?(&\#?\w+;?) |
+    html(?:entity|encode)?(.{1,50})
+    )$/ix;
 
 primary_example_queries '&#33;';
 secondary_example_queries 'html entity &amp;';
@@ -22,26 +25,38 @@ attribution twitter => 'crazedpsyc',
             cpan    => 'CRZEDPSYC' ;
 
 handle matches => sub {
-    my $entity = $_[0];
+    # The trigger captures exactly one of these: $entity for a decode query,
+    # $decoded (the raw text) for an encode query.
+    my ($entity, $decoded) = @_;
+
+    if (!defined $entity) { # encoding
+        $entity = encode_entities($decoded);
+        return if $entity eq $decoded; # nothing needed escaping
+        # Escape a second time for the HTML answer so the entities are displayed literally.
+        return "Encoded HTML: $entity",
+               html => "Encoded HTML: " . encode_entities($entity);
+    }
+
+    # decoding
     $entity =~ s/;?$/;/; # append a semicolon (some entities like &mdash do not work without one)
-    my $decoded = decode_entities($entity);
-    my $decoded_html = $decoded;
+    $decoded = decode_entities($entity);
+    return if $entity eq $decoded; # decode_entities will return the input if it cannot be decoded
+    my $html = $entity; # the HTML answer carries the entity itself
     my $decimal = ord($decoded);
 
     my $info = charinfo($decimal);
 
     if( $$info{name} eq '' ) {
-        $decoded_html = "Unicode control character (no visual representation)";
+        $html = "Unicode control character (no visual representation)";
         $decoded = "Unicode control character (no visual representation)";
     } elsif(substr($$info{category},0,1) eq 'C') {
         $decoded = "Special character (no visual representation)";
-        $decoded_html = "Special character (no visual representation)";
+        $html = "Special character (no visual representation)";
     }
 
     my $hex = sprintf("%04x", $decimal);
 
     return "Decoded HTML Entity: $decoded, decimal: $decimal, hexadecimal: $hex",
-           html => "Decoded HTML Entity: $decoded_html, decimal: $decimal, hexadecimal: $hex" unless $entity eq $decoded; # decode_entities will return the input if it cannot be decoded
-    return;
+           html => "Decoded HTML Entity: $html, decimal: $decimal, hexadecimal: $hex";
 };
 
diff --git a/t/HTMLEntities.t b/t/HTMLEntities.t
index fa8be755bf7..50405209bff 100644
--- a/t/HTMLEntities.t
+++ b/t/HTMLEntities.t
@@ -12,9 +12,12 @@ ddg_goodie_test(
     [qw(
         DDG::Goodie::HTMLEntities
     )],
-    '&#33;' => test_zci("Decoded HTML Entity: !, decimal: 33, hexadecimal: 0021", html => "Decoded HTML Entity: !, decimal: 33, hexadecimal: 0021"),
-    '&#x21' => test_zci("Decoded HTML Entity: !, decimal: 33, hexadecimal: 0021", html => "Decoded HTML Entity: !, decimal: 33, hexadecimal: 0021"),
-    'html entity &amp;' => test_zci("Decoded HTML Entity: &, decimal: 38, hexadecimal: 0026", html => "Decoded HTML Entity: &, decimal: 38, hexadecimal: 0026"),
+    '&#33;'             => test_zci("Decoded HTML Entity: !, decimal: 33, hexadecimal: 0021", html => "Decoded HTML Entity: &#33;, decimal: 33, hexadecimal: 0021"),
+    '&#x21'             => test_zci("Decoded HTML Entity: !, decimal: 33, hexadecimal: 0021", html => "Decoded HTML Entity: &#x21;, decimal: 33, hexadecimal: 0021"),
+    'html entity &amp;' => test_zci("Decoded HTML Entity: &, decimal: 38, hexadecimal: 0026", html => "Decoded HTML Entity: &amp;, decimal: 38, hexadecimal: 0026"),
+    'html encode <foo>' => test_zci("Encoded HTML: &lt;foo&gt;", html => "Encoded HTML: &amp;lt;foo&amp;gt;"),
+    'html encode amp;'  => undef,
+    'html encode &'     => test_zci("Encoded HTML: &amp;", html => "Encoded HTML: &amp;amp;"),
 );
 
 done_testing;