Skip to content
This repository
Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

file 63 lines (52 sloc) 2.134 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
package DDG::Goodie::HTMLEntities;
# ABSTRACT: Decode HTML Entities.

use DDG::Goodie;
use HTML::Entities;
use Unicode::UCD 'charinfo';

zci answer_type => 'html_entity';

zci is_cached => 1;

# Trigger on the query_nowhitespace form of the query (presumably the query
# with whitespace stripped -- confirm against the DDG::Goodie docs).
# The pattern has two alternatives, each with one capture group:
#   1. an optional keyword (html, entity, htmlentity, htmldecode) followed by
#      an entity such as "&amp;" or "&#33" -- captured for decoding;
#   2. "html", "htmlentity" or "htmlencode" followed by 1 to 50 characters --
#      captured for encoding.
triggers query_nowhitespace => qr/^(?:
(?:html|entity|htmlentity|htmldecode)?(&\#?\w+;?) |
html(?:entity|encode)?(.{1,50})
)$/ix;

# The primary example has to be an entity: a bare "!" matches neither trigger
# alternative, whereas "&#33;" matches the first one and decodes to "!".
primary_example_queries '&#33;';
secondary_example_queries 'html entity &';
description 'decode HTML entities';
name 'HTMLEntities';
code_url 'https://github.com/duckduckgo/zeroclickinfo-goodies/blob/master/lib/DDG/Goodie/HTMLEntities.pm';
category 'computing_tools';
topics 'programming';
attribution twitter => 'crazedpsyc',
            cpan => 'CRZEDPSYC' ;

# Answer a query matched by the trigger regex.
#
# Receives the regex's two capture groups; only one is defined per match:
#   $entity  - an entity to decode (first trigger alternative)
#   $decoded - raw text to encode as entities (second trigger alternative)
#
# Returns a plain-text answer followed by an `html => ...` variant, or an
# empty return when decoding/encoding left the input unchanged.
handle matches => sub {
    my ($entity, $decoded) = @_;
    my $html;
    my $encoding = 0;
    if (defined $entity) { # decoding
        $entity =~ s/;?$/;/; # append a semicolon (some entities like &mdash do not work without one)
        $decoded = decode_entities($entity);
        $html = $entity; # the entity itself is emitted in the HTML answer
    } else { # encoding
        $encoding = 1;
        $entity = encode_entities($decoded);
        $html = encode_entities($entity); # encoded a second time for the HTML answer
    }

    # ord() looks at the first character only.
    my $decimal = ord($decoded);

    # charinfo() returns undef for unassigned code points and non-characters;
    # fall back to an empty record so the comparisons below do not operate on
    # undefined values (which would emit "uninitialized" warnings).
    my $info     = charinfo($decimal) || {};
    my $name     = defined $info->{name}     ? $info->{name}     : '';
    my $category = defined $info->{category} ? $info->{category} : '';

    if ($name eq '<control>') {
        $html = "<a href='https://en.wikipedia.org/wiki/Unicode_control_characters'>Unicode control character</a> (no visual representation)";
        $decoded = "Unicode control character (no visual representation)";
    }
    elsif (substr($category, 0, 1) eq 'C') {
        # Any other general category starting with "C".
        $decoded = "Special character (no visual representation)";
        $html = "Special character (no visual representation)";
    }

    # decode_entities will return the input if it cannot be decoded, and
    # encode_entities leaves text without encodable characters untouched;
    # in both cases there is nothing to show.
    return if $entity eq $decoded;

    my $hex = sprintf("%04x", $decimal);
    my $label = $encoding ? "Encoded HTML: " : "Decoded HTML Entity: ";

    # The code point details are only reported when decoding.
    return $label . ($encoding ? "$entity" : "$decoded, decimal: $decimal, hexadecimal: $hex"),
           html => $label.$html.($encoding ? "" : ", decimal: $decimal, hexadecimal: <a href=\"/?q=U%2B$hex\">$hex</a>");
};

1;
Something went wrong with that request. Please try again.