Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
213 lines (186 sloc) 5.72 KB
use 5.014; # strict, Unicode 6, unicode regexp modifiers
BEGIN {
require charnames;
if ($^V gt v5.16.0) {
charnames->import(':loose');
} else {
print "unicode.pl: Loose unicode matching not supported on this version of Perl.";
print "Upgrade to 5.16 or newer for case-insensitive names.";
charnames->import(':full');
}
}
use Encode qw(decode_utf8 encode_utf8);
use POSIX ();
use Unicode::UCD qw(charblock charblocks charinfo);
use Irssi qw(command_bind command_bind_first);
our $VERSION = "2";
our %IRSSI = (
authors => 'David Leadbeater',
contact => 'dgl@dgl.cx',
name => 'unicode',
description => 'Get infomation about unicode characters',
license => 'WTFPL <http://dgl.cx/licence>',
url => 'http://dgl.cx/irssi',
);
my $CHARCODE_RE = qr/(?:\d+|(?:U\+|0x)[0-9a-f]+)/ai;
my $pipe_in_progress;
my $USAGE = <<'EOF';
/UNICODE <character | code | name | block name>
Print details about Unicode characters or blocks.
Details about a single character:
/unicode 😸
/unicode U+1F626
Print details about a block (more concise):
/unicode Emoticons
Print details about a range:
/unicode U+1F600..U+1F700
Find a character:
/unicode /\bcat\b/
EOF
if (Irssi::settings_get_str('term_charset') !~ /utf-8/i) {
print "\x{3}4unicode.pl\x{3}: term_charset is not set to UTF-8. ",
"Please set your terminal and Irssi to use UTF-8 so this script works correctly.";
print "Current settings:";
print " Irssi term_charset = ", Irssi::settings_get_str('term_charset');
print " $_ = $ENV{$_}" for grep /^(?:LANG|LC_|TERM$)/, keys %ENV;
}
# TODO: Can we fix Irssi to not need encoding here?
sub p { Irssi::active_win()->print(encode_utf8("@_"), MSGLEVEL_CLIENTCRAP) }
command_bind_first help => sub {
my($arg) = @_;
return unless $arg =~ /^unicode\s*$/i;
print $USAGE;
print "[Perl internal unicode version " . Unicode::UCD::UnicodeVersion() . "]";
Irssi::signal_stop();
};
command_bind unicode => sub {
my($arg) = @_;
if(!$arg) {
print "Usage: /UNICODE <character | code | name | block name>";
print "See /help unicode for more.";
return;
}
# Decode is always required right now, but really irssi core should handle
# this so written in a future proof way.
$arg = decode_utf8 $arg unless Encode::is_utf8($arg, 1);
if (length $arg == 1) {
# Single character
print_info(ord $arg, 1);
} elsif ($arg =~ /^$CHARCODE_RE\s*$/) {
# Character code (decimal or hex)
print_info($arg, 1);
} elsif ($arg =~ /^($CHARCODE_RE)\s*\.\.\s*($CHARCODE_RE)\s*$/) {
# Character range
my($start, $end) = (charinfo($1), charinfo($2));
print_info($_) for hex $start->{code} .. hex $end->{code};
} elsif ($arg =~ m{/(.*)/\s*$}) {
my $re = qr/$1/i;
if ($pipe_in_progress) {
p "Another unicode search is in progress";
return;
}
fork_wrapper(sub { # Child
my($fh) = @_;
my @found;
my $data = "";
# This is not a public API at all, but taking 2 minutes when using the
# public API is a bit of a joke, so we take advantage of perl's cache if
# we can.
$data = do "unicore/Name.pl";
if (!$data) {
for my $block(map { $_->[0] } values %{charblocks()}) {
for($block->[0] .. $block->[1]) {
my $name = charnames::viacode($_);
next unless $name;
$data .= sprintf "%X %s\n", $_, $name;
}
}
}
while ($data =~ /(?:^([A-F0-9]+).*$re)/gm) {
push @found, $1;
}
if(@found > 100) {
syswrite $fh, "- More than 100 matches found, aborting";
} else {
syswrite $fh, "@found";
}
},
sub { # Parent
my($line) = @_;
if ($line =~ /^- (.*)/) {
p $1;
} elsif (!$line) {
p "No matches found";
} else {
print_info($_) for sort { hex $a <=> hex $b } split / /, $line;
}
});
} else {
# Character (or named sequence) or block name
my $string = charnames::string_vianame($arg);
if ($string) {
# Character(s) found
for my $char(split //, $string) {
print_info(ord $char);
}
} elsif(charblock $arg) {
my $block = charblock($arg);
print_info($_) for $block->[0]->[0] .. $block->[0]->[1];
} else {
p "Not found. Try for example /unicode /\\bcat\\b/ for partial matching.";
}
}
};
sub print_info {
my($character, $extra) = @_;
my $info = charinfo $character;
if (!$info) {
p "Character not found" if $extra;
} else {
p chr(hex $info->{code}) . " (U+$info->{code}): $info->{name}";
return unless $extra;
my %extra;
for(qw(block category script)) {
$extra{$_} = $info->{$_}
}
# Optional things
for(qw(decimal digit numeric upper lower title)) {
$extra{$_} = $info->{$_} if $info->{$_};
}
$extra{"utf-8 (hex)"} = join "", map sprintf("\\x%02x", ord), split //, encode_utf8 chr(hex $info->{code});
p " " x (7 + length $info->{code}), join(", ", map { "$_=$extra{$_}" } sort keys %extra);
}
}
# Based on scriptassist.
sub fork_wrapper {
my($child, $parent) = @_;
pipe(my $rfh, my $wfh);
my $pid = fork;
$pipe_in_progress = 1;
return unless defined $pid;
if($pid) {
close $wfh;
Irssi::pidwait_add($pid);
my $pipetag;
my @args = ($rfh, \$pipetag, $parent);
$pipetag = Irssi::input_add(fileno($rfh), INPUT_READ, \&pipe_input, \@args);
} else {
eval {
$child->($wfh);
};
syswrite $wfh, "- $@" if $@;
POSIX::_exit(1);
}
}
sub pipe_input {
my ($rfh, $pipetag, $parent) = @{$_[0]};
my $line = <$rfh>;
close($rfh);
Irssi::input_remove($$pipetag);
$pipe_in_progress = 0;
$parent->($line);
}
command_bind charblocks => sub {
my @blocks = sort keys %{charblocks()};
print for @blocks;
}