Permalink
Browse files

First version of utf8::all

Michael Schwern did the hard work; Mike Doherty split that from
perl5i::2 into it's own module.

See evalEmpire/perl5i#189
  • Loading branch information...
doherty committed Apr 20, 2011
1 parent 5b76e8c commit d279868c46c97e15ae1245e6ecd898d29c061015
Showing with 237 additions and 13 deletions.
  1. +1 −1 .gitignore
  2. +1 −1 CHANGES → Changes
  3. +36 −1 MANIFEST.SKIP
  4. +1 −0 corpus/testfile
  5. +4 −3 dist.ini
  6. +0 −7 lib/Unicode/All.pm
  7. +92 −0 lib/utf8/all.pm
  8. +11 −0 t/ARGV.t
  9. +16 −0 t/ARGV_twice.t
  10. +19 −0 t/lexical.t
  11. +15 −0 t/open.t
  12. +41 −0 t/utf8.t
View
@@ -1,2 +1,2 @@
-Unicode-All*
+utf8-all*
.build
View
@@ -1,4 +1,4 @@
Revision history for Perl module {{$dist->name}}
{{$NEXT}}
- *
+ * Split code out of perl5i
View
@@ -1,4 +1,39 @@
-#!include_default
+
+#!start included /home/mike/perl5/perlbrew/perls/perl-5.10.1/lib/5.10.1/ExtUtils/MANIFEST.SKIP
+# Avoid version control files.
+\bRCS\b
+\bCVS\b
+\bSCCS\b
+,v$
+\B\.svn\b
+\B\.git\b
+\B\.gitignore\b
+\b_darcs\b
+
+# Avoid Makemaker generated and utility files.
+\bMANIFEST\.bak
+\bMakefile$
+\bblib/
+\bMakeMaker-\d
+\bpm_to_blib\.ts$
+\bpm_to_blib$
+\bblibdirs\.ts$ # 6.18 through 6.25 generated this
+
+# Avoid Module::Build generated and utility files.
+\bBuild$
+\b_build/
+
+# Avoid temp and backup files.
+~$
+\.old$
+\#$
+\b\.#
+\.bak$
+
+# Avoid Devel::Cover files.
+\bcover_db\b
+#!end included /home/mike/perl5/perlbrew/perls/perl-5.10.1/lib/5.10.1/ExtUtils/MANIFEST.SKIP
+
# Specific to this project
^dist\.ini$
View
@@ -0,0 +1 @@
+føø
View
@@ -1,7 +1,8 @@
-name = Unicode-All
+name = utf8-all
+author = Michael Schwern <mschwern@cpan.org>
author = Mike Doherty <doherty@cpan.org>
license = Perl_5
-copyright_holder = Mike Doherty
-copyright_year = 2011
+copyright_holder = Michael Schwern <mschwern@cpan.org> ; he wrote it, not me
+copyright_year = 2009
[@Author::DOHERTY]
View
@@ -1,7 +0,0 @@
-package Unicode::All;
-use strict;
-use warnings;
-# ABSTRACT:
-# VERSION
-
-1;
View
@@ -0,0 +1,92 @@
+package utf8::all;
+use strict;
+use warnings;
+use 5.010; # state
+# ABSTRACT: turn on unicode. All of it.
+# VERSION
+
+=head1 SYNOPSIS
+
+ use utf8::all; # Turn on UTF-8. All of it.
+
+ open my $in, '<', 'contains-utf8'; # UTF-8 already turned on here
+ print length 'føø bār'; # 7 UTF-8 characters
+ my $utf8_arg = shift @ARGV; # @ARGV is UTF-8 too!
+
+=head1 DESCRIPTION
+
+L<utf8> allows you to write your Perl encoded in UTF-8. That means UTF-8
+strings, variable names, and regular expressions. C<utf8::all> goes further, and
+makes C<@ARGV> encoded in UTF-8, and filehandles are opened with UTF-8 encoding
+turned on by default (including STDIN, STDOUT, STDERR). If you I<don't> want
+UTF-8 for a particular filehandle, you'll have to set C<binmode $filehandle>.
+
+The pragma is lexically-scoped, so you can do the following if you had some
+reason to:
+
+ {
+ use utf8::all;
+ open my $out, '>', 'outfile';
+ my $utf8_str = 'føø bār';
+ print length $utf8_str, "\n"; # 7
+ print $out $utf8_str; # out as utf8
+ }
+ open my $in, '<', 'outfile'; # in as raw
+ my $text = do { local $/; <$in>};
+ print length $text, "\n"; # 10, not 7!
+
+=cut
+
+use Encode ();
+use parent 'utf8';
+use parent 'open';
+
+sub import {
+ my $class = shift;
+
+ $^H{'utf8::all'} = 1; # Is that allowed?
+
+ # utf8 source code
+ utf8::import($class);
+
+ # utf8 by default on filehandles
+ open::import($class, ":encoding(UTF-8)");
+ open::import($class, ":std");
+ {
+ no strict 'refs'; ## no critic (TestingAndDebugging::ProhibitNoStrict)
+ *{$class . '::open'} = \&utf8_open;
+ }
+
+ #utf8 in @ARGV
+ state $have_encoded_argv = 0;
+ _encode_argv() unless $have_encoded_argv++;
+
+}
+
+sub utf8_open(*;$@) { ## no critic (Subroutines::ProhibitSubroutinePrototypes)
+ my $ret;
+ if( @_ == 1 ) {
+ $ret = CORE::open $_[0];
+ }
+ else {
+ $ret = CORE::open $_[0], $_[1], @_[2..$#_];
+ }
+
+ # Don't try to binmode an unopened filehandle
+ return $ret unless $ret;
+
+ my $h = (caller 1)[10];
+ binmode $_[0], ":encoding(UTF-8)" if $h->{'utf8::all'};
+ return $ret;
+}
+
+sub _encode_argv {
+ $_ = Encode::decode('UTF-8', $_) for @ARGV;
+ return;
+}
+
+=for Pod::Coverage
+utf8_open
+=cut
+
+1;
View
@@ -0,0 +1,11 @@
+#!perl
+# Test that utf8::all makes @ARGV utf8
+
+BEGIN {
+ @ARGV = qw(føø bar bāz);
+}
+
+use utf8::all;
+use Test::More tests => 1;
+
+is_deeply \@ARGV, [qw(føø bar bāz)];
View
@@ -0,0 +1,16 @@
+#!perl
+# Test that utf8::all doesn't double encode @ARGV [perl5i github 176]
+
+BEGIN {
+ @ARGV = qw(føø bar bāz);
+}
+
+{
+ package Foo;
+ use utf8::all;
+}
+
+use utf8::all;
+use Test::More tests => 1;
+
+is_deeply \@ARGV, [qw(føø bar bāz)];
View
@@ -0,0 +1,19 @@
+#!perl
+# utf8::all should have lexical effect
+
+use Test::More tests => 2;
+
+BEGIN {
+ @ARGV = qw(føø bar bāz);
+}
+
+# use utf8::all in a narrow lexical scope.
+# It shouldn't effect the rest of the program.
+{ use utf8::all }
+
+is_deeply \@ARGV, ["f\x{f8}\x{f8}", 'bar', "b\x{101}z"] or diag explain \@ARGV;
+
+{ # Bring utf8::all back into effect
+ use utf8::all;
+ is_deeply \@ARGV, [qw(føø bar bāz)];
+}
View
@@ -0,0 +1,15 @@
+#!perl
+# Test opening an actual file
+use utf8::all;
+use PerlIO;
+use Test::More tests => 4;
+
+ok open my $in, '<', 'corpus/testfile';
+my @layers = PerlIO::get_layers($in);
+ok(grep(m/utf8/, @layers), 'utf8 appears in the perlio layers')
+ or diag explain { $fh => \@layers };
+ok(grep(m/utf-8-strict/, @layers), 'utf-8-strict appears in the perlio layers')
+ or diag explain { $fh => \@layers };
+
+my $contents = do { local $/; <$in>};
+is $contents, "f\x{f8}\x{f8}\n", 'unicode retrieved OK';
View
@@ -0,0 +1,41 @@
+#!perl
+# utf8::all turns on utf8
+use strict;
+use warnings;
+
+use PerlIO;
+use Test::More;
+
+# Test with it on
+{
+ use utf8::all;
+
+ is length "utf8::all is MËTÁŁ", 18;
+
+ # Test the standard handles and all newly opened handles are utf8
+ ok open my $test_fh, ">", "perlio_test";
+ END { unlink "perlio_test" }
+ for my $fh (*STDOUT, *STDIN, *STDERR, $test_fh) {
+ my @layers = PerlIO::get_layers($fh);
+ ok(grep(m/utf8/, @layers), 'utf8 appears in the perlio layers')
+ or diag explain { $test_fh => \@layers };
+ ok(grep(m/utf-8-strict/, @layers), 'utf-8-strict appears in the perlio layers')
+ or diag explain { $test_fh => \@layers };
+ }
+}
+
+
+# And off
+{
+ ok open my $test_fh, ">", "perlio_test2";
+ END { unlink "perlio_test2" }
+
+ my @layers = PerlIO::get_layers($test_fh);
+ ok( !grep(/utf8/, @layers), q{utf8 doesn't appear in perlio layers})
+ or diag explain { $test_fh => \@layers };
+ ok( !grep(m/utf-8-strict/, @layers), q{utf-8-strict doesn't appear in the perlio layers})
+ or diag explain { $test_fh => \@layers };
+
+}
+
+done_testing;

0 comments on commit d279868

Please sign in to comment.