Browse files

Tenjin now properly decodes template input and encodes template outpu…

…t according to the user's selected encoding; Default encoding now spelled UTF-8 instead of utf8; Added t/encoding.t to test template encoding
  • Loading branch information...
1 parent 6e0e39c commit 3f9dad8de0be0707cf9dc4ff49d969861aa88b0b @ido50 committed Mar 29, 2011
Showing with 89 additions and 23 deletions.
  1. +4 −0 Changes
  2. +26 −6 README
  3. +37 −13 lib/Tenjin.pm
  4. +6 −4 lib/Tenjin/Template.pm
  5. +14 −0 t/06-encoding.t
  6. +1 −0 t/data/encoding/chinese.html
  7. +1 −0 t/data/encoding/hebrew.html
View
4 Changes
@@ -1,6 +1,10 @@
Revision history for Perl extension Tenjin
{{$NEXT}}
+ - Tenjin now properly decodes template input and encodes template output
+ according to the user's selected encoding
+ - Default encoding now spelled UTF-8 instead of utf8
+ - Added t/06-encoding.t to test template encoding
0.062 2010-08-06 20:49:28 Asia/Jerusalem
- Tenjin now croaks instead of dies, and only inside the context
View
32 README
@@ -8,10 +8,10 @@ SYNOPSIS
# your templates. Recommended, but not used
# by default.
- $Tenjin::ENCODING = "utf8"; # set the encoding of your template files
- # to utf8. This is the default encoding used
+ $Tenjin::ENCODING = "UTF-8"; # set the encoding of your template files
+ # to UTF-8. This is the default encoding used
# so there's no need to do this if your
- # templates really are utf8.
+ # templates really are UTF-8.
my $engine = Tenjin->new(\%options);
my $context = { title => 'Tenjin Example', items => [qw/AAA BBB CCC/] };
@@ -40,7 +40,9 @@ DESCRIPTION
* Support for rendering templates from non-file sources (such as a
database) is added.
- * Ability to set the encoding of your templates is added.
+ * Ability to set the encoding of your templates is added (Tenjin will
+ decode template files according to this encoding; by default, Tenjin
+ will decode them as UTF-8).
* HTML is encoded and decoded using the HTML::Entities module, instead
of internally.
@@ -54,6 +56,24 @@ DESCRIPTION
I'll do my best). Please note that version 0.05 (and above) of this
module is NOT backwards compatible with previous versions.
+ A NOTE ABOUT ENCODING
+ When Tenjin opens template files, it will automatically decode their
+ contents according to the selected encoding (UTF-8 by default), so make
+ sure your template files are properly encoded. Tenjin also writes cache
+ files of compiled template structure. These will be automatically
+ encoded according to the selected encoding.
+
+ When it comes to UTF-8, it might interest you to know how Tenjin
+ behaves:
+
+ 1. "UTF-8" is the default encoding used. If you provide an alternate
+ spelling (such as "utf8" or "UTF8"), either by setting $Tenjin::ENCODING
+ before calling "Tenjin->new()" or through its "encoding" option, Tenjin
+ will normalize it to "UTF-8".
+ 2. When reading files, Tenjin uses "<:encoding(UTF-8)", while when
+ writing files, Tenjin uses ">:utf8", as recommended by this article
+ <https://secure.wikimedia.org/wikibooks/en/w/index.php?title=Perl_Progra
+ mming/Unicode_UTF-8&oldid=2020796>.
+
METHODS
new( \%options )
This creates a new instance of Tenjin. "\%options" is a hash-ref
@@ -85,7 +105,7 @@ METHODS
(turned off by default).
* encoding - Another way to set the encoding of your template files
- (set to utf8 by default).
+ (set to "UTF-8" by default).
render( $tmpl_name, [\%_context, $use_layout] )
Renders a template whose name is identified by $tmpl_name. Remember that
@@ -290,7 +310,7 @@ SUPPORT
LICENSE AND COPYRIGHT
Tenjin is licensed under the MIT license.
- Copyright (c) 2007-2010 the aforementioned authors.
+ Copyright (c) 2007-2011 the aforementioned authors.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
View
50 lib/Tenjin.pm
@@ -10,11 +10,11 @@ use Tenjin::Context;
use Tenjin::Template;
use Tenjin::Preprocessor;
-our $VERSION = "0.070_001";
+our $VERSION = "0.070";
$VERSION = eval $VERSION;
our $USE_STRICT = 0;
-our $ENCODING = 'utf8';
+our $ENCODING = 'UTF-8';
our $BYPASS_TAINT = 1; # unset if you like taint mode
our $TEMPLATE_CLASS = 'Tenjin::Template';
our $CONTEXT_CLASS = 'Tenjin::Context';
@@ -33,10 +33,10 @@ Tenjin - Fast templating engine with support for embedded Perl.
# your templates. Recommended, but not used
# by default.
- $Tenjin::ENCODING = "utf8"; # set the encoding of your template files
- # to utf8. This is the default encoding used
+ $Tenjin::ENCODING = "UTF-8"; # set the encoding of your template files
+ # to UTF-8. This is the default encoding used
# so there's no need to do this if your
- # templates really are utf8.
+ # templates really are UTF-8.
my $engine = Tenjin->new(\%options);
my $context = { title => 'Tenjin Example', items => [qw/AAA BBB CCC/] };
@@ -67,7 +67,8 @@ module (i.e. this one).
=item * Support for rendering templates from non-file sources (such as
a database) is added.
-=item * Ability to set the encoding of your templates is added.
+=item * Ability to set the encoding of your templates is added (Tenjin will decode
+template files according to this encoding; by default, Tenjin will decode them as UTF-8).
=item * HTML is encoded and decoded using the L<HTML::Entities> module,
instead of internally.
@@ -83,6 +84,26 @@ with features and changes from the original), I cannot guarantee it (but I'll
do my best). Please note that version 0.05 (and above) of this module is
NOT backwards compatible with previous versions.
+=head2 A NOTE ABOUT ENCODING
+
+When Tenjin opens template files, it will automatically decode their contents
+according to the selected encoding (UTF-8 by default), so make sure your template
+files are properly encoded. Tenjin also writes cache files of compiled template
+structure. These will be automatically encoded according to the selected encoding.
+
+When it comes to UTF-8, it might interest you to know how Tenjin behaves:
+
+=over
+
+=item 1. "UTF-8" is the default encoding used. If you provide an alternate spelling
+(such as "utf8" or "UTF8"), either by setting C<$Tenjin::ENCODING> before calling
+C<< Tenjin->new() >> or through its C<encoding> option, Tenjin will normalize it to "UTF-8".
+
+=item 2. When reading files, Tenjin uses "<:encoding(UTF-8)", while when writing
+files, Tenjin uses ">:utf8", as recommended by L<this article|https://secure.wikimedia.org/wikibooks/en/w/index.php?title=Perl_Programming/Unicode_UTF-8&oldid=2020796>.
+
+=back
+
=head1 METHODS
=head2 new( \%options )
@@ -115,7 +136,7 @@ in the location where you use C<[== $_content ==]>.
=item * B<strict> - Another way to make Tenjin use strict on embedded Perl code (turned
off by default).
-=item * B<encoding> - Another way to set the encoding of your template files (set to utf8
+=item * B<encoding> - Another way to set the encoding of your template files (set to "UTF-8"
by default).
=back
@@ -135,12 +156,15 @@ sub new {
$self->{prefix} = '' unless $self->{prefix};
$self->{postfix} = '' unless $self->{postfix};
- if ($self->{encoding}) {
- $Tenjin::ENCODING = $self->{encoding};
- }
- if (defined $self->{strict}) {
- $Tenjin::USE_STRICT = $self->{strict};
- }
+ $Tenjin::ENCODING = $self->{encoding}
+ if $self->{encoding};
+
+ # if encoding is utf8, make sure it's spelled UTF-8 and not otherwise
+ $Tenjin::ENCODING = 'UTF-8'
+ if $Tenjin::ENCODING =~ m/^utf-?8$/i;
+
+ $Tenjin::USE_STRICT = $self->{strict}
+ if defined $self->{strict};
return bless $self, $class;
}
View
10 lib/Tenjin/Template.pm
@@ -488,8 +488,8 @@ file will be locked for reading.
sub _read_file {
my ($self, $filename, $lock_required) = @_;
- open(IN, $filename) or croak "[Tenjin] Can't open $filename for reading: $!";
- binmode(IN);
+ open(IN, "<:encoding($Tenjin::ENCODING)", $filename)
+ or croak "[Tenjin] Can't open $filename for reading: $!";
flock(IN, LOCK_SH) if $lock_required;
read(IN, my $content, -s $filename);
@@ -511,8 +511,10 @@ locked exclusively when writing.
sub _write_file {
my ($self, $filename, $content, $lock_required) = @_;
- open(OUT, ">$filename") or croak "[Tenjin] Can't open $filename for writing: $!";
- binmode(OUT);
+ my $enc = $Tenjin::ENCODING eq 'UTF-8' ? '>:utf8' : ">:encoding($Tenjin::ENCODING)";
+
+ open(OUT, $enc, $filename)
+ or croak "[Tenjin] Can't open $filename for writing: $!";
flock(OUT, LOCK_EX) if $lock_required;
print OUT $content;
close(OUT);
View
14 t/06-encoding.t
@@ -0,0 +1,14 @@
+#!perl -T
+
+use strict;
+use warnings;
+use Test::More tests => 3;
+use Tenjin;
+use utf8;
+
+my $t = Tenjin->new({ path => ['t/data/encoding'] });
+ok($t, 'Got a proper Tenjin instance');
+
+is($t->render('hebrew.html'), "<h1>ג'רי סיינפלד</h1>\n", 'UTF-8 (Hebrew) properly decoded');
+
+is($t->render('chinese.html'), "<a title=\"汉语/漢語\">Chinese</a>\n", 'UTF-8 (Chinese) properly decoded');
View
1 t/data/encoding/chinese.html
@@ -0,0 +1 @@
+<a title="汉语/漢語">Chinese</a>
View
1 t/data/encoding/hebrew.html
@@ -0,0 +1 @@
+<h1>ג'רי סיינפלד</h1>

0 comments on commit 3f9dad8

Please sign in to comment.