Skip to content

Commit

Permalink
Merge pull request #2116 from kareila/2112-unicode
Browse files Browse the repository at this point in the history
[#2112] remove $LJ::UNICODE config parameter
  • Loading branch information
zorkian committed Apr 15, 2017
2 parents 817c733 + 72d135f commit 0d1f340
Show file tree
Hide file tree
Showing 23 changed files with 48 additions and 99 deletions.
2 changes: 1 addition & 1 deletion cgi-bin/Apache/LiveJournal.pm
Expand Up @@ -1352,7 +1352,7 @@ sub journal_content
my $status = $opts->{'status'} || "200 OK";
$opts->{'contenttype'} ||= $opts->{'contenttype'} = "text/html";
if ($opts->{'contenttype'} =~ m!^text/! &&
$LJ::UNICODE && $opts->{'contenttype'} !~ /charset=/) {
$opts->{'contenttype'} !~ /charset=/) {
$opts->{'contenttype'} .= "; charset=utf-8";
}

Expand Down
2 changes: 1 addition & 1 deletion cgi-bin/DW/Controller/RPC/CutExpander.pm
Expand Up @@ -84,7 +84,7 @@ sub load_cuttext {

#load and prepare text of entry
my $text = LJ::CleanHTML::quote_html( $entry_obj->event_raw, $get->{nohtml} );
LJ::item_toutf8( $journal, \$subject, \$text ) if $LJ::UNICODE && $entry_obj->props->{unknown8bit};
LJ::item_toutf8( $journal, \$subject, \$text ) if $entry_obj->props->{unknown8bit};

my $suspend_msg = $entry_obj && $entry_obj->should_show_suspend_msg_to( $remote ) ? 1 : 0;
my $cleanhtml_opts = { cuturl => $entry_obj->url,
Expand Down
9 changes: 2 additions & 7 deletions cgi-bin/LJ/CleanHTML.pm
Expand Up @@ -62,16 +62,11 @@ sub helper_preload


# this treats normal characters and &entities; as single characters
# also treats UTF-8 chars as single characters if $LJ::UNICODE
# also treats UTF-8 chars as single characters
my $onechar;
{
my $utf_longchar = '[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]';
my $match;
if (not $LJ::UNICODE) {
$match = '[^&\s]|(&\#?\w{1,7};)';
} else {
$match = $utf_longchar . '|[^&\s\x80-\xff]|(?:&\#?\w{1,7};)';
}
my $match = $utf_longchar . '|[^&\s\x80-\xff]|(?:&\#?\w{1,7};)';
$onechar = qr/$match/o;
}

Expand Down
3 changes: 1 addition & 2 deletions cgi-bin/LJ/Entry.pm
Expand Up @@ -1513,7 +1513,7 @@ sub get_posts


while (my ($id, $rp) = each %$rawposts) {
if ($LJ::UNICODE && $rp->{props}{unknown8bit}) {
if ( $rp->{props}{unknown8bit} ) {
#LJ::item_toutf8($u, \$rp->{text}[0], \$rp->{text}[1], $rp->{props});
}
}
Expand Down Expand Up @@ -2611,7 +2611,6 @@ sub expand_embedded {
sub item_toutf8
{
my ($u, $subject, $text, $props) = @_;
return unless $LJ::UNICODE;
$props ||= {};

my $convert = sub {
Expand Down
2 changes: 1 addition & 1 deletion cgi-bin/LJ/Feed.pm
Expand Up @@ -193,7 +193,7 @@ sub make_feed
next ENTRY if $posteru{$it->{'posterid'}} && $posteru{$it->{'posterid'}}->is_suspended;
next ENTRY if $entry_obj && $entry_obj->is_suspended_for($remote);

if ($LJ::UNICODE && $logprops{$itemid}->{'unknown8bit'}) {
if ( $logprops{$itemid}->{'unknown8bit'} ) {
LJ::item_toutf8($u, \$logtext->{$itemid}->[0],
\$logtext->{$itemid}->[1], $logprops{$itemid});
}
Expand Down
4 changes: 1 addition & 3 deletions cgi-bin/LJ/Global/BMLInit.pm
Expand Up @@ -57,9 +57,7 @@ BML::register_hook("codeerror", sub {
}
});

if ($LJ::UNICODE) {
BML::set_config("DefaultContentType", "text/html; charset=utf-8");
}
BML::set_config("DefaultContentType", "text/html; charset=utf-8");

# register BML multi-language hook
BML::register_hook("ml_getter", \&LJ::Lang::get_text);
Expand Down
4 changes: 2 additions & 2 deletions cgi-bin/LJ/Global/Constants.pm
Expand Up @@ -28,8 +28,8 @@ use constant MAX_32BIT_SIGNED => 2147483647;
$LJ::MAX_32BIT_SIGNED = 2147483647;

# width constants. BMAX_ constants are restrictions on byte width,
# CMAX_ on character width (character means byte unless $LJ::UNICODE,
# in which case it means a UTF-8 character).
# CMAX_ on character width (character used to mean byte, but now
# it means a UTF-8 character).

use constant BMAX_SUBJECT => 255; # *_SUBJECT for journal events, not comments
use constant CMAX_SUBJECT => 100;
Expand Down
2 changes: 0 additions & 2 deletions cgi-bin/LJ/Global/Defaults.pm
Expand Up @@ -47,8 +47,6 @@ no strict "vars";

$SERVER_NAME ||= Sys::Hostname::hostname();

$UNICODE = 1 unless defined $UNICODE;

@LANGS = ("en") unless @LANGS;
$DEFAULT_LANG ||= $LANGS[0];

Expand Down
15 changes: 3 additions & 12 deletions cgi-bin/LJ/Protocol.pm
Expand Up @@ -578,12 +578,6 @@ sub login
my $res = {};
my $ver = $req->{'ver'};

## check for version mismatches
## non-Unicode installations can't handle versions >=1

return fail($err,207, "This installation does not support Unicode clients")
if $ver>=1 and not $LJ::UNICODE;

# do not let locked people log in
return fail($err, 308) if $u->is_locked;

Expand Down Expand Up @@ -1045,8 +1039,6 @@ sub common_event_validation
my $uowner = $flags->{u_owner} || $flags->{u};
return fail($err,207,'Posting in a community with international or special characters require a Unicode-capable LiveJournal client. Download one at http://www.livejournal.com/download/.')
if ! $uowner->is_person;
} else {
return fail($err,207, "This installation does not support Unicode clients") unless $LJ::UNICODE;
}

# validate that the text is valid UTF-8
Expand Down Expand Up @@ -2455,9 +2447,8 @@ sub getevents
}

# load properties. Even if the caller doesn't want them, we need
# them in Unicode installations to recognize older 8bit non-UF-8
# them to recognize older 8bit non-UTF-8
# entries.
unless ($req->{'noprops'} && !$LJ::UNICODE)
{
### do the properties now
$count = 0;
Expand Down Expand Up @@ -2520,12 +2511,12 @@ sub getevents

# now that we have the subject, the event and the props,
# auto-translate them to UTF-8 if they're not in UTF-8.
if ($LJ::UNICODE && $req->{ver} >= 1 && $evt->{props}->{unknown8bit}) {
if ( $req->{ver} >= 1 && $evt->{props}->{unknown8bit} ) {
LJ::item_toutf8($uowner, \$t->[0], \$t->[1], $evt->{props});
$evt->{converted_with_loss} = 1;
}

if ($LJ::UNICODE && $req->{'ver'} < 1 && !$evt->{'props'}->{'unknown8bit'}) {
if ( $req->{'ver'} < 1 && !$evt->{'props'}->{'unknown8bit'} ) {
unless ( LJ::is_ascii($t->[0]) &&
LJ::is_ascii($t->[1]) &&
LJ::is_ascii(join(' ', values %{$evt->{'props'}}) )) {
Expand Down
4 changes: 2 additions & 2 deletions cgi-bin/LJ/S2.pm
Expand Up @@ -2133,7 +2133,7 @@ sub Entry_from_entryobj
#load and prepare subject and text of entry
my $subject = LJ::CleanHTML::quote_html( $entry_obj->subject_html, $get->{nohtml} );
my $text = $no_entry_body ? "" : LJ::CleanHTML::quote_html( $entry_obj->event_raw, $get->{nohtml} );
LJ::item_toutf8( $journal, \$subject, \$text, $entry_obj->props ) if $LJ::UNICODE && $entry_obj->props->{unknown8bit};
LJ::item_toutf8( $journal, \$subject, \$text, $entry_obj->props ) if $entry_obj->props->{unknown8bit};

my $suspend_msg = $entry_obj && $entry_obj->should_show_suspend_msg_to( $remote ) ? 1 : 0;

Expand Down Expand Up @@ -2356,7 +2356,7 @@ sub Page
include_meta_viewport => $r->cookie( 'no_mobile' ) ? 0 : 1,
};

if ($LJ::UNICODE && $opts && $opts->{'saycharset'}) {
if ( $opts && $opts->{'saycharset'} ) {
$p->{'head_content'} .= '<meta http-equiv="Content-Type" content="text/html; charset=' . $opts->{'saycharset'} . "\" />\n";
}

Expand Down
5 changes: 2 additions & 3 deletions cgi-bin/LJ/S2/EntryPage.pm
Expand Up @@ -62,9 +62,8 @@ sub EntryPage
if ($u->should_block_robots || $entry->should_block_robots) {
$p->{'head_content'} .= LJ::robot_meta_tags();
}
if ($LJ::UNICODE) {
$p->{'head_content'} .= '<meta http-equiv="Content-Type" content="text/html; charset='.$opts->{'saycharset'}."\" />\n";
}

$p->{'head_content'} .= '<meta http-equiv="Content-Type" content="text/html; charset='.$opts->{'saycharset'}."\" />\n";

my $prev_url = S2::Builtin::LJ::Entry__get_link( $opts->{ctx}, $s2entry, "nav_prev" )->{url};
$p->{head_content} .= qq{<link rel="prev" href="$prev_url" />\n} if $prev_url;
Expand Down
6 changes: 3 additions & 3 deletions cgi-bin/LJ/S2/ReplyPage.pm
Expand Up @@ -170,15 +170,15 @@ sub ReplyPage
LJ::load_talk_props2($u, [ $re_talkid ])->{$re_talkid} || {};
$parpost->{'dtid'} = $dtalkid;

if($LJ::UNICODE && $parpost->{'props'}->{'unknown8bit'}) {
if( $parpost->{'props'}->{'unknown8bit'} ) {
LJ::item_toutf8($u, \$parpost->{'subject'}, \$parpost->{'body'}, {});
}

my $datetime = DateTime_unix(LJ::mysqldate_to_time($parpost->{'datepost'}));

my $comment_userpic;
my $s2poster;

my $pu = $parentcomment->poster;
if ( $pu ) {
return $opts->{handler_return} = 403 if $pu->is_suspended; # do not show comments by suspended users
Expand Down
5 changes: 2 additions & 3 deletions cgi-bin/LJ/S2/YearPage.pm
Expand Up @@ -30,9 +30,8 @@ sub YearPage
if ($u->should_block_robots) {
$p->{'head_content'} .= LJ::robot_meta_tags();
}
if ($LJ::UNICODE) {
$p->{'head_content'} .= '<meta http-equiv="Content-Type" content="text/html; charset='.$opts->{'saycharset'}."\" />\n";
}

$p->{'head_content'} .= '<meta http-equiv="Content-Type" content="text/html; charset='.$opts->{'saycharset'}."\" />\n";

my $get = $opts->{'getargs'};

Expand Down
27 changes: 8 additions & 19 deletions cgi-bin/LJ/Talk.pm
Expand Up @@ -258,7 +258,7 @@ sub get_journal_item
LJ::load_log_props2($u->{'userid'}, [ $itemid ], \%logprops);
$item->{'props'} = $logprops{$itemid} || {};

if ($LJ::UNICODE && $logprops{$itemid}->{'unknown8bit'}) {
if ( $logprops{$itemid}->{'unknown8bit'} ) {
LJ::item_toutf8($u, \$item->{'subject'}, \$item->{'event'},
$item->{'logprops'}->{$itemid});
}
Expand Down Expand Up @@ -1233,14 +1233,12 @@ sub load_comments
}
}

if ($LJ::UNICODE) {
foreach (@posts_to_load) {
if ($posts->{$_}->{'props'}->{'unknown8bit'}) {
LJ::item_toutf8($u, \$posts->{$_}->{'subject'},
\$posts->{$_}->{'body'},
{});
}
}
foreach (@posts_to_load) {
if ($posts->{$_}->{'props'}->{'unknown8bit'}) {
LJ::item_toutf8($u, \$posts->{$_}->{'subject'},
\$posts->{$_}->{'body'},
{});
}
}

# load users who posted
Expand Down Expand Up @@ -2129,7 +2127,7 @@ sub icon_dropdown {
my %res;
if ( $remote ) {
LJ::do_request({ mode => "login",
ver => ($LJ::UNICODE ? "1" : "0"),
ver => $LJ::PROTOCOL_VER,
user => $remote->{'user'},
getpickws => 1,
}, \%res, { "noauth" => 1, "userid" => $remote->{'userid'} });
Expand Down Expand Up @@ -3296,15 +3294,6 @@ sub init {
return $err->("<?badinput?>") unless LJ::text_in($form);

$init->{unknown8bit} = 0;
unless (LJ::is_ascii($form->{'body'}) && LJ::is_ascii($form->{'subject'})) {
if ($LJ::UNICODE) {
# no need to check if they're well-formed, we did that above
} else {
# so rest of site can change chars to ? marks until
# default user's encoding is set. (legacy support)
$init->{unknown8bit} = 1;
}
}

my ($bl, $cl) = LJ::text_length($form->{'body'});
if ($cl > LJ::CMAX_COMMENT) {
Expand Down
21 changes: 4 additions & 17 deletions cgi-bin/LJ/TextUtil.pm
Expand Up @@ -405,9 +405,6 @@ sub text_out
{
my $rtext = shift;

# if we're not Unicode, do nothing
return unless $LJ::UNICODE;

# is this valid UTF-8 already?
return if LJ::is_utf8($$rtext);

Expand All @@ -427,7 +424,7 @@ sub text_out
sub text_in
{
my $text = shift;
return 1 unless $LJ::UNICODE;

if (ref ($text) eq "HASH") {
return ! (grep { !LJ::is_utf8($_) } values %{$text});
}
Expand Down Expand Up @@ -478,9 +475,8 @@ sub text_convert {

# <LJFUNC>
# name: LJ::text_length
# des: returns both byte length and character length of a string. In a non-Unicode
# environment, this means byte length twice. In a Unicode environment,
# the function assumes that its argument is a valid UTF-8 string.
# des: returns both byte length and character length of a string.
# The function assumes that its argument is a valid UTF-8 string.
# args: text
# des-text: the string to measure
# returns: a list of two values, (byte_length, char_length).
Expand All @@ -490,9 +486,6 @@ sub text_length
{
my $text = shift;
my $bl = length($text);
unless ($LJ::UNICODE) {
return ($bl, $bl);
}
my $cl = 0;
my $utf_char = "([\x00-\x7f]|[\xc0-\xdf].|[\xe0-\xef]..|[\xf0-\xf7]...)";

Expand All @@ -503,8 +496,7 @@ sub text_length
# <LJFUNC>
# name: LJ::text_trim
# des: truncate string according to requirements on byte length, char
# length, or both. "char length" means number of UTF-8 characters if
# [ljconfig[unicode]] is set, or the same thing as byte length otherwise.
# length, or both. "char length" means number of UTF-8 characters.
# args: text, byte_max, char_max
# des-text: the string to trim
# des-byte_max: maximum allowed length in bytes; if 0, there's no restriction
Expand All @@ -517,11 +509,6 @@ sub text_trim
$text = defined $text ? LJ::trim( $text ) : '';
return $text unless $byte_max or $char_max;

if (!$LJ::UNICODE) {
$byte_max = $char_max if $char_max and $char_max < $byte_max;
$byte_max = $char_max unless $byte_max;
return LJ::trim( substr( $text, 0, $byte_max ) );
}
my $cur = 0;
my $utf_char = "([\x00-\x7f]|[\xc0-\xdf].|[\xe0-\xef]..|[\xf0-\xf7]...)";

Expand Down
2 changes: 1 addition & 1 deletion cgi-bin/LJ/Web.pm
Expand Up @@ -866,7 +866,7 @@ sub create_qr_div {
{
my %res;
LJ::do_request({ mode => "login",
ver => ($LJ::UNICODE ? "1" : "0"),
ver => $LJ::PROTOCOL_VER,
user => $remote->user,
getpickws => 1,
getpickwurls => 1,
Expand Down
6 changes: 1 addition & 5 deletions cgi-bin/lj-bml-blocks.pl
Expand Up @@ -57,11 +57,7 @@
BML::register_block("DL", "DR", $dl);
}

if ($LJ::UNICODE) {
BML::register_block("METACTYPE", "S", '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">')
} else {
BML::register_block("METACTYPE", "S", '<meta http-equiv="Content-Type" content="text/html">')
}
# Register the METACTYPE block with a fixed UTF-8 Content-Type meta tag.
# The terminating semicolon is required: this statement is no longer the
# last one inside an if/else block, and the file's closing "1;" follows it.
BML::register_block("METACTYPE", "S", '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">');


1;
6 changes: 4 additions & 2 deletions cgi-bin/ljlib.pl
Expand Up @@ -149,8 +149,10 @@ sub Unicode::MapUTF8::AUTOLOAD {
LJ::MemCache::init();

# $LJ::PROTOCOL_VER is the version of the client-server protocol
# used uniformly by server code which uses the protocol.
$LJ::PROTOCOL_VER = ($LJ::UNICODE ? "1" : "0");
# used uniformly by server code which uses the protocol. We used
# to set this to "0" if $LJ::UNICODE was false, but now we assume
# we always want to use Unicode.
$LJ::PROTOCOL_VER = "1";

# declare views for user journals
%LJ::viewinfo = (
Expand Down
3 changes: 0 additions & 3 deletions etc/config.pl
Expand Up @@ -221,9 +221,6 @@
@LANGS = qw( en_DW ) if -d "$HOME/ext/dw-nonfree";
}

# support unicode (posts in multiple languages)? leave enabled.
$UNICODE = 1;


###
### Database Configuration
Expand Down
10 changes: 5 additions & 5 deletions htdocs/export_do.bml
Expand Up @@ -43,10 +43,10 @@ _c?>
}

$encoding ||= $POST{'encoding'};
$encoding ||= $LJ::UNICODE ? 'utf-8' : 'iso-8859-1';
$encoding ||= 'utf-8';

if ($LJ::UNICODE && lc($encoding) ne "utf-8" &&
! Unicode::MapUTF8::utf8_supported_charset($encoding)) {
if ( lc($encoding) ne "utf-8" &&
! Unicode::MapUTF8::utf8_supported_charset($encoding) ) {
push @errors, $ML{'.error.encoding'};
}

Expand Down Expand Up @@ -137,7 +137,7 @@ _c?>
my $eprops = $props{$e->{'ritemid'}};

# convert to UTF-8 if necessary
if ($LJ::UNICODE && $eprops->{'unknown8bit'} && !$opts->{'notranslation'}) {
if ( $eprops->{'unknown8bit'} && !$opts->{'notranslation'} ) {
my $error;
$e->{'subject'} = LJ::text_convert($e->{'subject'}, $u, \$error);
$e->{'event'} = LJ::text_convert($e->{'event'}, $u, \$error);
Expand All @@ -161,7 +161,7 @@ _c?>
# Unicode environment. In a pre-Unicode environment the chosen encoding
# is merely a label.

if ($LJ::UNICODE && lc($opts->{'encoding'}) ne 'utf-8' && !$opts->{'notranslation'}) {
if ( lc($opts->{'encoding'}) ne 'utf-8' && !$opts->{'notranslation'} ) {
$entry = Unicode::MapUTF8::from_utf8({-string=>$entry,
-charset=>$opts->{'encoding'}});
}
Expand Down

0 comments on commit 0d1f340

Please sign in to comment.