Skip to content

Commit

Permalink
Issue #938: reimplemented NativeIDN, added IDNTest.
Browse files Browse the repository at this point in the history
	Change on 2018/02/05 by tball <tball@google.com>

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=184560256
  • Loading branch information
tomball committed Feb 5, 2018
1 parent 18223f1 commit 0fd97e1
Show file tree
Hide file tree
Showing 3 changed files with 191 additions and 6 deletions.
Expand Up @@ -16,6 +16,8 @@

package libcore.icu;

import java.util.regex.Pattern;

/**
* Java version of Android's NativeIDN class, rewritten for J2ObjC because the
* Android version uses native code that depends on ICU's uidna functions, which
Expand All @@ -36,12 +38,40 @@ public final class NativeIDN {
// Starting value of n used by both encode() and decode().
private static final int INITIAL_N = 128;
// NOTE(review): presumably the Punycode delimiter between the basic code points and the
// encoded deltas; its use is outside this view — confirm.
private static final char DELIMITER = '-';

// RFC 3490 separator characters: '.', U+3002, U+FF0E and U+FF61.
// toASCII() and toUnicode() split their input into labels on any of these.
private static final Pattern SEPARATORS_REGEX = Pattern.compile("[.\u3002\uFF0E\uFF61]");

// Prefix marking an encoded label: toASCII() prepends it to each encoded label, and
// toUnicode() strips it before decoding.
private static final String PUNYCODE_PREFIX = "xn--";

/**
 * Converts a Unicode domain name to its IDN (ASCII) form.
 *
 * <p>The input is split into labels on the RFC 3490 separator characters.
 * Every label that contains a non-ASCII character is encoded and given the
 * "xn--" prefix; all-ASCII labels are copied through unchanged. The labels
 * are then rejoined with '.'.
 *
 * <p>Empty labels are preserved, so a trailing root dot (for example
 * "example.com.") survives the conversion.
 *
 * @param s the domain name to convert
 * @param flags ignored by this implementation
 * @return the converted name, with its labels separated by '.'
 * @throws NullPointerException if {@code s} is null
 */
public static String toASCII(String s, int flags) {
  // A negative limit keeps trailing empty labels. The one-argument split()
  // discards them, which silently dropped a trailing root dot.
  String[] parts = SEPARATORS_REGEX.split(s, -1);
  for (int i = 0; i < parts.length; i++) {
    if (nonASCII(parts[i])) {
      parts[i] = PUNYCODE_PREFIX + encode(parts[i]);
    }
  }
  return String.join(".", parts);
}

/**
 * Reports whether the given string has at least one character outside the
 * 7-bit ASCII range, i.e. a char value above 0x7F.
 */
private static boolean nonASCII(String s) {
  for (char c : s.toCharArray()) {
    if (c > 0x7F) {
      return true;
    }
  }
  return false;
}

/**
* Encodes a Unicode string to Punycode ASCII.
*/
private static String encode(String s) {
int n = INITIAL_N;
int delta = 0;
int bias = INITIAL_BIAS;
Expand Down Expand Up @@ -114,11 +144,23 @@ public static String toASCII(String s, int flags) {
}

/**
 * Converts an IDN-formatted ASCII domain name to Unicode.
 *
 * <p>The input is split into labels on the RFC 3490 separator characters.
 * Every label that starts with "xn--" has the prefix removed and the
 * remainder decoded; all other labels are copied through unchanged. The
 * labels are then rejoined with '.'.
 *
 * <p>Empty labels are preserved, so a trailing root dot (for example
 * "example.com.") survives the conversion.
 *
 * @param s the domain name to convert
 * @param flags ignored by this implementation
 * @return the converted name, with its labels separated by '.'
 * @throws NullPointerException if {@code s} is null
 */
public static String toUnicode(String s, int flags) {
  // A negative limit keeps trailing empty labels. The one-argument split()
  // discards them, which silently dropped a trailing root dot.
  String[] parts = SEPARATORS_REGEX.split(s, -1);
  for (int i = 0; i < parts.length; i++) {
    // NOTE(review): this prefix match is case-sensitive, so a label written as
    // "XN--..." is passed through undecoded. RFC 3490 compares the ACE prefix
    // case-insensitively; confirm decode() accepts upper-case input before
    // relaxing this check.
    if (parts[i].startsWith(PUNYCODE_PREFIX)) {
      parts[i] = decode(parts[i].substring(PUNYCODE_PREFIX.length()));
    }
  }
  return String.join(".", parts);
}

/**
* Convert a Punycode/ASCII string to Unicode.
*/
private static String decode(String s) {
int n = INITIAL_N;
int i = 0;
int bias = INITIAL_BIAS;
Expand Down
@@ -0,0 +1,142 @@
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.harmony.tests.java.net;

import java.net.IDN;

import junit.framework.TestCase;

public class IDNTest extends TestCase {

  /**
   * {@link java.net.IDN#toASCII(String)}
   * @since 1.6
   */
  public void test_ToASCII_LString() {
    boolean rejectedNull = false;
    try {
      IDN.toASCII(null);
    } catch (NullPointerException expected) {
      rejectedNull = true;
    }
    assertTrue("should throw NullPointerException", rejectedNull);

    // Each row is {expected ASCII form, Unicode input}.
    String[][] cases = {
      {"www.xn--gwtq9nb2a.jp", "www.\u65E5\u672C\u5E73.jp"},
      {"www.xn--vckk7bxa0eza9ezc9d.com",
          "www.\u30CF\u30F3\u30C9\u30DC\u30FC\u30EB\u30B5\u30E0\u30BA.com"},
      {"www.xn--frgbolaget-q5a.nu", "www.f\u00E4rgbolaget.nu"},
      {"www.xn--bcher-kva.de", "www.b\u00FCcher.de"},
      {"www.xn--brndendekrlighed-vobh.com", "www.br\u00E6ndendek\u00E6rlighed.com"},
      {"www.xn--rksmrgs-5wao1o.se", "www.r\u00E4ksm\u00F6rg\u00E5s.se"},
      {"www.xn--9d0bm53a3xbzui.com", "www.\uC608\uBE44\uAD50\uC0AC.com"},
      {"xn--lck1c3crb1723bpq4a.com", "\u7406\u5BB9\u30CA\u30AB\u30E0\u30E9.com"},
      {"xn--l8je6s7a45b.org", "\u3042\u30FC\u308B\u3044\u3093.org"},
      {"www.xn--frjestadsbk-l8a.net", "www.f\u00E4rjestadsbk.net"},
      {"www.xn--mkitorppa-v2a.edu", "www.m\u00E4kitorppa.edu"},
    };
    for (String[] row : cases) {
      assertEquals(row[0], IDN.toASCII(row[1]));
    }
  }

  /**
   * {@link java.net.IDN#toASCII(String, int)}
   * @since 1.6
   */
  public void test_ToASCII_LString_I() {
    // These calls are allowed to either succeed or be rejected with
    // IllegalArgumentException; no particular outcome is asserted.
    convertToleratingRejection(
        "www.br\u00E6ndendek\u00E6rlighed.com", IDN.USE_STD3_ASCII_RULES);
    convertToleratingRejection(
        "www.r\u00E4ksm\u00F6rg\u00E5s.se", IDN.USE_STD3_ASCII_RULES);
    convertToleratingRejection(
        "www.f\u00E4rjestadsbk.net", IDN.ALLOW_UNASSIGNED | IDN.USE_STD3_ASCII_RULES);

    assertEquals("www.xn--gwtq9nb2a.jp",
        IDN.toASCII("www.\u65E5\u672C\u5E73.jp", 0));
    assertEquals("www.xn--vckk7bxa0eza9ezc9d.com",
        IDN.toASCII("www.\u30CF\u30F3\u30C9\u30DC\u30FC\u30EB\u30B5\u30E0\u30BA.com", 0));
    assertEquals("www.xn--frgbolaget-q5a.nu",
        IDN.toASCII("www.f\u00E4rgbolaget.nu", IDN.ALLOW_UNASSIGNED));
    assertEquals("www.xn--bcher-kva.de",
        IDN.toASCII("www.b\u00FCcher.de", IDN.ALLOW_UNASSIGNED));
    assertEquals("www.google.com",
        IDN.toASCII("www.google\u002Ecom", IDN.USE_STD3_ASCII_RULES));
  }

  /**
   * {@link java.net.IDN#toUnicode(String)}
   * @since 1.6
   */
  public void test_ToUnicode_LString() {
    boolean rejectedNull = false;
    try {
      IDN.toUnicode(null);
    } catch (NullPointerException expected) {
      rejectedNull = true;
    }
    assertTrue("should throw NullPointerException", rejectedNull);

    // Each row is {expected Unicode form, input}.
    String[][] cases = {
      {"", ""},
      {"www.bcher.de", "www.bcher.de"},
      {"www.b\u00FCcher.de", "www.b\u00FCcher.de"},
      {"www.\u65E5\u672C\u5E73.jp", "www.\u65E5\u672C\u5E73.jp"},
      {"www.\u65E5\u672C\u5E73.jp", "www\uFF0Exn--gwtq9nb2a\uFF61jp"},
      {"www.\u65E5\u672C\u5E73.jp", "www.xn--gwtq9nb2a.jp"},
    };
    for (String[] row : cases) {
      assertEquals(row[0], IDN.toUnicode(row[1]));
    }
  }

  /**
   * {@link java.net.IDN#toUnicode(String, int)}
   * @since 1.6
   */
  public void test_ToUnicode_LString_I() {
    assertEquals("", IDN.toUnicode("", IDN.ALLOW_UNASSIGNED));
    assertEquals("www.f\u00E4rgbolaget.nu",
        IDN.toUnicode("www.f\u00E4rgbolaget.nu", IDN.USE_STD3_ASCII_RULES));
    assertEquals("www.r\u00E4ksm\u00F6rg\u00E5s.nu",
        IDN.toUnicode("www.r\u00E4ksm\u00F6rg\u00E5s\u3002nu", IDN.USE_STD3_ASCII_RULES));
    // RI bug. It cannot parse "www.xn--gwtq9nb2a.jp" when
    // USE_STD3_ASCII_RULES is set.
    assertEquals("www.\u65E5\u672C\u5E73.jp",
        IDN.toUnicode("www\uFF0Exn--gwtq9nb2a\uFF61jp", IDN.USE_STD3_ASCII_RULES));
  }

  /**
   * Calls {@link java.net.IDN#toASCII(String, int)} and discards the result,
   * accepting an IllegalArgumentException as a valid outcome.
   */
  private static void convertToleratingRejection(String input, int flags) {
    try {
      IDN.toASCII(input, flags);
    } catch (IllegalArgumentException ignored) {
      // Rejection is an acceptable outcome for these inputs.
    }
  }
}
1 change: 1 addition & 0 deletions jre_emul/test_sources.mk
Expand Up @@ -110,6 +110,7 @@ SUPPORT_SOURCES = \
org/apache/harmony/security/tests/support/spec/MyEncodedKeySpec.java \
org/apache/harmony/testframework/serialization/SerializationTest.java \
org/apache/harmony/tests/java/lang/reflect/GenericReflectionTestsBase.java \
org/apache/harmony/tests/java/net/IDNTest.java \
org/apache/harmony/tests/java/nio/channels/MockDatagramChannel.java \
org/apache/harmony/tests/java/nio/channels/MockServerSocketChannel.java \
org/apache/harmony/tests/java/nio/channels/MockSocketChannel.java \
Expand Down

0 comments on commit 0fd97e1

Please sign in to comment.