Skip to content

Commit

Permalink
refactor encodeUTF8
Browse files: browse the repository at this point in the history
  • Loading branch information
wenshao committed Jul 27, 2017
1 parent 7effec0 commit fcc919a
Showing 1 changed file with 25 additions and 22 deletions.
47 changes: 25 additions & 22 deletions src/main/java/com/alibaba/fastjson/util/IOUtils.java
Expand Up @@ -586,33 +586,33 @@ public static byte[] decodeBase64(String s) {
return dArr; return dArr;
} }


public static int encodeUTF8(char[] sa, int sp, int len, byte[] da) { public static int encodeUTF8(char[] chars, int offset, int len, byte[] bytes) {
int sl = sp + len; int sl = offset + len;
int dp = 0; int dp = 0;
int dlASCII = dp + Math.min(len, da.length); int dlASCII = dp + Math.min(len, bytes.length);


// ASCII only optimized loop // ASCII only optimized loop
while (dp < dlASCII && sa[sp] < '\u0080') { while (dp < dlASCII && chars[offset] < '\u0080') {
da[dp++] = (byte) sa[sp++]; bytes[dp++] = (byte) chars[offset++];
} }


while (sp < sl) { while (offset < sl) {
char c = sa[sp++]; char c = chars[offset++];
if (c < 0x80) { if (c < 0x80) {
// Have at most seven bits // Have at most seven bits
da[dp++] = (byte) c; bytes[dp++] = (byte) c;
} else if (c < 0x800) { } else if (c < 0x800) {
// 2 bytes, 11 bits // 2 bytes, 11 bits
da[dp++] = (byte) (0xc0 | (c >> 6)); bytes[dp++] = (byte) (0xc0 | (c >> 6));
da[dp++] = (byte) (0x80 | (c & 0x3f)); bytes[dp++] = (byte) (0x80 | (c & 0x3f));
} else if (c >= '\uD800' && c < ('\uDFFF' + 1)) { //Character.isSurrogate(c) but 1.7 } else if (c >= '\uD800' && c < ('\uDFFF' + 1)) { //Character.isSurrogate(c) but 1.7
final int uc; final int uc;
int ip = sp - 1; int ip = offset - 1;
if (Character.isHighSurrogate(c)) { if (Character.isHighSurrogate(c)) {
if (sl - ip < 2) { if (sl - ip < 2) {
uc = -1; uc = -1;
} else { } else {
char d = sa[ip + 1]; char d = chars[ip + 1];
if (Character.isLowSurrogate(d)) { if (Character.isLowSurrogate(d)) {
uc = Character.toCodePoint(c, d); uc = Character.toCodePoint(c, d);
} else { } else {
Expand All @@ -628,24 +628,27 @@ public static int encodeUTF8(char[] sa, int sp, int len, byte[] da) {
} }


if (uc < 0) { if (uc < 0) {
da[dp++] = (byte) '?'; bytes[dp++] = (byte) '?';
} else { } else {
da[dp++] = (byte) (0xf0 | ((uc >> 18))); bytes[dp++] = (byte) (0xf0 | ((uc >> 18)));
da[dp++] = (byte) (0x80 | ((uc >> 12) & 0x3f)); bytes[dp++] = (byte) (0x80 | ((uc >> 12) & 0x3f));
da[dp++] = (byte) (0x80 | ((uc >> 6) & 0x3f)); bytes[dp++] = (byte) (0x80 | ((uc >> 6) & 0x3f));
da[dp++] = (byte) (0x80 | (uc & 0x3f)); bytes[dp++] = (byte) (0x80 | (uc & 0x3f));
sp++; // 2 chars offset++; // 2 chars
} }
} else { } else {
// 3 bytes, 16 bits // 3 bytes, 16 bits
da[dp++] = (byte) (0xe0 | ((c >> 12))); bytes[dp++] = (byte) (0xe0 | ((c >> 12)));
da[dp++] = (byte) (0x80 | ((c >> 6) & 0x3f)); bytes[dp++] = (byte) (0x80 | ((c >> 6) & 0x3f));
da[dp++] = (byte) (0x80 | (c & 0x3f)); bytes[dp++] = (byte) (0x80 | (c & 0x3f));
} }
} }
return dp; return dp;
} }


/**
* @deprecated
*/
public static int decodeUTF8(byte[] sa, int sp, int len, char[] da) { public static int decodeUTF8(byte[] sa, int sp, int len, char[] da) {
final int sl = sp + len; final int sl = sp + len;
int dp = 0; int dp = 0;
Expand Down

0 comments on commit fcc919a

Please sign in to comment.