Permalink
Browse files

punycode, feat: support toUnicode and toASCII methods (#351)

  • Loading branch information...
asionius authored and xicilion committed Sep 14, 2017
1 parent f5d798e commit a4b70743a97244e4c827e9ef4832c0700d311d23
Showing with 306 additions and 58 deletions.
  1. +37 −1 fibjs/include/ifs/punycode.h
  2. +119 −0 fibjs/src/net/punycode.cpp
  3. +12 −0 idl/zh-cn/punycode.idl
  4. +138 −57 test/punycode_test.js
@@ -23,6 +23,8 @@ class punycode_base : public object_base {
// punycode_base
static result_t encode(exlib::string domain, exlib::string& retVal);
static result_t decode(exlib::string domain, exlib::string& retVal);
static result_t toASCII(exlib::string domain, exlib::string& retVal);
static result_t toUnicode(exlib::string domain, exlib::string& retVal);
public:
static void s__new(const v8::FunctionCallbackInfo<v8::Value>& args)
@@ -38,6 +40,8 @@ class punycode_base : public object_base {
public:
static void s_encode(const v8::FunctionCallbackInfo<v8::Value>& args);
static void s_decode(const v8::FunctionCallbackInfo<v8::Value>& args);
static void s_toASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
static void s_toUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);
};
}
@@ -46,7 +50,9 @@ inline ClassInfo& punycode_base::class_info()
{
static ClassData::ClassMethod s_method[] = {
{ "encode", s_encode, true },
{ "decode", s_decode, true }
{ "decode", s_decode, true },
{ "toASCII", s_toASCII, true },
{ "toUnicode", s_toUnicode, true }
};
static ClassData s_cd = {
@@ -88,6 +94,36 @@ inline void punycode_base::s_decode(const v8::FunctionCallbackInfo<v8::Value>& a
METHOD_RETURN();
}
// V8 callback for the script-visible "toASCII" method: takes one string
// argument, forwards it to punycode_base::toASCII() and returns the result
// through the METHOD_* binding macros.
inline void punycode_base::s_toASCII(const v8::FunctionCallbackInfo<v8::Value>& args)
{
// Receives the converted string produced by toASCII().
exlib::string vr;
// NOTE(review): `hr` is assigned below but not declared in this function, so
// METHOD_ENTER presumably declares it - confirm against the macro definition.
METHOD_ENTER();
// NOTE(review): looks like this selects the overload taking exactly one
// argument - confirm the (max, min) meaning of the two parameters.
METHOD_OVER(1, 1);
// NOTE(review): presumably converts args[0] to an exlib::string named `v0`
// (the name used in the call below) - confirm.
ARG(exlib::string, 0);
hr = toASCII(v0, vr);
// NOTE(review): presumably hands `vr` (or an error derived from `hr`) back to
// the JavaScript caller - confirm.
METHOD_RETURN();
}
// V8 callback for the script-visible "toUnicode" method: takes one string
// argument, forwards it to punycode_base::toUnicode() and returns the result
// through the METHOD_* binding macros.
inline void punycode_base::s_toUnicode(const v8::FunctionCallbackInfo<v8::Value>& args)
{
// Receives the converted string produced by toUnicode().
exlib::string vr;
// NOTE(review): `hr` is assigned below but not declared in this function, so
// METHOD_ENTER presumably declares it - confirm against the macro definition.
METHOD_ENTER();
// NOTE(review): looks like this selects the overload taking exactly one
// argument - confirm the (max, min) meaning of the two parameters.
METHOD_OVER(1, 1);
// NOTE(review): presumably converts args[0] to an exlib::string named `v0`
// (the name used in the call below) - confirm.
ARG(exlib::string, 0);
hr = toUnicode(v0, vr);
// NOTE(review): presumably hands `vr` (or an error derived from `hr`) back to
// the JavaScript caller - confirm.
METHOD_RETURN();
}
}
#endif
View
@@ -7,6 +7,8 @@
#include "ifs/punycode.h"
#include "utf8.h"
#include "qstring.h"
#include "parse.h"
#include <limits.h>
namespace fibjs {
@@ -237,4 +239,121 @@ result_t punycode_base::decode(exlib::string domain, exlib::string& retVal)
return 0;
}
/**
 * Convert a domain name (or e-mail address) to its ASCII form.
 *
 * Everything up to and including the first '@' is copied through untouched.
 * The remainder is split into labels on '.', U+3002, U+FF0E and U+FF61; each
 * label containing a character outside 0x20..0x7E is passed to encode() and
 * prefixed with "xn--". Labels are re-joined with an ASCII '.'.
 *
 * A separator that is the last character of the input is treated as a
 * separator (yielding a trailing '.'), not as part of the final label.
 *
 * @param domain  UTF-8 domain name or e-mail address.
 * @param retVal  receives the converted string on success.
 * @return 0 on success, or the (negative) error reported by encode().
 */
result_t punycode_base::toASCII(exlib::string domain, exlib::string& retVal)
{
    exlib::string left;
    exlib::string result;
    _parser p(domain);

    // Keep the local part of an e-mail address ("user@") as-is.
    p.skipUntil('@');
    if (p.get() == '@') {
        p.skip();
        result.append(p.string, p.pos);
    } else
        p.pos = 0; // no '@' found: rewind and treat the whole input as the domain

    p.getLeft(left);

    exlib::wstring32 wdomain;
    wdomain = utf8to32String(left);
    const size_t length = wdomain.length();

    // Start index of the label currently being scanned.
    size_t p1 = 0;

    // i == length acts as a virtual terminator so that the final label is
    // handled by the same code path as every other label.
    for (size_t i = 0; i <= length; i++) {
        const bool atEnd = (i == length);

        if (!atEnd && wdomain[i] != '\x2E' && wdomain[i] != 0x3002
            && wdomain[i] != 0xff0e && wdomain[i] != 0xff61)
            continue;

        // The label is [p1, i). Empty labels contribute nothing.
        if (i > p1) {
            bool notAscii = false;
            for (size_t j = p1; j < i; j++)
                if (wdomain[j] > '\x7E' || wdomain[j] < '\x20') {
                    notAscii = true;
                    break;
                }

            exlib::wstring32 wStr;
            exlib::string str;

            wStr = wdomain.substr(p1, i - p1);
            str = utf32to8String(wStr);

            if (notAscii) {
                result_t hr = encode(str, str);
                if (hr < 0)
                    return CHECK_ERROR(hr);

                result += "xn--";
            }

            result += str;
        }

        // Every real separator, whatever its Unicode form, becomes '.'.
        if (!atEnd)
            result += ".";

        p1 = i + 1;
    }

    retVal = result;
    return 0;
}
/**
 * Convert a domain name (or e-mail address) containing Punycode labels to its
 * Unicode form.
 *
 * Everything up to and including the first '@' is copied through untouched.
 * The remainder is split into labels on ASCII '.'; each label starting with
 * "xn--" has that prefix removed, is lower-cased and passed to decode(). All
 * other labels are copied unchanged.
 *
 * A '.' that is the last character of the input is treated as a separator
 * (yielding a trailing '.'), not as part of the final label.
 *
 * @param domain  domain name or e-mail address, possibly with "xn--" labels.
 * @param retVal  receives the converted string on success.
 * @return 0 on success, or the (negative) error reported by decode().
 */
result_t punycode_base::toUnicode(exlib::string domain, exlib::string& retVal)
{
    exlib::string left;
    exlib::string result;
    _parser p(domain);

    // Keep the local part of an e-mail address ("user@") as-is.
    p.skipUntil('@');
    if (p.get() == '@') {
        p.skip();
        result.append(p.string, p.pos);
    } else
        p.pos = 0; // no '@' found: rewind and treat the whole input as the domain

    p.getLeft(left);
    const size_t length = left.length();

    // Start index of the label currently being scanned.
    size_t p1 = 0;

    // i == length acts as a virtual terminator so that the final label is
    // handled by the same code path as every other label.
    for (size_t i = 0; i <= length; i++) {
        const bool atEnd = (i == length);

        if (!atEnd && left[i] != '\x2E')
            continue;

        // The label is [p1, i). Empty labels contribute nothing.
        if (i > p1) {
            exlib::string str;

            // Length guard keeps the prefix compare inside the label.
            if (i - p1 >= 4 && qstrcmp(&left[p1], "xn--", 4) == 0) {
                str = left.substr(p1 + 4, i - p1 - 4);
                str.tolower();

                result_t hr = decode(str, str);
                if (hr < 0)
                    return CHECK_ERROR(hr);
            } else
                str = left.substr(p1, i - p1);

            result += str;
        }

        if (!atEnd)
            result += ".";

        p1 = i + 1;
    }

    retVal = result;
    return 0;
}
}
View
@@ -21,4 +21,16 @@ module punycode
@return 返回解码后的 Unicode 字符串
*/
static String decode(String domain);
/*! @brief 转换一个代表了一个域名的Unicode字符串为一个只含有 ASCII 字符的字符串。只有代表了域名的部分的非 ASCII 字符串会被转换。也就是说,如果你调用了一个已经被转换为ASCII的字符串,也是没有问题的。
@param domain 给定Unicode 字符串
@return 返回编码后的 ASCII 字符串
*/
static String toASCII(String domain);
/*! @brief 转换一个代表了一个域名的Punycode字符串为一个Unicode字符串。只有代表了域名的部分的Punycode字符串会被转换。也就是说,如果你调用了一个已经被转换为Unicode的字符串,也是没有问题的。
@param domain 给定 ASCII 字符串
@return 返回解码后的 Unicode 字符串
*/
static String toUnicode(String domain);
};
Oops, something went wrong.

0 comments on commit a4b7074

Please sign in to comment.