Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Factor parseDomainFromURL from MistBlock into StringUtils

git-svn-id: https://svn.opensource.yandex.net/xscript/trunk@718 b01ef89b-65f2-463d-9415-e8412542ae63
  • Loading branch information...
commit 7fe3dd0372c4d3c1c6214a00c99c0a2cd03b151c 1 parent 76a2a23
bacek authored
View
9 include/xscript/string_utils.h
@@ -3,6 +3,7 @@
#include <string>
#include <ostream>
+#include <boost/cstdint.hpp> // for boost::int32_t
#include <xscript/range.h>
@@ -38,6 +39,14 @@ namespace StringUtils {
std::string tolower(const std::string& str);
std::string toupper(const std::string& str);
const char* nextUTF8(const char* data);
+
+ /**
+ * Extract the domain (host name) part of a URL.
+ *
+ * The query string, the scheme prefix ("...://"), the port and the path
+ * are removed; what remains is treated as the domain.
+ *
+ * \param url - URL to parse. Taken by value because the implementation
+ * edits its own copy in place.
+ * \param level - [optional] number of rightmost dot-separated labels to
+ * keep (1 gives "net" for "hghltd.yandex.net", 2 gives "yandex.net").
+ * If level == 0, or level exceeds the number of labels present,
+ * the full domain is returned.
+ * \return domain part of url, trimmed to the requested level.
+ * \throw std::invalid_argument if level is negative, or if the extracted
+ * domain is empty, starts or ends with '.', contains two adjacent
+ * dots, or its last label begins with a digit.
+ */
+ std::string parseDomainFromURL(std::string url, boost::int32_t level = 0);
};
View
61 library/string_utils.cpp
@@ -1,6 +1,7 @@
#include <libxml/tree.h>
#include <stdexcept>
#include <memory>
+#include <algorithm>
#include "xscript/string_utils.h"
#include "xscript/encoder.h"
#include "xscript/algorithm.h"
@@ -156,4 +157,64 @@ StringUtils::nextUTF8(const char* data) {
return data + NEXT_UTF8[static_cast<unsigned char>(*data)];
}
+/**
+ * Extract the domain (host name) part of a URL and optionally trim it to
+ * its rightmost \a level labels.
+ *
+ * \param url - URL to parse; taken by value and edited in place.
+ * \param level - number of rightmost dot-separated labels to keep.
+ * 0 keeps the whole domain, as does any level larger than the
+ * number of labels present.
+ * \return the domain, e.g. "yandex.ru" for ("http://www.yandex.ru/", 2).
+ * \throw std::invalid_argument if level is negative, or if the domain is
+ * empty, starts or ends with '.', contains "..", or its last label
+ * begins with a digit (which also rejects dotted IPv4 addresses).
+ *
+ * NOTE: user-info ("user:pass@host") and bracketed IPv6 literals are not
+ * recognised; the text up to the first ':' or '/' is taken as the domain.
+ */
+std::string StringUtils::parseDomainFromURL(std::string url, boost::int32_t level) {
+
+    if (0 > level) {
+        throw std::invalid_argument("bad param: level");
+    }
+
+    // Drop the query string and the fragment first, so that nothing inside
+    // them (for example an embedded "http://...") can be mistaken for a
+    // part of the authority.
+    std::string::size_type pos = url.find_first_of("?#");
+    if (std::string::npos != pos) {
+        url.erase(pos);
+    }
+
+    // Strip the scheme. "://" counts as a scheme separator only when it
+    // holds the first '/' of the string; otherwise it belongs to the path
+    // of a scheme-less URL such as "host/redirect/http://other/".
+    pos = url.find("://");
+    if (std::string::npos != pos && url.find('/') == pos + 1) {
+        url.erase(0, pos + 3);
+    }
+
+    // Cut the port and the path: everything from the first '/' or ':' on.
+    pos = url.find_first_of("/:");
+    if (std::string::npos != pos) {
+        url.erase(pos);
+    }
+
+    // Reject empty domains and domains with empty labels.
+    if (url.empty() || '.' == *url.begin() || '.' == *url.rbegin() ||
+        std::string::npos != url.find("..")) {
+        throw std::invalid_argument("bad param: domain='" + url + "'");
+    }
+
+    // First character of the last label. When there is no dot, rfind()
+    // returns npos and npos + 1 wraps to 0, i.e. the first character of
+    // the whole domain. The domain never ends with '.', so the index is
+    // always in range.
+    const char first = url[url.rfind('.') + 1];
+    if (first >= '0' && first <= '9') {
+        throw std::invalid_argument("bad param: domain='" + url + "'");
+    }
+
+    // A domain with N dots has N + 1 labels; trimming is needed only when
+    // fewer labels are requested than are present. A level larger than
+    // the number of labels silently yields the whole domain.
+    const std::string::difference_type dots = std::count(url.begin(), url.end(), '.');
+    if (0 != level && level <= dots) {
+        // Walk right-to-left to the level-th dot and drop everything up to
+        // and including it. Each search succeeds because level <= dots, and
+        // cut stays >= 1 because the domain does not start with '.'.
+        std::string::size_type cut = url.size();
+        for (boost::int32_t i = 0; i < level; ++i) {
+            cut = url.rfind('.', cut - 1);
+        }
+        url.erase(0, cut + 1);
+    }
+
+    return url;
+}
+
+
}
View
77 tests/test_string.cpp
@@ -10,6 +10,8 @@
#include <dmalloc.h>
#endif
+using namespace xscript;
+
class StringTest : public CppUnit::TestFixture {
public:
void testAmp();
@@ -24,6 +26,10 @@ class StringTest : public CppUnit::TestFixture {
void testUrldecodeEmpty();
void testUrldecodeLatin();
void testUrldecodeBadSuffix();
+ void testParseDomain();
+ void testParseDomainEmpty();
+ void testParseDomainFile();
+ void testParseDomainInvalid();
private:
CPPUNIT_TEST_SUITE(StringTest);
@@ -39,6 +45,12 @@ class StringTest : public CppUnit::TestFixture {
CPPUNIT_TEST(testUrldecodeEmpty);
CPPUNIT_TEST(testUrldecodeLatin);
CPPUNIT_TEST(testUrldecodeBadSuffix);
+
+ CPPUNIT_TEST(testParseDomain);
+ CPPUNIT_TEST_EXCEPTION(testParseDomainEmpty, std::exception);
+ CPPUNIT_TEST_EXCEPTION(testParseDomainFile, std::exception);
+ CPPUNIT_TEST_EXCEPTION(testParseDomainInvalid, std::exception);
+
CPPUNIT_TEST_SUITE_END();
};
@@ -89,10 +101,9 @@ StringTest::testEscape() {
CPPUNIT_ASSERT_EQUAL(std::string("&lt;td colspan=&quot;2&quot;&gt;"), XmlUtils::escape("<td colspan=\"2\">"));
}
+
void
StringTest::testParams() {
-
- using namespace xscript;
std::vector<StringUtils::NamedValue> v;
std::string str("test=yes&successful=try%20again");
@@ -107,32 +118,24 @@ StringTest::testParams() {
void
StringTest::testUrlencode() {
-
- using namespace xscript;
std::string str("ÒÁÚ Ä×Á ÔÒÉ ÞÅÔÙÒÅ ÐÑÔØ"), res = StringUtils::urlencode(str);
CPPUNIT_ASSERT_EQUAL(std::string("%D2%C1%DA%20%C4%D7%C1%20%D4%D2%C9%20%DE%C5%D4%D9%D2%C5%20%D0%D1%D4%D8"), res);
}
void
StringTest::testUrlencodeEmpty() {
-
- using namespace xscript;
std::string str, res = StringUtils::urlencode(str);
CPPUNIT_ASSERT_EQUAL(std::string(""), res);
}
void
StringTest::testUrlencodeLatin() {
-
- using namespace xscript;
std::string str("abcd efgh"), res = StringUtils::urlencode(str);
CPPUNIT_ASSERT_EQUAL(std::string("abcd%20efgh"), res);
}
void
StringTest::testUrldecode() {
-
- using namespace xscript;
std::string str("%D2%C1%DA%20%C4%D7%C1%20%D4%D2%C9%20%DE%C5%D4%D9%D2%C5%20%D0%D1%D4%D8"), res = StringUtils::urldecode(str);
CPPUNIT_ASSERT_EQUAL(std::string("ÒÁÚ Ä×Á ÔÒÉ ÞÅÔÙÒÅ ÐÑÔØ"), res);
@@ -140,24 +143,66 @@ StringTest::testUrldecode() {
void
StringTest::testUrldecodeEmpty() {
-
- using namespace xscript;
std::string str, res = StringUtils::urldecode(str);
CPPUNIT_ASSERT_EQUAL(std::string(""), res);
}
void
StringTest::testUrldecodeLatin() {
-
- using namespace xscript;
std::string str("abcd%20efgh"), res = StringUtils::urldecode(str);
CPPUNIT_ASSERT_EQUAL(std::string("abcd efgh"), res);
}
void
StringTest::testUrldecodeBadSuffix() {
-
- using namespace xscript;
std::string str("abcd%20efgh%"), res = StringUtils::urldecode(str);
CPPUNIT_ASSERT_EQUAL(std::string("abcd efgh%"), res);
}
+
+void
+StringTest::testParseDomain() {
+    // Full URL: scheme, port, path and an encoded query string.
+    const std::string withScheme("http://hghltd.yandex.net:1234/yandbtm?url=http%3A%2F%2Fwww.yandex.ru%2F&amp;text=%FF%ED%E4%E5%EA%F1");
+    // Same shape, but without a scheme prefix.
+    const std::string withoutScheme("www.yandex.ru:8090/yandbtm?url=http%3A%2F%2Fwww.yandex.ru%2F&amp;text=%FF%ED%E4%E5%EA%F1");
+
+    // Default level: the whole domain is returned.
+    CPPUNIT_ASSERT_EQUAL(std::string("hghltd.yandex.net"), StringUtils::parseDomainFromURL(withScheme));
+
+    // Level 1: only the rightmost label is kept.
+    CPPUNIT_ASSERT_EQUAL(std::string("net"), StringUtils::parseDomainFromURL(withScheme, 1));
+
+    // No level and no scheme: the whole domain is still found.
+    CPPUNIT_ASSERT_EQUAL(std::string("www.yandex.ru"), StringUtils::parseDomainFromURL(withoutScheme));
+
+    // Level 2 with a scheme: the two rightmost labels are kept.
+    CPPUNIT_ASSERT_EQUAL(std::string("yandex.ru"), StringUtils::parseDomainFromURL("http://www.yandex.ru/", 2));
+
+    // Level 2 without a scheme gives the same result.
+    CPPUNIT_ASSERT_EQUAL(std::string("yandex.ru"), StringUtils::parseDomainFromURL("www.yandex.ru/", 2));
+}
+
+void
+StringTest::testParseDomainEmpty() {
+    // An empty URL has no domain part; the suite registers this case as
+    // one that must end in an exception.
+    const std::string emptyUrl;
+    StringUtils::parseDomainFromURL(emptyUrl);
+}
+
+void
+StringTest::testParseDomainFile() {
+    // A file:// URL has an empty host between "://" and the path, so the
+    // call is expected to throw.
+    const std::string fileUrl("file:///home/bacek/bad/bad/boy.xml");
+    StringUtils::parseDomainFromURL(fileUrl);
+}
+
+void
+StringTest::testParseDomainInvalid() {
+    // The host begins with '.', which is not a valid domain; the call is
+    // expected to throw.
+    const std::string leadingDotUrl("http://.www.yandex.ru/index.html");
+    StringUtils::parseDomainFromURL(leadingDotUrl);
+}
+
Please sign in to comment.
Something went wrong with that request. Please try again.