Skip to content

Commit

Permalink
Added function that converts Punycode encoded domains to Unicode (std…
Browse files Browse the repository at this point in the history
…::string) (#64)

* Moved some more files around

* Added tests to convert from ascii encoded domains to unicode

* Refactored tests

* Renamed domain_to_unicode
  • Loading branch information
glynos committed Apr 13, 2020
1 parent 352dd31 commit f2dd72f
Show file tree
Hide file tree
Showing 31 changed files with 230 additions and 178 deletions.
31 changes: 26 additions & 5 deletions include/skyr/core/url_record.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <string>
#include <cstdint>
#include <optional>
#include <skyr/core/url_schemes.hpp>

namespace skyr {
inline namespace v1 {
Expand Down Expand Up @@ -65,22 +66,42 @@ class url_record {
/// Tests if the URL is a special scheme
/// \returns `true` if the URL scheme is a special scheme, `false`
/// otherwise
[[nodiscard]] auto is_special() const noexcept -> bool;
[[nodiscard]] auto is_special() const noexcept -> bool {
  // Delegate to the namespace-level helper, passing this record's scheme.
  const auto special = skyr::is_special(scheme);
  return special;
}

/// Tests if the URL includes credentials
/// \returns `true` if the URL username or password is not an
/// empty string, `false` otherwise
[[nodiscard]] auto includes_credentials() const noexcept -> bool;
[[nodiscard]] auto includes_credentials() const noexcept -> bool {
  // Credentials are present unless both the username and the password
  // are empty.
  return !(username.empty() && password.empty());
}

/// Tests if the URL cannot have a username, password or port
/// \returns `true` if the URL cannot have a username, password
/// or port
[[nodiscard]] auto cannot_have_a_username_password_or_port() const noexcept -> bool;
[[nodiscard]] auto cannot_have_a_username_password_or_port() const noexcept -> bool {
  // A missing or empty host is checked first; `value()` is only reached
  // once the optional is known to hold a value.
  if (!host || host.value().empty()) {
    return true;
  }

  // Otherwise the answer depends on the base-URL flag or a "file" scheme.
  return cannot_be_a_base_url || (scheme == "file");
}

/// Swaps two `url_record` objects
/// \param other Another `url_record` object
void swap(url_record &other) noexcept;

void swap(url_record &other) noexcept {
  // Bring std::swap into scope but call unqualified, so that any
  // member-type-specific overload found by ADL is preferred.
  using std::swap;

  // Scheme and credentials.
  swap(scheme, other.scheme);
  swap(username, other.username);
  swap(password, other.password);

  // Host and port.
  swap(host, other.host);
  swap(port, other.port);

  // Path, query and fragment.
  swap(path, other.path);
  swap(query, other.query);
  swap(fragment, other.fragment);

  // Flags.
  swap(cannot_be_a_base_url, other.cannot_be_a_base_url);
  swap(validation_error, other.validation_error);
}
};

/// Swaps two `url_record` objects
Expand Down
60 changes: 36 additions & 24 deletions src/core/url_schemes.cpp → include/skyr/core/url_schemes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,53 +3,65 @@
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

#ifndef SKYR_URL_SCHEMES_INC
#define SKYR_URL_SCHEMES_INC

#include <vector>
#include <utility>
#include <string>
#include <string_view>
#include <cstdint>
#include <optional>
#include <algorithm>
#include "url_schemes.hpp"

namespace skyr {
inline namespace v1 {
namespace details {
auto special_schemes() noexcept -> const default_port_list & {
using default_port_list = std::vector<std::pair<std::string, std::optional<std::uint16_t>>>;

inline auto special_schemes() noexcept -> const default_port_list & {
static const auto schemes = default_port_list{
{"ftp", 21},
{"file", std::nullopt},
{"http", 80},
{"https", 443},
{"ws", 80},
{"wss", 443},
};
{"ftp", 21},
{"file", std::nullopt},
{"http", 80},
{"https", 443},
{"ws", 80},
{"wss", 443},
};
return schemes;
}
} // namespace details

auto default_port(std::string_view scheme) noexcept -> std::optional<std::uint16_t> {
auto schemes = special_schemes();
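/// Tests if a scheme is one of the special schemes
/// \param scheme A URL scheme
/// \returns `true` if `scheme` is in the list of special schemes,
///          `false` otherwise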
inline auto is_special(std::string_view scheme) noexcept -> bool {
const auto &schemes = details::special_schemes();
auto first = begin(schemes), last = end(schemes);
auto it = std::find_if(
first, last,
[&scheme](const auto &special_scheme) -> bool {
return scheme == special_scheme.first;
});
if (it != last) {
return it->second;
}
return std::nullopt;
return (it != last);
}

auto is_special(std::string_view scheme) noexcept -> bool {
auto schemes = special_schemes();
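/// Looks up the default port for a scheme
/// \param scheme A URL scheme
/// \returns The default port if `scheme` is a special scheme that has
///          one, `std::nullopt` otherwise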
inline auto default_port(std::string_view scheme) noexcept -> std::optional<std::uint16_t> {
const auto &schemes = details::special_schemes();
auto first = begin(schemes), last = end(schemes);
auto it = std::find_if(
first, last,
[&scheme](const auto &special_scheme) -> bool {
return scheme == special_scheme.first;
});
return (it != last);
}

auto is_default_port(std::string_view scheme, std::uint16_t port) noexcept -> bool {
auto dport = default_port(scheme);
return dport && (dport.value() == port);
if (it != last) {
return it->second;
}
return std::nullopt;
}
} // namespace details
} // namespace v1
} // namespace skyr


#endif // SKYR_URL_SCHEMES_INC
18 changes: 12 additions & 6 deletions include/skyr/domain/domain.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,31 @@

namespace skyr {
inline namespace v1 {
/// Converts a UTF-8 encoded domain to ASCII using
/// Converts a UTF-32 encoded domain to ASCII using
/// [IDNA processing](https://www.unicode.org/reports/tr46/#Processing)
///
/// \param domain A domain
/// \param be_strict Tells the processor to be strict
/// \returns An ASCII domain, or an error
auto domain_to_ascii(
std::string_view domain,
bool be_strict = false) -> tl::expected<std::string, std::error_code>;
std::u32string_view domain,
bool be_strict=false) -> tl::expected<std::string, std::error_code>;

/// Converts a UTF-32 encoded domain to ASCII using
/// Converts a UTF-8 encoded domain to ASCII using
/// [IDNA processing](https://www.unicode.org/reports/tr46/#Processing)
///
/// \param domain A domain
/// \param be_strict Tells the processor to be strict
/// \returns An ASCII domain, or an error
auto domain_to_ascii(
std::u32string_view domain,
bool be_strict = false) -> tl::expected<std::string, std::error_code>;
std::string_view domain,
bool be_strict=false) -> tl::expected<std::string, std::error_code>;

/// Converts a Punycode encoded domain to UTF-8
///
/// \param ascii A Punycode encoded domain
/// \returns A valid UTF-8 encoded domain, or an error
auto domain_to_unicode(std::string_view ascii) -> tl::expected<std::string, std::error_code>;
} // namespace v1
} // namespace skyr

Expand Down
4 changes: 2 additions & 2 deletions include/skyr/network/ipv4_address.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ class ipv4_address {
/// Constructor
/// \param address Sets the IPv4 address to `address`
explicit ipv4_address(unsigned int address)
: address_(details::to_network_byte_order(address)) {}
: address_(to_network_byte_order(address)) {}

/// The address value
/// \returns The address value
[[nodiscard]] auto address() const noexcept {
return details::from_network_byte_order(address_);
return from_network_byte_order(address_);
}

/// The address in bytes in network byte order
Expand Down
2 changes: 1 addition & 1 deletion include/skyr/network/ipv6_address.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class ipv6_address {
/// \param address Sets the IPv6 address to `address`
explicit ipv6_address(std::array<unsigned short, 8> address) {
for (auto i = 0UL; i < address.size(); ++i) {
address_[i] = details::to_network_byte_order(address[i]);
address_[i] = to_network_byte_order(address[i]);
}
}

Expand Down
14 changes: 8 additions & 6 deletions include/skyr/platform/endianness.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@

namespace skyr {
inline namespace v1 {
namespace details {
/// Tests the byte order of the host at run time
/// \returns `true` if the host stores the most significant byte first,
///          `false` otherwise
inline auto is_big_endian() noexcept {
  // Probe value whose least significant byte is 1; look at the byte
  // stored at its lowest address.
  const auto probe = 0x0001;
  const auto *first_byte =
      static_cast<const unsigned char *>(static_cast<const void *>(&probe));

  // If the low-order byte comes first, the host is little-endian.
  const bool low_byte_first = (*first_byte == 0x01);
  return !low_byte_first;
}

namespace details {
template <typename intT>
inline auto swap_endianness(
intT v, typename std::enable_if<std::is_integral<intT>::value>::type * = nullptr) noexcept -> intT {
Expand All @@ -28,15 +28,17 @@ inline auto swap_endianness(
}
return *static_cast<const intT *>(static_cast<const void *>(bytes.data()));
}
} // namespace details

inline auto to_network_byte_order(unsigned int v) noexcept {
return (is_big_endian()) ? v : swap_endianness(v);
template <class intT>
inline auto to_network_byte_order(intT v) noexcept {
  // On a big-endian host the value is returned unchanged.
  if (is_big_endian()) {
    return v;
  }
  // Otherwise its bytes are reversed.
  return details::swap_endianness(v);
}

inline auto from_network_byte_order(unsigned int v) noexcept {
return (is_big_endian()) ? v : swap_endianness(v);
template <class intT>
inline auto from_network_byte_order(intT v) noexcept {
  // On a big-endian host the value is returned unchanged.
  if (is_big_endian()) {
    return v;
  }
  // Otherwise its bytes are reversed.
  return details::swap_endianness(v);
}
} // namespace details
} // namespace v1
} // namespace skyr

Expand Down
28 changes: 13 additions & 15 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,34 +9,36 @@ configure_file(
)

set(skyr_SRCS
core/url_parser_context.hpp
core/url_parser_context.cpp
core/url_parse.cpp
core/url_parse_impl.hpp
core/url_serialize.cpp
core/url_error.cpp
unicode/errors.cpp
domain/errors.cpp
domain/domain.cpp
domain/punycode.cpp
domain/idna.cpp
core/url_parser_context.hpp
core/url_parser_context.cpp
core/url_record.cpp
percent_encoding/errors.cpp
network/ipv4_address.cpp
network/ipv6_address.cpp
string/ascii.hpp
string/join.hpp
string/locale.hpp
string/split.hpp
string/starts_with.hpp
core/url_parse.cpp
core/url_parse_impl.hpp
core/url_serialize.cpp
core/url_schemes.hpp
core/url_schemes.cpp
url/url.cpp
core/url_error.cpp
url/url_search_parameters.cpp
filesystem/path.cpp
percent_encoding/errors.cpp

${PROJECT_SOURCE_DIR}/include/skyr/config.hpp
${PROJECT_SOURCE_DIR}/include/skyr/version.hpp
${PROJECT_SOURCE_DIR}/include/skyr/core/url_record.hpp
${PROJECT_SOURCE_DIR}/include/skyr/core/url_parse.hpp
${PROJECT_SOURCE_DIR}/include/skyr/core/url_serialize.hpp
${PROJECT_SOURCE_DIR}/include/skyr/core/url_schemes.hpp
${PROJECT_SOURCE_DIR}/include/skyr/core/url_error.hpp
${PROJECT_SOURCE_DIR}/include/skyr/traits/string_traits.hpp
${PROJECT_SOURCE_DIR}/include/skyr/unicode/errors.hpp
${PROJECT_SOURCE_DIR}/include/skyr/unicode/core.hpp
Expand All @@ -49,6 +51,7 @@ set(skyr_SRCS
${PROJECT_SOURCE_DIR}/include/skyr/unicode/ranges/transforms/byte_transform.hpp
${PROJECT_SOURCE_DIR}/include/skyr/unicode/ranges/transforms/u16_transform.hpp
${PROJECT_SOURCE_DIR}/include/skyr/unicode/ranges/transforms/u32_transform.hpp
${PROJECT_SOURCE_DIR}/include/skyr/unicode/details/to_bytes.hpp
${PROJECT_SOURCE_DIR}/include/skyr/domain/errors.hpp
${PROJECT_SOURCE_DIR}/include/skyr/domain/idna.hpp
${PROJECT_SOURCE_DIR}/include/skyr/domain/punycode.hpp
Expand All @@ -61,12 +64,7 @@ set(skyr_SRCS
${PROJECT_SOURCE_DIR}/include/skyr/percent_encoding/percent_encode.hpp
${PROJECT_SOURCE_DIR}/include/skyr/network/ipv4_address.hpp
${PROJECT_SOURCE_DIR}/include/skyr/network/ipv6_address.hpp
${PROJECT_SOURCE_DIR}/include/skyr/core/url_record.hpp
${PROJECT_SOURCE_DIR}/include/skyr/core/url_parse.hpp
${PROJECT_SOURCE_DIR}/include/skyr/core/url_serialize.hpp
${PROJECT_SOURCE_DIR}/include/skyr/core/url_error.hpp
${PROJECT_SOURCE_DIR}/include/skyr/query/query_iterator.hpp
${PROJECT_SOURCE_DIR}/include/skyr/unicode/details/to_bytes.hpp
${PROJECT_SOURCE_DIR}/include/skyr/platform/endianness.hpp
${PROJECT_SOURCE_DIR}/include/skyr/url/url_record.hpp
${PROJECT_SOURCE_DIR}/include/skyr/url/url_parse.hpp
Expand Down
11 changes: 6 additions & 5 deletions src/core/url_parser_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#include <skyr/domain/domain.hpp>
#include <skyr/percent_encoding/percent_decode_range.hpp>
#include "url_parser_context.hpp"
#include "url_schemes.hpp"
#include "skyr/core/url_schemes.hpp"
#include "string/starts_with.hpp"
#include "string/locale.hpp"

Expand Down Expand Up @@ -288,11 +288,11 @@ auto url_parser_context::parse_scheme(char byte) -> tl::expected<url_parse_actio
buffer.push_back(lower);
} else if (byte == ':') {
if (state_override) {
if (url.is_special() && !details::is_special(buffer)) {
if (url.is_special() && !is_special(buffer)) {
return tl::make_unexpected(url_parse_errc::cannot_override_scheme);
}

if (!url.is_special() && details::is_special(buffer)) {
if (!url.is_special() && is_special(buffer)) {
return tl::make_unexpected(url_parse_errc::cannot_override_scheme);
}

Expand All @@ -308,7 +308,7 @@ auto url_parser_context::parse_scheme(char byte) -> tl::expected<url_parse_actio
url.scheme = buffer;

if (state_override) {
if (url.port == details::default_port(url.scheme)) {
if (url.port == default_port(url.scheme)) {
url.port = std::nullopt;
}
return url_parse_action::success;
Expand Down Expand Up @@ -611,7 +611,8 @@ auto url_parser_context::parse_port(char byte) -> tl::expected<url_parse_action,
return tl::make_unexpected(port.error());
}

if (details::is_default_port(url.scheme, port.value())) {
auto dport = default_port(url.scheme);
if (dport && (dport.value() == port.value())) {
url.port = std::nullopt;
}
else {
Expand Down
40 changes: 0 additions & 40 deletions src/core/url_record.cpp

This file was deleted.

Loading

0 comments on commit f2dd72f

Please sign in to comment.