diff --git a/ada_url/__init__.py b/ada_url/__init__.py index 6fb5059..afd5275 100644 --- a/ada_url/__init__.py +++ b/ada_url/__init__.py @@ -1,5 +1,6 @@ from ada_url.ada_adapter import ( URL, + HostType, check_url, idna, idna_to_ascii, @@ -12,6 +13,7 @@ __all__ = [ 'URL', + 'HostType', 'check_url', 'idna', 'idna_to_ascii', diff --git a/ada_url/ada.cpp b/ada_url/ada.cpp index ce4e630..ad7a1a7 100644 --- a/ada_url/ada.cpp +++ b/ada_url/ada.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2023-07-23 15:03:22 -0400. Do not edit! */ +/* auto-generated on 2023-08-30 11:44:21 -0400. Do not edit! */ /* begin file src/ada.cpp */ #include "ada.h" /* begin file src/checkers.cpp */ @@ -116,10 +116,11 @@ ada_really_inline constexpr bool verify_dns_length( ADA_PUSH_DISABLE_ALL_WARNINGS /* begin file src/ada_idna.cpp */ -/* auto-generated on 2023-05-07 19:12:14 -0400. Do not edit! */ +/* auto-generated on 2023-08-29 15:28:19 -0400. Do not edit! */ /* begin file src/idna.cpp */ /* begin file src/unicode_transcoding.cpp */ +#include #include #include @@ -226,38 +227,22 @@ size_t utf8_length_from_utf32(const char32_t* buf, size_t len) { // We are not BOM aware. 
const uint32_t* p = reinterpret_cast(buf); size_t counter{0}; - for (size_t i = 0; i < len; i++) { - /** ASCII **/ - if (p[i] <= 0x7F) { - counter++; - } - /** two-byte **/ - else if (p[i] <= 0x7FF) { - counter += 2; - } - /** three-byte **/ - else if (p[i] <= 0xFFFF) { - counter += 3; - } - /** four-bytes **/ - else { - counter += 4; - } + for (size_t i = 0; i != len; ++i) { + ++counter; // ASCII + counter += static_cast(p[i] > 0x7F); // two-byte + counter += static_cast(p[i] > 0x7FF); // three-byte + counter += static_cast(p[i] > 0xFFFF); // four-bytes } return counter; } size_t utf32_length_from_utf8(const char* buf, size_t len) { const int8_t* p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { + return std::count_if(p, std::next(p, len), [](int8_t c) { // -65 is 0b10111111, anything larger in two-complement's // should start a new code point. - if (p[i] > -65) { - counter++; - } - } - return counter; + return c > -65; + }); } size_t utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) { @@ -9525,14 +9510,14 @@ bool constexpr begins_with(std::u32string_view view, if (view.size() < prefix.size()) { return false; } - return view.substr(0, prefix.size()) == prefix; + return std::equal(prefix.begin(), prefix.end(), view.begin()); } bool constexpr begins_with(std::string_view view, std::string_view prefix) { if (view.size() < prefix.size()) { return false; } - return view.substr(0, prefix.size()) == prefix; + return std::equal(prefix.begin(), prefix.end(), view.begin()); } bool constexpr is_ascii(std::u32string_view view) { @@ -10144,13 +10129,12 @@ ada_really_inline constexpr bool is_lowercase_hex(const char c) noexcept { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'); } +constexpr static char hex_to_binary_table[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 10, 11, + 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 11, 12, 13, 14, 15}; unsigned 
constexpr convert_hex_to_binary(const char c) noexcept { - // this code can be optimized. - if (c <= '9') { - return c - '0'; - } - char del = c >= 'a' ? 'a' : 'A'; - return 10 + (c - del); + return hex_to_binary_table[c - '0']; } std::string percent_decode(const std::string_view input, size_t first_percent) { @@ -10159,8 +10143,9 @@ std::string percent_decode(const std::string_view input, size_t first_percent) { if (first_percent == std::string_view::npos) { return std::string(input); } - std::string dest(input.substr(0, first_percent)); + std::string dest; dest.reserve(input.length()); + dest.append(input.substr(0, first_percent)); const char* pointer = input.data() + first_percent; const char* end = input.data() + input.size(); // Optimization opportunity: if the following code gets @@ -10197,9 +10182,10 @@ std::string percent_encode(const std::string_view input, return std::string(input); } - std::string result(input.substr(0, std::distance(input.begin(), pointer))); + std::string result; result.reserve(input.length()); // in the worst case, percent encoding might // produce 3 characters. + result.append(input.substr(0, std::distance(input.begin(), pointer))); for (; pointer != input.end(); pointer++) { if (character_sets::bit_at(character_set, *pointer)) { @@ -11231,6 +11217,7 @@ bool url::parse_ipv4(std::string_view input) { } else { host = ada::serializers::ipv4(ipv4); // We have to reserialize the address. } + host_type = IPV4; return true; } @@ -11460,6 +11447,7 @@ bool url::parse_ipv6(std::string_view input) { } host = ada::serializers::ipv6(address); ada_log("parse_ipv6 ", *host); + host_type = IPV6; return true; } @@ -12569,7 +12557,6 @@ result_type parse_url(std::string_view user_input, // If c is U+002F (/) and remaining starts with U+002F (/), // then set state to special authority ignore slashes state and increase // pointer by 1. 
- state = ada::state::SPECIAL_AUTHORITY_IGNORE_SLASHES; std::string_view view = helpers::substring(url_data, input_position); if (ada::checkers::begins_with(view, "//")) { input_position += 2; @@ -14021,6 +14008,7 @@ bool url_aggregator::parse_ipv4(std::string_view input) { update_base_hostname( ada::serializers::ipv4(ipv4)); // We have to reserialize the address. } + host_type = IPV4; ADA_ASSERT_TRUE(validate()); return true; } @@ -14256,6 +14244,7 @@ bool url_aggregator::parse_ipv6(std::string_view input) { update_base_hostname(ada::serializers::ipv6(address)); ada_log("parse_ipv6 ", get_hostname()); ADA_ASSERT_TRUE(validate()); + host_type = IPV6; return true; } @@ -14890,6 +14879,11 @@ void ada_free(ada_url result) noexcept { delete r; } +ada_url ada_copy(ada_url input) noexcept { + ada::result& r = get_instance(input); + return new ada::result(r); +} + bool ada_is_valid(ada_url result) noexcept { ada::result& r = get_instance(result); return r.has_value(); @@ -15007,6 +15001,14 @@ ada_string ada_get_protocol(ada_url result) noexcept { return ada_string_create(out.data(), out.length()); } +uint8_t ada_get_host_type(ada_url result) noexcept { + ada::result& r = get_instance(result); + if (!r) { + return 0; + } + return r->host_type; +} + bool ada_set_href(ada_url result, const char* input, size_t length) noexcept { ada::result& r = get_instance(result); if (!r) { @@ -15076,6 +15078,13 @@ bool ada_set_pathname(ada_url result, const char* input, return r->set_pathname(std::string_view(input, length)); } +/** + * Update the search/query of the URL. + * + * If a URL has `?` as the search value, passing empty string to this function + * does not remove the attribute. If you need to remove it, please use + * `ada_clear_search` method. 
+ */ void ada_set_search(ada_url result, const char* input, size_t length) noexcept { ada::result& r = get_instance(result); if (r) { @@ -15083,6 +15092,13 @@ void ada_set_search(ada_url result, const char* input, size_t length) noexcept { } } +/** + * Update the hash/fragment of the URL. + * + * If a URL has `#` as the hash value, passing empty string to this function + * does not remove the attribute. If you need to remove it, please use + * `ada_clear_hash` method. + */ void ada_set_hash(ada_url result, const char* input, size_t length) noexcept { ada::result& r = get_instance(result); if (r) { @@ -15090,6 +15106,39 @@ void ada_set_hash(ada_url result, const char* input, size_t length) noexcept { } } +void ada_clear_port(ada_url result) noexcept { + ada::result& r = get_instance(result); + if (r) { + r->clear_port(); + } +} + +/** + * Removes the hash of the URL. + * + * Despite `ada_set_hash` method, this function allows the complete + * removal of the hash attribute, even if it has a value of `#`. + */ +void ada_clear_hash(ada_url result) noexcept { + ada::result& r = get_instance(result); + if (r) { + r->clear_hash(); + } +} + +/** + * Removes the search of the URL. + * + * Despite `ada_set_search` method, this function allows the complete + * removal of the search attribute, even if it has a value of `?`. + */ +void ada_clear_search(ada_url result) noexcept { + ada::result& r = get_instance(result); + if (r) { + r->clear_search(); + } +} + bool ada_has_credentials(ada_url result) noexcept { ada::result& r = get_instance(result); if (!r) { diff --git a/ada_url/ada.h b/ada_url/ada.h index 3f15319..eeae41e 100644 --- a/ada_url/ada.h +++ b/ada_url/ada.h @@ -1,4 +1,4 @@ -/* auto-generated on 2023-07-23 15:03:22 -0400. Do not edit! */ +/* auto-generated on 2023-08-30 11:44:21 -0400. Do not edit! */ /* begin file include/ada.h */ /** * @file ada.h @@ -8,7 +8,7 @@ #define ADA_H /* begin file include/ada/ada_idna.h */ -/* auto-generated on 2023-05-07 19:12:14 -0400. 
Do not edit! */ +/* auto-generated on 2023-08-29 15:28:19 -0400. Do not edit! */ /* begin file include/idna.h */ #ifndef ADA_IDNA_H #define ADA_IDNA_H @@ -1008,6 +1008,7 @@ ada_really_inline bool bit_at(const uint8_t a[], const uint8_t i) { #define ADA_CHECKERS_INL_H +#include #include #include @@ -1058,7 +1059,7 @@ ada_really_inline constexpr bool begins_with(std::string_view view, std::string_view prefix) { // in C++20, you have view.begins_with(prefix) return view.size() >= prefix.size() && - (view.substr(0, prefix.size()) == prefix); + std::equal(prefix.begin(), prefix.end(), view.begin()); } } // namespace ada::checkers @@ -1406,6 +1407,25 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept; namespace ada { +/** + * Type of URL host as an enum. + */ +enum url_host_type : uint8_t { + /** + * Represents common URLs such as "https://www.google.com" + */ + DEFAULT = 0, + /** + * Represents ipv4 addresses such as "http://127.0.0.1" + */ + IPV4 = 1, + /** + * Represents ipv6 addresses such as + * "http://[2001:db8:3333:4444:5555:6666:7777:8888]" + */ + IPV6 = 2, +}; + /** * @brief Base class of URL implementations * @@ -1428,6 +1448,11 @@ struct url_base { */ bool has_opaque_path{false}; + /** + * URL hosts type + */ + url_host_type host_type = url_host_type::DEFAULT; + /** * @private */ @@ -1768,8 +1793,8 @@ inline int fast_digit_count(uint32_t x) noexcept { #define TL_EXPECTED_HPP #define TL_EXPECTED_VERSION_MAJOR 1 -#define TL_EXPECTED_VERSION_MINOR 0 -#define TL_EXPECTED_VERSION_PATCH 1 +#define TL_EXPECTED_VERSION_MINOR 1 +#define TL_EXPECTED_VERSION_PATCH 0 #include #include @@ -1802,6 +1827,16 @@ inline int fast_digit_count(uint32_t x) noexcept { #define TL_EXPECTED_GCC55 #endif +#if !defined(TL_ASSERT) +// can't have assert in constexpr in C++11 and GCC 4.9 has a compiler bug +#if (__cplusplus > 201103L) && !defined(TL_EXPECTED_GCC49) +#include +#define TL_ASSERT(x) assert(x) +#else +#define TL_ASSERT(x) +#endif +#endif + #if 
(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \ !defined(__clang__)) // GCC < 5 doesn't support overloading on const&& for member functions @@ -1957,6 +1992,7 @@ template #ifdef TL_EXPECTED_EXCEPTIONS_ENABLED throw std::forward(e); #else + (void)e; #ifdef _MSC_VER __assume(0); #else @@ -2597,7 +2633,7 @@ struct expected_operations_base : expected_storage_base { geterr().~unexpected(); construct(std::move(rhs).get()); } else { - assign_common(rhs); + assign_common(std::move(rhs)); } } @@ -2960,7 +2996,7 @@ struct default_constructor_tag { }; // expected_default_ctor_base will ensure that expected has a deleted default -// constructor if T is not default constructible. +// consturctor if T is not default constructible. // This specialization is for when T is default constructible template , return map_error_impl(std::move(*this), std::forward(f)); } #endif +#endif +#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \ + !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55) + template + TL_EXPECTED_11_CONSTEXPR auto transform_error(F &&f) & { + return map_error_impl(*this, std::forward(f)); + } + template + TL_EXPECTED_11_CONSTEXPR auto transform_error(F &&f) && { + return map_error_impl(std::move(*this), std::forward(f)); + } + template + constexpr auto transform_error(F &&f) const & { + return map_error_impl(*this, std::forward(f)); + } + template + constexpr auto transform_error(F &&f) const && { + return map_error_impl(std::move(*this), std::forward(f)); + } +#else + template + TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval(), + std::declval())) + transform_error(F &&f) & { + return map_error_impl(*this, std::forward(f)); + } + template + TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval(), + std::declval())) + transform_error(F &&f) && { + return map_error_impl(std::move(*this), std::forward(f)); + } + template + constexpr decltype(map_error_impl(std::declval(), + std::declval())) + transform_error(F 
&&f) const & { + return map_error_impl(*this, std::forward(f)); + } + +#ifndef TL_EXPECTED_NO_CONSTRR + template + constexpr decltype(map_error_impl(std::declval(), + std::declval())) + transform_error(F &&f) const && { + return map_error_impl(std::move(*this), std::forward(f)); + } +#endif #endif template expected TL_EXPECTED_11_CONSTEXPR or_else(F &&f) & { @@ -3697,27 +3780,37 @@ class expected : private detail::expected_move_assign_base, } } - constexpr const T *operator->() const { return valptr(); } - TL_EXPECTED_11_CONSTEXPR T *operator->() { return valptr(); } + constexpr const T *operator->() const { + TL_ASSERT(has_value()); + return valptr(); + } + TL_EXPECTED_11_CONSTEXPR T *operator->() { + TL_ASSERT(has_value()); + return valptr(); + } template ::value> * = nullptr> constexpr const U &operator*() const & { + TL_ASSERT(has_value()); return val(); } template ::value> * = nullptr> TL_EXPECTED_11_CONSTEXPR U &operator*() & { + TL_ASSERT(has_value()); return val(); } template ::value> * = nullptr> constexpr const U &&operator*() const && { + TL_ASSERT(has_value()); return std::move(val()); } template ::value> * = nullptr> TL_EXPECTED_11_CONSTEXPR U &&operator*() && { + TL_ASSERT(has_value()); return std::move(val()); } @@ -3753,10 +3846,22 @@ class expected : private detail::expected_move_assign_base, return std::move(val()); } - constexpr const E &error() const & { return err().value(); } - TL_EXPECTED_11_CONSTEXPR E &error() & { return err().value(); } - constexpr const E &&error() const && { return std::move(err().value()); } - TL_EXPECTED_11_CONSTEXPR E &&error() && { return std::move(err().value()); } + constexpr const E &error() const & { + TL_ASSERT(!has_value()); + return err().value(); + } + TL_EXPECTED_11_CONSTEXPR E &error() & { + TL_ASSERT(!has_value()); + return err().value(); + } + constexpr const E &&error() const && { + TL_ASSERT(!has_value()); + return std::move(err().value()); + } + TL_EXPECTED_11_CONSTEXPR E &&error() && { + 
TL_ASSERT(!has_value()); + return std::move(err().value()); + } template constexpr T value_or(U &&v) const & { @@ -4479,9 +4584,10 @@ ada_really_inline constexpr bool is_single_dot_path_segment( ada_really_inline constexpr bool is_lowercase_hex(const char c) noexcept; /** - * @details Convert hex to binary. + * @details Convert hex to binary. Caller is responsible to ensure that + * the parameter is an hexadecimal digit (0-9, A-F, a-f). */ -unsigned constexpr convert_hex_to_binary(char c) noexcept; +ada_really_inline unsigned constexpr convert_hex_to_binary(char c) noexcept; /** * first_percent should be = input.find('%') @@ -4735,6 +4841,10 @@ struct url_aggregator : url_base { /** @return true if the URL has a search component */ [[nodiscard]] inline bool has_search() const noexcept override; + inline void clear_port(); + inline void clear_hash(); + inline void clear_search() override; + private: friend ada::url_aggregator ada::parser::parse_url( std::string_view, const ada::url_aggregator *); @@ -4809,12 +4919,9 @@ struct url_aggregator : url_base { inline void update_base_port(uint32_t input); inline void append_base_pathname(const std::string_view input); inline uint32_t retrieve_base_port() const; - inline void clear_port(); inline void clear_hostname(); - inline void clear_hash(); - inline void clear_pathname() override; - inline void clear_search() override; inline void clear_password(); + inline void clear_pathname() override; inline bool has_dash_dot() const noexcept; void delete_dash_dot(); inline void consume_prepared_path(std::string_view input); @@ -6343,7 +6450,9 @@ inline void url_aggregator::clear_hostname() { " with " + components.to_string() + "\n" + to_diagram()); #endif ADA_ASSERT_TRUE(has_authority()); - ADA_ASSERT_TRUE(has_empty_hostname()); + ADA_ASSERT_EQUAL(has_empty_hostname(), true, + "hostname should have been cleared on buffer=" + buffer + + " with " + components.to_string() + "\n" + to_diagram()); ADA_ASSERT_TRUE(validate()); } @@ 
-6609,6 +6718,7 @@ struct url_search_params { * @see https://url.spec.whatwg.org/#dom-urlsearchparams-has */ inline bool has(std::string_view key) noexcept; + inline bool has(std::string_view key, std::string_view value) noexcept; /** * @see https://url.spec.whatwg.org/#dom-urlsearchparams-set @@ -6733,6 +6843,15 @@ inline bool url_search_params::has(const std::string_view key) noexcept { return entry != params.end(); } +inline bool url_search_params::has(std::string_view key, + std::string_view value) noexcept { + auto entry = + std::find_if(params.begin(), params.end(), [&key, &value](auto &param) { + return param.first == key && param.second == value; + }); + return entry != params.end(); +} + inline std::string url_search_params::to_string() { auto character_set = ada::character_sets::WWW_FORM_URLENCODED_PERCENT_ENCODE; std::string out{}; @@ -6807,14 +6926,14 @@ inline void url_search_params::sort() { #ifndef ADA_ADA_VERSION_H #define ADA_ADA_VERSION_H -#define ADA_VERSION "2.6.0" +#define ADA_VERSION "2.6.5" namespace ada { enum { ADA_VERSION_MAJOR = 2, ADA_VERSION_MINOR = 6, - ADA_VERSION_REVISION = 0, + ADA_VERSION_REVISION = 5, }; } // namespace ada diff --git a/ada_url/ada_adapter.py b/ada_url/ada_adapter.py index 246ce8a..574c392 100644 --- a/ada_url/ada_adapter.py +++ b/ada_url/ada_adapter.py @@ -1,3 +1,5 @@ +from enum import IntEnum + from ada_url._ada_wrapper import ffi, lib URL_ATTRIBUTES = ( @@ -12,11 +14,47 @@ 'search', 'hash', ) -PARSE_ATTRIBUTES = URL_ATTRIBUTES + ('origin',) +PARSE_ATTRIBUTES = URL_ATTRIBUTES + ('origin', 'host_type') +# These are the attributes that have corresponding ada_get_* functions GET_ATTRIBUTES = frozenset(PARSE_ATTRIBUTES) + +# These are the attributes that have corresponding ada_set_* functions SET_ATTRIBUTES = frozenset(URL_ATTRIBUTES) +# These are the attributes that can be cleared with one of the ada_clear_* functions +CLEAR_ATTRIBUTES = frozenset(('port', 'hash', 'search')) + +# These are the attributes that must be 
cleared by setting the empty string +UNSET_ATTRIBUTES = frozenset(('username', 'password', 'pathname')) + +_marker = object() + + +class HostType(IntEnum): + """ + Enum for URL host types: + + * ``DEFAULT`` hosts like ``https://example.org`` are ``0``. + * ``IPV4`` hosts like ``https://192.0.2.1`` are ``1``. + * ``IPV6`` hosts like ``https://[2001:db8::]`` are ``2``. + + .. code-block:: python + + >>> from ada_url import HostType + >>> HostType.DEFAULT + <HostType.DEFAULT: 0> + >>> HostType.IPV4 + <HostType.IPV4: 1> + >>> HostType.IPV6 + <HostType.IPV6: 2> + + """ + + DEFAULT = 0 + IPV4 = 1 + IPV6 = 2 + def _get_obj(constructor, destructor, *args): obj = constructor(*args) @@ -58,7 +96,8 @@ class URL: * ``pathname`` * ``search`` - You can additionally read the ``origin`` attribute. + You can additionally read the ``origin`` and ``host_type`` attributes. + ``host_type`` is a :class:`HostType` enum. The class also exposes a static method that checks whether the input *url* (and optional *base*) can be parsed: @@ -96,6 +135,31 @@ def __init__(self, url, base=None): if not lib.ada_is_valid(self.urlobj): raise ValueError('Invalid input') + def __copy__(self): + cls = self.__class__ + ret = cls.__new__(cls) + ret.__dict__.update(self.__dict__) + super(URL, ret).__init__() + return ret + + def __deepcopy__(self, memo): + cls = self.__class__ + ret = cls.__new__(cls) + super(URL, ret).__init__() + ret.urlobj = _get_obj(lib.ada_copy, lib.ada_free, self.urlobj) + + return ret + + def __delattr__(self, attr): + if attr in CLEAR_ATTRIBUTES: + clear_func = getattr(lib, f'ada_clear_{attr}') + clear_func(self.urlobj) + elif attr in UNSET_ATTRIBUTES: + set_func = getattr(lib, f'ada_set_{attr}') + set_func(self.urlobj, b'', 0) + else: + raise AttributeError(f'cannot remove {attr}') + def __dir__(self): return super().__dir__() + list(PARSE_ATTRIBUTES) @@ -103,13 +167,17 @@ def __getattr__(self, attr): if attr in GET_ATTRIBUTES: get_func = getattr(lib, f'ada_get_{attr}') data = get_func(self.urlobj) - ret = _get_str(data) if attr == 'origin': + ret = _get_str(data) 
lib.ada_free_owned_string(data) + elif attr == 'host_type': + ret = HostType(data) + else: + ret = _get_str(data) return ret - return super().__getattr__(self, attr) + raise AttributeError(f'no attribute named {attr}') def __setattr__(self, attr, value): if attr in SET_ATTRIBUTES: @@ -242,11 +310,13 @@ def parse_url(s, attributes=PARSE_ATTRIBUTES): 'pathname': '/api', 'search': '?q=1', 'hash': '#frag' - 'origin': 'https://example.org:8080' + 'origin': 'https://example.org:8080', + 'host_type': 0 } The names of the dictionary keys correspond to the components of the "URL class" in the WHATWG URL spec. + ``host_type`` is a :class:`HostType` enum. Pass in a sequence of *attributes* to limit which keys are returned. @@ -273,9 +343,13 @@ def parse_url(s, attributes=PARSE_ATTRIBUTES): for attr in attributes: get_func = getattr(lib, f'ada_get_{attr}') data = get_func(urlobj) - ret[attr] = _get_str(data) if attr == 'origin': + ret[attr] = _get_str(data) lib.ada_free_owned_string(data) + elif attr == 'host_type': + ret[attr] = HostType(data) + else: + ret[attr] = _get_str(data) return ret @@ -285,18 +359,19 @@ def replace_url(s, **kwargs): Start with the URL represented by *s*, replace the attributes given in the *kwargs* mapping, and return a normalized URL with the result. - Raises ``ValueError`` if the input URL or one of the components is not valid. + Provide an empty string to unset an attribute. .. code-block:: python >>> from ada_url import replace_url >>> base_url = 'https://user_1:password_1@example.org/resource' - >>> replace_url(base_url, username='user_2', protocol='http:') - 'http://user_2:password_1@example.org/resource' + >>> replace_url(base_url, username='user_2', password='', protocol='http:') + 'http://user_2@example.org/resource' Unrecognized attributes are ignored. ``href`` is replaced first if it is given. ``hostname`` is replaced before ``host`` if both are given. + ``ValueError`` is raised if the input URL or one of the components is not valid. 
""" try: s_bytes = s.encode('utf-8') @@ -307,9 +382,11 @@ def replace_url(s, **kwargs): if not lib.ada_is_valid(urlobj): raise ValueError('Invalid URL') from None + # We process attributes in the order given by the documentation, e.g. + # href before anything else. for attr in URL_ATTRIBUTES: - value = kwargs.get(attr) - if value is None: + value = kwargs.get(attr, _marker) + if value is _marker: continue try: @@ -317,10 +394,14 @@ def replace_url(s, **kwargs): except Exception: raise ValueError(f'Invalid value for {attr}') from None - set_func = getattr(lib, f'ada_set_{attr}') - set_result = set_func(urlobj, value_bytes, len(value_bytes)) - if (set_result is not None) and (not set_result): - raise ValueError(f'Invalid value for {attr}') from None + if (not value_bytes) and (attr in CLEAR_ATTRIBUTES): + clear_func = getattr(lib, f'ada_clear_{attr}') + clear_func(urlobj) + else: + set_func = getattr(lib, f'ada_set_{attr}') + set_result = set_func(urlobj, value_bytes, len(value_bytes)) + if (set_result is not None) and (not set_result): + raise ValueError(f'Invalid value for {attr}') from None return _get_str(lib.ada_get_href(urlobj)) diff --git a/ada_url/ada_c.h b/ada_url/ada_c.h index 6e22584..0d01e57 100644 --- a/ada_url/ada_c.h +++ b/ada_url/ada_c.h @@ -51,6 +51,7 @@ bool ada_can_parse_with_base(const char* input, size_t input_length, void ada_free(ada_url result); void ada_free_owned_string(ada_owned_string owned); +ada_url ada_copy(ada_url input); bool ada_is_valid(ada_url result); @@ -67,6 +68,7 @@ ada_string ada_get_hostname(ada_url result); ada_string ada_get_pathname(ada_url result); ada_string ada_get_search(ada_url result); ada_string ada_get_protocol(ada_url result); +uint8_t ada_get_host_type(ada_url result); // url_aggregator setters // if ada_is_valid(result)) is false, the setters have no effect @@ -82,6 +84,11 @@ bool ada_set_pathname(ada_url result, const char* input, size_t length); void ada_set_search(ada_url result, const char* input, size_t 
length); void ada_set_hash(ada_url result, const char* input, size_t length); +// url_aggregator clear methods +void ada_clear_port(ada_url result); +void ada_clear_hash(ada_url result); +void ada_clear_search(ada_url result); + // url_aggregator functions // if ada_is_valid(result) is false, functions below will return false bool ada_has_credentials(ada_url result); diff --git a/docs/index.rst b/docs/index.rst index c0948b4..6d86aed 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -49,4 +49,13 @@ API Documentation ================= .. automodule:: ada_url - :members: + +.. autoclass:: URL +.. autoclass:: HostType() +.. autofunction:: check_url +.. autofunction:: join_url +.. autofunction:: normalize_url +.. autofunction:: parse_url +.. autofunction:: replace_url +.. autofunction:: idna + diff --git a/tests/test_ada_url.py b/tests/test_ada_url.py index c52562e..90f8f8b 100644 --- a/tests/test_ada_url.py +++ b/tests/test_ada_url.py @@ -1,6 +1,8 @@ +from copy import copy, deepcopy from unittest import TestCase from ada_url import ( + HostType, URL, check_url, idna, @@ -35,6 +37,31 @@ def test_class_get(self): with self.assertRaises(AttributeError): urlobj.bogus + def test_class_host_type(self): + # host_type should return an IntEnum, which can be compared to a Python int + for url, expected in ( + ('http://localhost:3000', HostType.DEFAULT), + ('http://0.0.0.0', HostType.IPV4), + ('http://[2001:db8:3333:4444:5555:6666:7777:8888]', HostType.IPV6), + ): + with self.subTest(url=url): + urlobj = URL(url) + self.assertEqual(urlobj.host_type, int(expected)) + self.assertEqual(urlobj.host_type, expected) + + def test_copy_vs_deepcopy(self): + obj = URL('http://example.org:8080') + copied_obj = copy(obj) + deepcopied_obj = deepcopy(obj) + + obj.port = '8081' + self.assertEqual(copied_obj.port, '8081') + self.assertEqual(deepcopied_obj.port, '8080') + + deepcopied_obj.port = '8082' + self.assertEqual(copied_obj.port, '8081') + self.assertEqual(deepcopied_obj.port, '8082') 
+ def test_class_set(self): url = 'https://username:password@www.google.com:8080/' urlobj = URL(url) @@ -56,6 +83,42 @@ def test_class_set(self): expected = 'wss://changed-host:9090/new-pathname?new-search#new-hash' self.assertEqual(actual, expected) + def test_class_delete(self): + url = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' + urlobj = URL(url) + + del urlobj.port + self.assertEqual( + urlobj.href, 'https://user_1:password_1@example.org/api?q=1#frag' + ) + + del urlobj.hash + self.assertEqual(urlobj.href, 'https://user_1:password_1@example.org/api?q=1') + + del urlobj.pathname + self.assertEqual(urlobj.href, 'https://user_1:password_1@example.org/?q=1') + + del urlobj.search + self.assertEqual(urlobj.href, 'https://user_1:password_1@example.org/') + + with self.assertRaises(AttributeError): + del urlobj.href + + def test_unset(self): + url = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' + for attr, expected in ( + ('username', 'https://:password_1@example.org:8080/api?q=1#frag'), + ('password', 'https://user_1@example.org:8080/api?q=1#frag'), + ('port', 'https://user_1:password_1@example.org/api?q=1#frag'), + ('pathname', 'https://user_1:password_1@example.org:8080/?q=1#frag'), + ('search', 'https://user_1:password_1@example.org:8080/api#frag'), + ('hash', 'https://user_1:password_1@example.org:8080/api?q=1'), + ): + with self.subTest(attr=attr): + urlobj = URL(url) + urlobj.__delattr__(attr) + self.assertEqual(urlobj.href, expected) + def test_class_with_base(self): url = '../example.txt' base = 'https://example.org/path/' @@ -228,6 +291,7 @@ def test_parse_url(self): 'search': '?q=1', 'hash': '#frag', 'origin': 'https://example.org:8080', + 'host_type': 0, } self.assertEqual(actual, expected) @@ -262,7 +326,13 @@ def test_replace_url(self): actual = replace_url(s, **kwargs) self.assertEqual(actual, expected) - def test_replace_blank(self): + def test_replace_url_clear(self): + s = 
'https://user_1:password_1@example.org:8443/api?q=1#frag' + actual = replace_url(s, port='', hash='', search='') + expected = 'https://user_1:password_1@example.org/api' + self.assertEqual(actual, expected) + + def test_replace_url_unset(self): s = 'https://user:pass@example.org' actual = replace_url(s, username='', password='') expected = 'https://example.org/'