Skip to content

Commit

Permalink
Add explicit noexcept to nogil functions
Browse files Browse the repository at this point in the history
  • Loading branch information
phoerious committed Apr 4, 2024
1 parent 5cb82b8 commit 1ca876c
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 71 deletions.
16 changes: 8 additions & 8 deletions fastwarc/fastwarc/stream_io.pxd
Expand Up @@ -79,9 +79,9 @@ cdef class GZipStream(CompressingStream):
cdef int compression_level

cpdef void prepopulate(self, bint deflate, const string& initial_data)
cdef void _init_z_stream(self, bint zlib) nogil
cdef void _free_z_stream(self) nogil
cdef void _reinit_z_stream(self, bint deflate) nogil
cdef void _init_z_stream(self, bint zlib) noexcept nogil
cdef void _free_z_stream(self) noexcept nogil
cdef void _reinit_z_stream(self, bint deflate) noexcept nogil
cdef bint _refill_working_buf(self, size_t size) except -1


Expand All @@ -97,7 +97,7 @@ cdef class LZ4Stream(CompressingStream):
cdef size_t stream_pos

cpdef void prepopulate(self, const string& initial_data)
cdef void _free_ctx(self) nogil
cdef void _free_ctx(self) noexcept nogil


cdef class BrotliStream(CompressingStream):
Expand All @@ -123,8 +123,8 @@ cdef class BufferedReader:
cdef bint stream_started
cdef bint stream_is_compressed

cdef inline void set_limit(self, size_t offset) nogil
cdef inline void reset_limit(self) nogil
cdef inline void set_limit(self, size_t offset) noexcept nogil
cdef inline void reset_limit(self) noexcept nogil
cdef bint detect_stream_type(self) except 0

cpdef string read(self, size_t size=*) except *
Expand All @@ -134,5 +134,5 @@ cdef class BufferedReader:
cpdef void close(self) except *

cdef bint _fill_buf(self) except -1
cdef inline string_view* _get_buf(self) nogil
cdef inline size_t _consume_buf(self, size_t size) nogil
cdef inline string_view* _get_buf(self) noexcept nogil
cdef inline size_t _consume_buf(self, size_t size) noexcept nogil
16 changes: 8 additions & 8 deletions fastwarc/fastwarc/stream_io.pyx
Expand Up @@ -388,7 +388,7 @@ cdef class GZipStream(CompressingStream):
cpdef size_t tell(self) except -1:
return self.stream_pos

cdef void _init_z_stream(self, bint deflate) nogil:
cdef void _init_z_stream(self, bint deflate) noexcept nogil:
"""
Reset internal state and initialize ``z_stream``.
Expand Down Expand Up @@ -436,7 +436,7 @@ cdef class GZipStream(CompressingStream):
self.zst.avail_in = self.working_buf_filled
self.stream_pos = max(0u, self.raw_stream.tell() - self.working_buf_filled)

cdef void _free_z_stream(self) nogil:
cdef void _free_z_stream(self) noexcept nogil:
"""Release internal state and reset working buffer."""
if self.stream_state == CompressingStreamState.UNINIT:
return
Expand All @@ -448,7 +448,7 @@ cdef class GZipStream(CompressingStream):
self.working_buf_filled = 0u
self.stream_state = CompressingStreamState.UNINIT

cdef void _reinit_z_stream(self, bint deflate) nogil:
cdef void _reinit_z_stream(self, bint deflate) noexcept nogil:
"""Re-initialize zstream, but retain working buffer. Use to restart stream with different window parameters."""

cdef string working_buf_tmp = self.working_buf.substr(0, self.working_buf_filled)
Expand Down Expand Up @@ -752,7 +752,7 @@ cdef class LZ4Stream(CompressingStream):

self._free_ctx()

cdef void _free_ctx(self) nogil:
cdef void _free_ctx(self) noexcept nogil:
if self.cctx != NULL:
LZ4F_freeCompressionContext(self.cctx)
self.cctx = NULL
Expand Down Expand Up @@ -938,7 +938,7 @@ cdef class BufferedReader:

return True

cdef string_view* _get_buf(self) nogil:
cdef string_view* _get_buf(self) noexcept nogil:
"""
Get buffer contents. Does take a set limit into account.
Expand All @@ -955,7 +955,7 @@ cdef class BufferedReader:
self.limited_buf_view.remove_suffix(self.limited_buf_view.size() - remaining)
return &self.limited_buf_view

cdef size_t _consume_buf(self, size_t size) nogil:
cdef size_t _consume_buf(self, size_t size) noexcept nogil:
"""
Consume up to ``size`` bytes from internal buffer. Takes a set limit into account.
Expand All @@ -978,7 +978,7 @@ cdef class BufferedReader:
self.buf_view.remove_prefix(size)
return size

cdef inline void set_limit(self, size_t offset) nogil:
cdef inline void set_limit(self, size_t offset) noexcept nogil:
"""
Set a stream limit in bytes. Any read beyond this limit will act as if the stream reached EOF.
A set limit can be reset by calling :meth:`reset_limit()`.
Expand All @@ -988,7 +988,7 @@ cdef class BufferedReader:
self.limit = offset
self.limit_consumed = 0

cdef inline void reset_limit(self) nogil:
cdef inline void reset_limit(self) noexcept nogil:
"""Reset any previously set stream limit."""
self.limit = strnpos

Expand Down
4 changes: 2 additions & 2 deletions fastwarc/fastwarc/warc.pyx
Expand Up @@ -41,7 +41,7 @@ from fastwarc.stream_io cimport BufferedReader, BytesIOStream, CompressingStream
from fastwarc.stream_io import ReaderStaleError


cdef const char* _enum_record_type_to_str(WarcRecordType record_type) nogil:
cdef const char* _enum_record_type_to_str(WarcRecordType record_type) noexcept nogil:
if record_type == warcinfo:
return b'warcinfo'
elif record_type == response:
Expand All @@ -62,7 +62,7 @@ cdef const char* _enum_record_type_to_str(WarcRecordType record_type) nogil:
return b'unknown'


cdef WarcRecordType _str_record_type_to_enum(const string& record_type) nogil:
cdef WarcRecordType _str_record_type_to_enum(const string& record_type) noexcept nogil:
cdef string record_type_lower = str_to_lower(record_type)
if record_type_lower == b'warcinfo':
return warcinfo
Expand Down
24 changes: 12 additions & 12 deletions resiliparse/resiliparse/extract/html2text.pyx
Expand Up @@ -86,7 +86,7 @@ cdef extern from * nogil:
shared_ptr[string] text_contents


cdef string _get_collapsed_string(const string& input_str) nogil:
cdef string _get_collapsed_string(const string& input_str) noexcept nogil:
"""
Collapse newlines and consecutive white space in a string to single spaces.
Takes into account previously extracted text from ``ctx.text``.
Expand All @@ -109,17 +109,17 @@ cdef string _get_collapsed_string(const string& input_str) nogil:
cdef string LIST_BULLET = <const char*>b'\xe2\x80\xa2'


# Make sure the last entry of ``extract_nodes`` has a text buffer: if its
# ``text_contents`` shared pointer is unset, it is pointed at a freshly created
# empty ``string``. An already-set pointer is left untouched.
# NOTE(review): ``extract_nodes.back()`` is called without an emptiness check,
# so presumably callers guarantee a non-empty vector -- confirm.
cdef inline void _ensure_text_contents(vector[shared_ptr[ExtractNode]]& extract_nodes) nogil:
cdef inline void _ensure_text_contents(vector[shared_ptr[ExtractNode]]& extract_nodes) noexcept nogil:
if not deref(extract_nodes.back()).text_contents:
deref(extract_nodes.back()).text_contents = make_shared[string]()


# Append ``space_char`` to ``in_str`` (modified in place) unless the string
# already ends in a character for which ``isspace`` is true.
# An empty string also receives ``space_char``, because the emptiness test
# short-circuits before ``back()`` is evaluated.
cdef inline void _ensure_space(string& in_str, char space_char) nogil:
cdef inline void _ensure_space(string& in_str, char space_char) noexcept nogil:
if in_str.empty() or not isspace(in_str.back()):
in_str.push_back(space_char)


cdef void _extract_cb(vector[shared_ptr[ExtractNode]]& extract_nodes, ExtractContext& ctx, bint is_end_tag) nogil:
cdef void _extract_cb(vector[shared_ptr[ExtractNode]]& extract_nodes, ExtractContext& ctx, bint is_end_tag) noexcept nogil:
cdef shared_ptr[ExtractNode] last_node_shared
cdef ExtractNode* last_node = NULL
cdef bint is_block = ctx.node.type == LXB_DOM_NODE_TYPE_ELEMENT and is_block_element(ctx.node.local_name)
Expand Down Expand Up @@ -203,7 +203,7 @@ cdef void _extract_cb(vector[shared_ptr[ExtractNode]]& extract_nodes, ExtractCon
deref(last_node.text_contents).append(element_text)


cdef inline string _indent_newlines(const string& element_text, size_t depth) nogil:
cdef inline string _indent_newlines(const string& element_text, size_t depth) noexcept nogil:
cdef string indent = string(2 * depth, <char>b' ')
cdef string tmp_text
tmp_text.reserve(element_text.size() + 10 * indent.size())
Expand All @@ -215,7 +215,7 @@ cdef inline string _indent_newlines(const string& element_text, size_t depth) no
return tmp_text


cdef string _serialize_extract_nodes(vector[shared_ptr[ExtractNode]]& extract_nodes, const ExtractOpts& opts) nogil:
cdef string _serialize_extract_nodes(vector[shared_ptr[ExtractNode]]& extract_nodes, const ExtractOpts& opts) noexcept nogil:
cdef size_t i
cdef string output
cdef string element_text
Expand Down Expand Up @@ -286,7 +286,7 @@ cdef string _serialize_extract_nodes(vector[shared_ptr[ExtractNode]]& extract_no
return output


cdef inline bint _is_unprintable_pua(lxb_dom_node_t* node) nogil:
cdef inline bint _is_unprintable_pua(lxb_dom_node_t* node) noexcept nogil:
"""Whether text node contains only a single unprintable code point from the private use area."""
if node.first_child and (node.first_child.next or node.first_child.type != LXB_DOM_NODE_TYPE_TEXT):
# Node has more than one child
Expand Down Expand Up @@ -337,13 +337,13 @@ cdef RE2 logo_cls_regex = RE2(rb'(?:brand(?:ing)?[_-]*)?logo(?:$|\s)', re_opts)
cdef RE2 print_cls_regex = RE2(rb'(?:^|\s)print[_-]', re_opts)


# Guarded regex search: an empty ``s`` yields False without evaluating the
# pattern; otherwise the result of ``PartialMatch(s, r())`` is returned.
# NOTE(review): ``PartialMatch`` and the call form ``r()`` are declared outside
# this view -- check the RE2 declarations for their exact semantics.
cdef inline bint regex_search_not_empty(const string_view s, const RE2& r) nogil:
cdef inline bint regex_search_not_empty(const string_view s, const RE2& r) noexcept nogil:
if s.empty():
return False
return PartialMatch(s, r())


cdef inline bint _is_link_cluster(lxb_dom_node_t* node, double max_link_ratio, size_t max_length) nogil:
cdef inline bint _is_link_cluster(lxb_dom_node_t* node, double max_link_ratio, size_t max_length) noexcept nogil:
"""
Check if element contains an excessive number of links compared to the whole content length.
Expand Down Expand Up @@ -375,7 +375,7 @@ cdef stl_set[string] blacklist_aria_roles = [b'alert', b'banner', b'checkbox', b


# noinspection DuplicatedCode
cdef inline bint _is_main_content_node(lxb_dom_node_t* node, size_t body_depth, bint allow_comments) nogil:
cdef inline bint _is_main_content_node(lxb_dom_node_t* node, size_t body_depth, bint allow_comments) noexcept nogil:
"""
Perform a rule-based check whether the given element is a "main-content" element.
Expand Down Expand Up @@ -569,7 +569,7 @@ cdef inline bint _is_main_content_node(lxb_dom_node_t* node, size_t body_depth,
return True


# Callback that records a hit: ``ctx`` is reinterpreted as ``bint*`` and the
# flag it points to is set to True, then ``LXB_STATUS_STOP`` is returned.
# ``node`` and ``spec`` are not used.
# NOTE(review): presumably handed to a lexbor selector search so that the first
# match ends the search -- confirm at the call site.
cdef inline lxb_status_t _exists_cb(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx) nogil:
cdef inline lxb_status_t _exists_cb(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx) noexcept nogil:
(<bint*>ctx)[0] = True
return LXB_STATUS_STOP

Expand Down Expand Up @@ -669,7 +669,7 @@ cdef string _extract_plain_text_impl(HTMLTree tree,
bint form_fields,
bint noscript,
bint comments,
string skip_selector) nogil:
string skip_selector) noexcept nogil:
"""Internal extractor implementation not requiring GIL."""

cdef ExtractContext ctx
Expand Down
32 changes: 16 additions & 16 deletions resiliparse/resiliparse/parse/html.pxd
Expand Up @@ -17,53 +17,53 @@ from resiliparse_inc.string_view cimport string_view
from resiliparse_inc.lexbor cimport *


# A node counts as valid only if all three conditions hold, evaluated in this
# short-circuit order: the ``DOMNode`` wrapper is not None, its ``tree``
# attribute is not None, and its raw ``node`` pointer is non-NULL.
cdef inline bint check_node(DOMNode node) nogil:
cdef inline bint check_node(DOMNode node) noexcept nogil:
"""Check whether node is initialized and valid."""
return node is not None and node.tree is not None and node.node != NULL

# Declarations of the CSS parser / selector helpers (bodies are not part of
# this view). The four ``void`` create_*/destroy_* helpers had no exception
# specification and are the ones that gain ``noexcept``.
# ``parse_css_selectors`` already declares ``except NULL`` (NULL signals an
# error to the caller), so it is listed only once and is left unchanged.
cdef void create_css_parser(lxb_css_memory_t** memory, lxb_css_parser_t** parser) nogil
cdef void destroy_css_parser(lxb_css_memory_t* memory, lxb_css_parser_t* parser) nogil
cdef void create_css_selectors(lxb_css_parser_t* parser) nogil
cdef void destroy_css_selectors(lxb_css_parser_t* parser) nogil
cdef void create_css_parser(lxb_css_memory_t** memory, lxb_css_parser_t** parser) noexcept nogil
cdef void destroy_css_parser(lxb_css_memory_t* memory, lxb_css_parser_t* parser) noexcept nogil
cdef void create_css_selectors(lxb_css_parser_t* parser) noexcept nogil
cdef void destroy_css_selectors(lxb_css_parser_t* parser) noexcept nogil
cdef lxb_css_selector_list_t* parse_css_selectors(lxb_css_parser_t* css_parser, const lxb_char_t* selector,
size_t selector_len) except NULL nogil

cdef lxb_dom_node_t* next_node(const lxb_dom_node_t* root_node, lxb_dom_node_t* node,
size_t* depth=*, bint* end_tag=*) nogil
size_t* depth=*, bint* end_tag=*) noexcept nogil

# Element-only variant of ``next_node``: keeps calling ``next_node`` until it
# yields a node whose type is ``LXB_DOM_NODE_TYPE_ELEMENT`` or a NULL node, and
# returns that result.
# ``depth`` and ``end_tag`` default to NULL and are forwarded unchanged to every
# ``next_node`` call, including those for skipped non-element nodes.
cdef inline lxb_dom_node_t* next_element_node(const lxb_dom_node_t* root_node, lxb_dom_node_t* node,
size_t* depth=NULL, bint* end_tag=NULL) nogil:
size_t* depth=NULL, bint* end_tag=NULL) noexcept nogil:
node = next_node(root_node, node, depth, end_tag)
while node and node.type != LXB_DOM_NODE_TYPE_ELEMENT:
node = next_node(root_node, node, depth, end_tag)
return node

# Looks up ``attr`` on ``node`` via ``lxb_dom_element_get_attribute`` and wraps
# the returned pointer and length in a ``string_view``; no copy of the value is
# made here.
# NOTE(review): ``node`` is cast to ``lxb_dom_element_t*`` unconditionally, so
# presumably callers only pass element nodes -- confirm.
# NOTE(review): the returned pointer is not checked before the view is built;
# what a missing attribute yields depends on lexbor -- confirm.
cdef inline string_view get_node_attr_sv(lxb_dom_node_t* node, const string& attr) nogil:
cdef inline string_view get_node_attr_sv(lxb_dom_node_t* node, const string& attr) noexcept nogil:
"""Get node attribute value as string_view."""
cdef size_t node_attr_len
cdef const lxb_char_t* node_attr_data = lxb_dom_element_get_attribute(
<lxb_dom_element_t*>node, <lxb_char_t*>attr.data(), attr.size(), &node_attr_len)
return string_view(<const char*>node_attr_data, node_attr_len)

cdef string get_node_text(lxb_dom_node_t* node) nogil
cdef string get_node_text(lxb_dom_node_t* node) noexcept nogil

# Declarations of the node lookup helpers (bodies are not part of this view).
# The get_*_impl helpers and ``matches_impl`` had no exception specification
# and gain ``noexcept``; each of their changed lines appears twice below.
# ``query_selector_impl`` and ``query_selector_all_impl`` are listed once and
# are unchanged: they already declare an explicit error value of -1 cast to
# their return pointer type.
# NOTE(review): presumably -1 is used instead of NULL because NULL is a valid
# "no match" result -- confirm against the implementation.
cdef lxb_dom_node_t* get_element_by_id_impl(lxb_dom_node_t* node,
const char* id_value, size_t id_value_len,
bint case_insensitive=*) nogil
bint case_insensitive=*) noexcept nogil
cdef lxb_dom_collection_t* get_elements_by_attr_impl(lxb_dom_node_t* node,
const char* attr_name, size_t attr_name_len,
const char* attr_value, size_t attr_value_len,
size_t init_size=*, bint case_insensitive=*) nogil
size_t init_size=*, bint case_insensitive=*) noexcept nogil
cdef lxb_dom_collection_t* get_elements_by_class_name_impl(lxb_dom_node_t* node, const char* class_name,
size_t class_name_len, size_t init_size=*) nogil
size_t class_name_len, size_t init_size=*) noexcept nogil
cdef lxb_dom_collection_t* get_elements_by_tag_name_impl(lxb_dom_node_t* node,
const char* tag_name, size_t tag_name_len) nogil
const char* tag_name, size_t tag_name_len) noexcept nogil
cdef lxb_dom_node_t* query_selector_impl(lxb_dom_node_t* node, HTMLTree tree,
const char* selector, size_t select_len) except <lxb_dom_node_t*>-1 nogil
cdef lxb_dom_collection_t* query_selector_all_impl(lxb_dom_node_t* node, HTMLTree tree,
const char* selector, size_t selector_len,
size_t init_size=*) except <lxb_dom_collection_t*>-1 nogil
# NOTE(review): ``matches_impl`` takes a selector string like the two
# query_selector functions above, yet it is declared ``noexcept`` while they
# keep an error value. If its body can raise (e.g. on an invalid selector),
# the exception cannot propagate to the caller -- confirm this is intended.
cdef bint matches_impl(lxb_dom_node_t* node, HTMLTree tree, const char* selector, size_t selector_len) nogil
cdef bint matches_impl(lxb_dom_node_t* node, HTMLTree tree, const char* selector, size_t selector_len) noexcept nogil

cdef extern from "html.h" nogil:
cdef lxb_tag_id_t BLOCK_ELEMENTS[]
Expand Down Expand Up @@ -155,6 +155,6 @@ cdef class HTMLTree:
cpdef DOMNode create_element(self, str tag_name)
cpdef DOMNode create_text_node(self, str text)

cdef void init_css_parser(self) nogil
cdef void init_css_parser(self) noexcept nogil

cdef bint is_block_element(lxb_tag_id_t tag_id) nogil
cdef bint is_block_element(lxb_tag_id_t tag_id) noexcept nogil

0 comments on commit 1ca876c

Please sign in to comment.