Skip to content

Commit

Permalink
feat: add example stubs (3) (#12801)
Browse files Browse the repository at this point in the history
* feat: add example stubs

* fix: add required annotations

* fix: mypy issues

* fix: use Py36-compatible Protocol

* Minor reformatting

* adding further type specifications and removing internal methods

* black formatting

* widen type to iterable

* add private methods that are being used by the built-in convertors

* revert changes to corpus.py

* fixes

* fixes

* fix typing of PlainTextCorpus

---------

Co-authored-by: Basile Dura <basile@bdura.me>
Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
  • Loading branch information
3 people committed Aug 2, 2023
1 parent 0fe43f4 commit 0737443
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 2 deletions.
8 changes: 7 additions & 1 deletion spacy/tokens/doc.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ from typing import (
List,
Optional,
Protocol,
Sequence,
Tuple,
Union,
overload,
Expand Down Expand Up @@ -134,7 +135,12 @@ class Doc:
def text(self) -> str: ...
@property
def text_with_ws(self) -> str: ...
ents: Tuple[Span]
# Ideally the getter would output Tuple[Span, ...]
# see https://github.com/python/mypy/issues/3004
@property
def ents(self) -> Sequence[Span]: ...
@ents.setter
def ents(self, value: Sequence[Span]) -> None: ...
def set_ents(
self,
entities: List[Span],
Expand Down
2 changes: 1 addition & 1 deletion spacy/training/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def create_plain_text_reader(
path: Optional[Path],
min_length: int = 0,
max_length: int = 0,
) -> Callable[["Language"], Iterable[Doc]]:
) -> Callable[["Language"], Iterable[Example]]:
"""Iterate Example objects from a file or directory of plain text
UTF-8 files with one line per doc.
Expand Down
66 changes: 66 additions & 0 deletions spacy/training/example.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple

from ..tokens import Doc, Span
from ..vocab import Vocab
from .alignment import Alignment

# Type stub: takes a Vocab and two string-keyed annotation mappings and
# returns a Doc. Presumably `tok_annot` carries token-level and `doc_annot`
# document-level annotations -- confirm against the implementation.
def annotations_to_doc(
    vocab: Vocab,
    tok_annot: Dict[str, Any],
    doc_annot: Dict[str, Any],
) -> Doc: ...
# Type stub: accepts an iterable of Example objects plus a `method` string and
# returns nothing. NOTE(review): `method` is presumably the caller's name used
# for error reporting -- confirm against the implementation.
def validate_examples(examples: Iterable[Example], method: str) -> None: ...
# Type stub: takes a zero-argument callable that produces an iterable of
# Example objects, plus a `method` string.
# The explicit `-> None` mirrors the validate_examples stub; without it the
# return type is implicitly Any and callers are not checked.
def validate_get_examples(
    get_examples: Callable[[], Iterable[Example]],
    method: str,
) -> None: ...

class Example:
    # Type stub for Example: constructed from a `predicted` Doc and a
    # `reference` Doc, with an optional keyword-only Alignment.
    # NOTE(review): `x` / `y` presumably mirror `predicted` / `reference` --
    # confirm against the implementation.
    x: Doc
    y: Doc

    def __init__(
        self,
        predicted: Doc,
        reference: Doc,
        *,
        alignment: Optional[Alignment] = None,
    ) -> None: ...
    def __len__(self) -> int: ...
    # `predicted` and `reference` are read/write properties holding a Doc.
    @property
    def predicted(self) -> Doc: ...
    @predicted.setter
    def predicted(self, doc: Doc) -> None: ...
    @property
    def reference(self) -> Doc: ...
    @reference.setter
    def reference(self, doc: Doc) -> None: ...
    def copy(self) -> Example: ...
    # Alternate constructor from a Doc plus a string-keyed dict.
    @classmethod
    def from_dict(cls, predicted: Doc, example_dict: Dict[str, Any]) -> Example: ...
    # Read-only: no setter is declared for `alignment`.
    @property
    def alignment(self) -> Alignment: ...
    # NOTE(review): the return types of the next three methods are not visible
    # from this stub, so they are left unannotated (implicitly Any). The flag
    # parameters are annotated `bool` to match their boolean defaults.
    def get_aligned(self, field: str, as_string: bool = False): ...
    def get_aligned_parse(self, projectivize: bool = True): ...
    def get_aligned_sent_starts(self): ...
    def get_aligned_spans_x2y(
        self, x_spans: Iterable[Span], allow_overlap: bool = False
    ) -> List[Span]: ...
    def get_aligned_spans_y2x(
        self, y_spans: Iterable[Span], allow_overlap: bool = False
    ) -> List[Span]: ...
    def get_aligned_ents_and_ner(self) -> Tuple[List[Span], List[str]]: ...
    def get_aligned_ner(self) -> List[str]: ...
    def get_matching_ents(self, check_label: bool = True) -> List[Span]: ...
    def to_dict(self) -> Dict[str, Any]: ...
    def split_sents(self) -> List[Example]: ...
    @property
    def text(self) -> str: ...
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...

# Private helper stub. `example_dict` is annotated to match the parameter of
# the same name on Example.from_dict.
# NOTE(review): the return type is not visible from this stub; left
# unannotated (implicitly Any) -- confirm against the implementation.
def _parse_example_dict_data(example_dict: Dict[str, Any]): ...
# Private helper stub. `example_dict` is annotated to match the parameter of
# the same name on Example.from_dict.
# NOTE(review): the return type is not visible from this stub; left
# unannotated (implicitly Any) -- confirm against the implementation.
def _fix_legacy_dict_data(example_dict: Dict[str, Any]): ...

0 comments on commit 0737443

Please sign in to comment.