explosion · rmitsch · Aug 25, 2023 · Jun 15, 2023 · Jun 15, 2023 · Jun 20, 2023
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -19,7 +19,7 @@ env:
 jobs:
   run:
     strategy:
-      fail-fast: true
+      fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
         python_version: ["3.11"]
@@ -39,7 +39,7 @@ jobs:
 
     steps:
       - uses: actions/checkout@v3
-        
+
       - name: Set up Python
         uses: actions/setup-python@v4
         with:

diff --git a/README.md b/README.md
@@ -12,17 +12,17 @@ This package integrates Large Language Models (LLMs) into [spaCy](https://spacy.
 
 - Serializable `llm` **component** to integrate prompts into your spaCy pipeline
 - **Modular functions** to define the [**task**](https://spacy.io/api/large-language-models#tasks) (prompting and parsing) and [**model**](https://spacy.io/api/large-language-models#models)
-- Interfaces with the APIs of 
+- Interfaces with the APIs of
   - **[OpenAI](https://platform.openai.com/docs/api-reference/)**
   - **[Cohere](https://docs.cohere.com/reference/generate)**
   - **[Anthropic](https://docs.anthropic.com/claude/reference/)**
 - Supports open-source LLMs hosted on Hugging Face 🤗:
   - **[Falcon](https://huggingface.co/tiiuae)**
   - **[Dolly](https://huggingface.co/databricks)**
-  - **[Llama 2](https://huggingface.co/meta-llama)**  
+  - **[Llama 2](https://huggingface.co/meta-llama)**
   - **[OpenLLaMA](https://huggingface.co/openlm-research)**
   - **[StableLM](https://huggingface.co/stabilityai)**
-- Integration with [LangChain](https://github.com/hwchase17/langchain) 🦜️🔗 - all `langchain` models and features can be used in `spacy-llm`  
+- Integration with [LangChain](https://github.com/hwchase17/langchain) 🦜️🔗 - all `langchain` models and features can be used in `spacy-llm`
 - Tasks available out of the box:
   - Named Entity Recognition
   - Text classification
@@ -58,13 +58,18 @@ python -m pip install spacy-llm
 ## 🐍 Quickstart
 
 The task and the model have to be supplied to the `llm` pipeline component using [spaCy's config
-system](https://spacy.io/api/data-formats#config). 
+system](https://spacy.io/api/data-formats#config).
 
-Let's run some text classification using a GPT-4 model from OpenAI. If you're using hosted APIs (as opposed to local 
+Let's run some text classification using a GPT-4 model from OpenAI. If you're using hosted APIs (as opposed to local
 models like Falcon, Dolly or LLaMA), ensure to that your API keys are set as environmental variables.
 
-Create a config file `config.cfg` containing at least the following
-(or see the full example [here](usage_examples/textcat_openai)):
+Create a new API key from openai.com or fetch an existing one, and ensure the
+key is set as an environment variable. For more background information, see
+the [OpenAI](https://spacy.io/api/large-language-models#gpt-3-5) section.
+
+Create a config file `config.cfg` containing at least the following (or see the
+full example
+[here](https://github.com/explosion/spacy-llm/tree/main/usage_examples/textcat_openai)):
 
 ```ini
 [nlp]
@@ -93,11 +98,13 @@ from spacy_llm.util import assemble
 nlp = assemble("config.cfg")
 doc = nlp("You look gorgeous!")
 print(doc.cats)
+# {"COMPLIMENT": 1.0, "INSULT": 0.0}
 ```
 
-That's it! There's a lot of other features - prompt templating, more tasks, logging etc. For more information on how to 
+That's it! There's a lot of other features - prompt templating, more tasks, logging etc. For more information on how to
 use those, check out https://spacy.io/api/large-language-models.
 
+
 ## 🚀 Ongoing work
 
 In the near future, we will
@@ -110,7 +117,7 @@ PRs are always welcome!
 
 ## 📝️ Reporting issues
 
-If you have questions regarding the usage of `spacy-llm`, or want to give us feedback after giving it a spin, please use 
+If you have questions regarding the usage of `spacy-llm`, or want to give us feedback after giving it a spin, please use
 the [discussion board](https://github.com/explosion/spacy-llm/discussions).
 Bug reports can be filed on the [spaCy issue tracker](https://github.com/explosion/spacy-llm/issues). Thank you!
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -20,7 +20,11 @@ filterwarnings = [
     "ignore:^.*You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use.*:UserWarning",
     "ignore:^.*Xformers is not installed correctly.*:",
     "ignore:^.*The 'warn' method is deprecated, use 'warning' instead.*:DeprecationWarning",
-    "ignore:^.*Support for class-based `config` is deprecated.*:"
+    "ignore:^.*Support for class-based `config` is deprecated.*:",
+    "ignore:^.*The `dict` method is deprecated; use `model_dump` instead.*",
+    "ignore:^.*The `parse_obj` method is deprecated; use `model_validate` instead.*",
+    "ignore:^.*`__get_validators__` is deprecated.*",
+    "ignore:^.*The `construct` method is deprecated.*"
 ]
 markers = [
     "external: interacts with a (potentially cost-incurring) third-party API",

diff --git a/spacy_llm/compat.py b/spacy_llm/compat.py
@@ -4,7 +4,8 @@
 if sys.version_info[:2] >= (3, 8):  # Python 3.8+
     from typing import Literal, Protocol, runtime_checkable
 else:
-    from typing_extensions import Literal, Protocol, runtime_checkable  # noqa: F401
+    from typing_extensions import runtime_checkable  # noqa: F401
+    from typing_extensions import Literal, Protocol  # noqa: F401
 
 if sys.version_info[:2] >= (3, 9):  # Python 3.9+
     from typing import TypedDict  # noqa: F401

diff --git a/spacy_llm/registry/reader.py b/spacy_llm/registry/reader.py
@@ -39,7 +39,8 @@ def _fewshot_reader(eg_path: Path) -> Iterable[Dict[str, Any]]:
     else:
         if not eg_path.exists():
             raise ValueError(
-                "Specified file path doesn't exist. Please ensure to provide a valid file path."
+                f"Specified file path: {str(eg_path)} doesn't exist. "
+                "Please ensure to provide a valid file path."
             )
 
         suffix = eg_path.suffix.replace("yaml", "yml")

diff --git a/spacy_llm/tasks/__init__.py b/spacy_llm/tasks/__init__.py
@@ -1,22 +1,20 @@
 from .builtin_task import BuiltinTask
 from .lemma import LemmaTask, make_lemma_task
-from .ner import NERTask, make_ner_task, make_ner_task_v2
+from .ner import NERTask, make_ner_task_v3
 from .noop import NoopTask, make_noop_task
 from .rel import RELTask, make_rel_task
 from .sentiment import SentimentTask, make_sentiment_task
-from .spancat import SpanCatTask, make_spancat_task, make_spancat_task_v2
+from .spancat import SpanCatTask, make_spancat_task_v3
 from .summarization import SummarizationTask, make_summarization_task
 from .textcat import TextCatTask, make_textcat_task
 
 __all__ = [
     "make_lemma_task",
-    "make_ner_task",
-    "make_ner_task_v2",
+    "make_ner_task_v3",
     "make_noop_task",
     "make_rel_task",
     "make_sentiment_task",
-    "make_spancat_task",
-    "make_spancat_task_v2",
+    "make_spancat_task_v3",
     "make_summarization_task",
     "make_textcat_task",
     "BuiltinTask",

diff --git a/spacy_llm/tasks/ner/__init__.py b/spacy_llm/tasks/ner/__init__.py
@@ -1,5 +1,11 @@
-from .registry import make_ner_task, make_ner_task_v2
+from .registry import make_ner_task, make_ner_task_v2, make_ner_task_v3
 from .task import NERTask
 from .util import NERExample
 
-__all__ = ["make_ner_task", "make_ner_task_v2", "NERExample", "NERTask"]
+__all__ = [
+    "make_ner_task",
+    "make_ner_task_v2",
+    "make_ner_task_v3",
+    "NERExample",
+    "NERTask",
+]
diff --git a/spacy_llm/tasks/ner/registry.py b/spacy_llm/tasks/ner/registry.py
@@ -5,8 +5,11 @@
 from ...ty import ExamplesConfigType, FewshotExample, Scorer, TaskResponseParser
 from ...util import split_labels
 from ..span import parse_responses as parse_span_responses
-from .task import DEFAULT_NER_TEMPLATE_V1, DEFAULT_NER_TEMPLATE_V2, NERTask, SpanTask
-from .util import NERExample, score
+from ..span import parse_responses_cot as parse_span_responses_cot
+from ..span.util import check_label_consistency, check_label_consistency_cot
+from .task import DEFAULT_NER_TEMPLATE_V1, DEFAULT_NER_TEMPLATE_V2
+from .task import DEFAULT_NER_TEMPLATE_V3, NERTask, SpanTask
+from .util import NERCoTExample, NERExample, score
 
 
 @registry.llm_tasks("spacy.NER.v1")
@@ -54,6 +57,8 @@ def make_ner_task(
         single_match=single_match,
         label_definitions=None,
         scorer=scorer or score,
+        description=None,
+        check_label_consistency=check_label_consistency,
     )
 
 
@@ -111,4 +116,66 @@ def make_ner_task_v2(
         case_sensitive_matching=case_sensitive_matching,
         single_match=single_match,
         scorer=scorer or score,
+        description=None,
+        check_label_consistency=check_label_consistency,
+    )
+
+
+@registry.llm_tasks("spacy.NER.v3")
+def make_ner_task_v3(
+    parse_responses: Optional[TaskResponseParser[SpanTask]] = None,
+    prompt_example_type: Optional[Type[FewshotExample]] = None,
+    labels: Union[List[str], str] = [],
+    template: str = DEFAULT_NER_TEMPLATE_V3,
+    label_definitions: Optional[Dict[str, str]] = None,
+    examples: ExamplesConfigType = None,
+    normalizer: Optional[Callable[[str], str]] = None,
+    alignment_mode: Literal["strict", "contract", "expand"] = "contract",
+    case_sensitive_matching: bool = False,
+    scorer: Optional[Scorer] = None,
+    description: Optional[str] = None,
+):
+    """NER.v3 task factory, with chain-of-thought prompting.
+
+    parse_responses (Optional[TaskResponseParser[SpanTask]]): Callable for parsing LLM responses for this task.
+    prompt_example_type (Optional[Type[FewshotExample]]): Type to use for fewshot examples.
+    labels (Union[str, List[str]]): List of labels to pass to the template,
+        either an actual list or a comma-separated string.
+        Leave empty to populate it at initialization time (only if examples are provided).
+    template (str): Prompt template passed to the model.
+    description (Optional[str]): A description of what to recognize or not recognize as entities. Forwarded to NERTask.
+    label_definitions (Optional[Dict[str, str]]): Map of label -> description
+        of the label to help the language model output the entities wanted.
+        It is usually easier to provide these definitions rather than
+        full examples, although both can be provided.
+    examples (Optional[Callable[[], Iterable[Any]]]): Optional callable that reads a file containing task examples for
+        few-shot learning. If None is passed, then zero-shot learning will be used.
+    normalizer (Optional[Callable[[str], str]]): optional normalizer function.
+    alignment_mode (str): "strict", "contract" or "expand".
+    case_sensitive_matching (bool): Whether substring matching is case-sensitive.
+    Note: single_match is not configurable in v3; it is always passed as False,
+        i.e. one substring is allowed to match multiple times in the text.
+    scorer (Optional[Scorer]): Scorer function.
+    """
+    labels_list = split_labels(labels)
+    raw_examples = examples() if callable(examples) else examples
+    example_type = prompt_example_type or NERCoTExample
+    span_examples = (
+        [example_type(**eg) for eg in raw_examples] if raw_examples else None
+    )
+
+    return NERTask(
+        parse_responses=parse_responses or parse_span_responses_cot,
+        prompt_example_type=example_type,
+        labels=labels_list,
+        template=template,
+        label_definitions=label_definitions,
+        prompt_examples=span_examples,
+        normalizer=normalizer,
+        alignment_mode=alignment_mode,
+        case_sensitive_matching=case_sensitive_matching,
+        single_match=False,
+        scorer=scorer or score,
+        description=description,
+        check_label_consistency=check_label_consistency_cot,
     )
diff --git a/spacy_llm/tasks/ner/task.py b/spacy_llm/tasks/ner/task.py
@@ -6,12 +6,14 @@
 from spacy.util import filter_spans
 
 from ...compat import Literal, Self
-from ...ty import Scorer, TaskResponseParser
-from ..span import SpanExample, SpanTask
+from ...ty import FewshotExample, Scorer, TaskResponseParser
+from ..span import SpanTask
+from ..span.task import SpanTaskLabelCheck
 from ..templates import read_template
 
 DEFAULT_NER_TEMPLATE_V1 = read_template("ner.v1")
 DEFAULT_NER_TEMPLATE_V2 = read_template("ner.v2")
+DEFAULT_NER_TEMPLATE_V3 = read_template("ner.v3")
 
 
 class NERTask(SpanTask):
@@ -20,14 +22,16 @@ def __init__(
         labels: List[str],
         template: str,
         parse_responses: TaskResponseParser[Self],
-        prompt_example_type: Type[SpanExample],
+        prompt_example_type: Type[FewshotExample],
         label_definitions: Optional[Dict[str, str]],
-        prompt_examples: Optional[List[SpanExample]],
+        prompt_examples: Optional[List[FewshotExample]],
         normalizer: Optional[Callable[[str], str]],
         alignment_mode: Literal["strict", "contract", "expand"],
         case_sensitive_matching: bool,
         single_match: bool,
         scorer: Scorer,
+        description: Optional[str],
+        check_label_consistency: SpanTaskLabelCheck[Self],
     ):
         """Default NER task.
 
@@ -47,6 +51,8 @@ def __init__(
         single_match (bool): If False, allow one substring to match multiple times in
             the text. If True, returns the first hit.
         scorer (Scorer): Scorer function.
+        description (Optional[str]): A description of what to recognize or not recognize as entities. Passed on to SpanTask.
+        check_label_consistency (SpanTaskLabelCheck): Callable to check label consistency.
         """
         super().__init__(
             labels=labels,
@@ -59,6 +65,9 @@ def __init__(
             alignment_mode=alignment_mode,
             case_sensitive_matching=case_sensitive_matching,
             single_match=single_match,
+            description=description,
+            allow_overlap=False,
+            check_label_consistency=check_label_consistency,
         )
         self._scorer = scorer
 

diff --git a/spacy_llm/tasks/ner/util.py b/spacy_llm/tasks/ner/util.py
@@ -6,6 +6,7 @@
 
 from ...compat import Self
 from ..span import SpanExample
+from ..span.examples import SpanCoTExample
 
 
 class NERExample(SpanExample):
@@ -18,6 +19,15 @@ def generate(cls, example: Example, **kwargs) -> Self:
         return cls(text=example.reference.text, entities=entities)
 
 
+class NERCoTExample(SpanCoTExample):
+    @classmethod
+    def generate(cls, example: Example, **kwargs) -> Self:
+        return cls(
+            text=example.reference.text,
+            spans=SpanCoTExample._extract_span_reasons(example.reference.ents),
+        )
+
+
 def score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
     """Score NER accuracy in examples.
     examples (Iterable[Example]): Examples to score.

diff --git a/spacy_llm/tasks/rel/util.py b/spacy_llm/tasks/rel/util.py
@@ -30,13 +30,13 @@ class EntityItem(BaseModel):
 
 
 class RELExample(FewshotExample):
-    class Config:
-        arbitrary_types_allowed = True
-
     text: str
     ents: List[EntityItem]
     relations: List[RelationItem]
 
+    class Config:
+        arbitrary_types_allowed = True
+
     @classmethod
     def generate(cls, example: Example, **kwargs) -> Self:
         entities = [
@@ -48,10 +48,8 @@ def generate(cls, example: Example, **kwargs) -> Self:
             for ent in example.reference.ents
         ]
 
-        rel_example = RELExample(
+        return cls.construct(
             text=example.reference.text,
             ents=entities,
             relations=example.reference._.rel,
         )
-
-        return rel_example
diff --git a/spacy_llm/tasks/span/__init__.py b/spacy_llm/tasks/span/__init__.py
@@ -1,5 +1,14 @@
-from .parser import parse_responses
+from .examples import SpanExample, SpanReason
+from .parser import parse_responses, parse_responses_cot
+from .registry import make_label_check, make_label_check_cot
 from .task import SpanTask
-from .util import SpanExample
 
-__all__ = ["parse_responses", "SpanExample", "SpanTask"]
+__all__ = [
+    "make_label_check",
+    "make_label_check_cot",
+    "parse_responses",
+    "parse_responses_cot",
+    "SpanExample",
+    "SpanReason",
+    "SpanTask",
+]