In [None]:
!python3 -m spacy download en_core_web_lg

In [1]:
import spacy

# Name of the installed spaCy pipeline package used throughout the notebook.
SPACY_MODEL_NAME = 'en_core_web_lg'

# Shared pipeline object; later cells attach the `llm` component to it.
nlp = spacy.load(SPACY_MODEL_NAME)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import logging

import spacy_llm

# Few-shot examples handed to the NER task through spacy.FewShotReader.v1.
# NOTE(review): absolute, machine-specific path — change it here when running
# the notebook on another host.
NER_EXAMPLES_PATH = "/mnt/bitgraph/examples/ner_examples.json"

# Make the cell safe to re-run: adding a second component named 'llm' raises,
# so drop the previous one first and rebuild it from the config below. This
# also guarantees the pipeline always reflects the config shown in this cell.
if "llm" in nlp.pipe_names:
    nlp.remove_pipe("llm")

# Attach an LLM-backed NER component: an OpenLLaMA model driving the
# spacy.NER.v3 task with custom labels, per-label definitions and few-shot
# examples.
nlp.add_pipe(
    'llm',
    config={
        "model": {"@llm_models": "spacy.OpenLLaMA.v1", "name": "open_llama_13b"},
        "task": {
            '@llm_tasks': 'spacy.NER.v3',
            "labels": ['COUNTRY', 'CITY', 'WORK', 'PERSON', 'ORGANIZATION', 'ERA'],
            "description": "Entities are named persons, places, things, ideas, durations, or time periods that would be the title of a Wikipedia article.",
            "label_definitions": {
                "COUNTRY": "A country, territory, or region that has an ISO country code.",
                "CITY": "An incorporated or unincorporated area or municipality.",
                "WORK": "A work of art, such as a painting or sculpture, a play, novel, poem, photograph, document, movie or other motion picture.",
                "PERSON": "A human being (e.g. Barack Obama, Taylor Swift, Plato)",
                "ORGANIZATION": "A collective of people such as a corporation, interest group, board, or union.",
                # NOTE(review): "i.e." below reads as "that is"; "e.g." was
                # probably intended, matching the PERSON definition — confirm
                # before changing, since this text is sent to the model.
                "ERA": "A colloquially recognized period of time with a rough start or end used for historical purposes (i.e. the Victorian Era).",
            },
            "examples": {
                "@misc": "spacy.FewShotReader.v1",
                "path": NER_EXAMPLES_PATH
            }
        },
    }
)

# Surface spacy-llm's debug messages (e.g. the generated prompts) in the cell
# output. Only attach a stream handler when none is present yet; otherwise
# every re-run of this cell would add another one and duplicate each log line.
if not any(type(handler) is logging.StreamHandler for handler in spacy_llm.logger.handlers):
    spacy_llm.logger.addHandler(logging.StreamHandler())
spacy_llm.logger.setLevel(logging.DEBUG)

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message
You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggin

In [4]:
# Sample questions used to exercise the LLM-backed NER component.
questions = [
    'What is the capital of Afghanistan?',
    'Who directed Pulp Fiction?',
    'What is the capital of France?',
    'What English settlement was John Smith associated with?',
    'Who were some key historical figures during the Victorian Era?',
    'Who is the current president of the Teamsters union?',
    'How do the populist platforms of Theodore Roosevelt and Andrew Jackson differ?',
    'How were the 1950s in New York City different from the 1980s?',
]

# One printed line per question: each recognised entity followed by the shape
# of its vector. A question with no entities prints an empty line.
for doc in nlp.pipe(questions):
    print("".join(f"{ent}, {ent.vector.shape}; " for ent in doc.ents))

Generated prompt for doc: What is the capital of Afghanistan?
['You are an expert Named Entity Recognition (NER) system.\nYour task is to accept Text as input and extract named entities.\nEntities must have one of the following labels: CITY, COUNTRY, ERA, ORGANIZATION, PERSON, WORK.\nIf a span is not an entity label it: `==NONE==`.\n\n\nEntities are named persons, places, things, ideas, durations, or time periods that would be the title of a Wikipedia article.\nBelow are definitions of each label to help aid you in what kinds of named entities to extract for each label.\nAssume these definitions are written by an expert and follow them closely.\nCOUNTRY: A country, territory, or region that has an ISO country code.\nCITY: An incorporated or unincorporated area or municipality.\nWORK: A work of art, such as a painting or sculpture, a play, novel, poem, photograph, document, movie or other motion picture.\nPERSON: A human being (e.g. Barack Obama, Taylor Swift, Plato)\nORGANIZATION: A co

Afghanistan, (300,); 
Pulp Fiction, (300,); 
capital, (300,); France, (300,); 


Teamsters, (300,); 
populist, (300,); 
1950s, (300,); 
