In [None]:
# Use the %pip magic so the package is installed into the environment of the
# running kernel, and pin the release so a re-run builds the same version.
%pip install llama-cpp-python==0.3.1

Collecting llama-cpp-python
  Downloading llama_cpp_python-0.3.1.tar.gz (63.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.9/63.9 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting diskcache>=5.6.1 (from llama-cpp-python)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: llama-cpp-python
  Building wheel for llama-cpp-python (pyproject.toml) ... [?25l[?25hdone
  Created wheel for llama-cpp-python: filename=llama_cpp_python-0.3.1-cp310-cp310-linux_x86_64.whl size=3485348 sha256=25544ab17718afcc4d458

In [None]:
from llama_cpp import Llama

# Load the Q5_K_M GGUF build of NuExtract-tiny-v1.5 via llama-cpp-python.
# The leading `*` in `filename` makes it a pattern rather than an exact name;
# verbose=False keeps the loader's log output out of the cell.
llm = Llama.from_pretrained(
    verbose=False,
    filename="*NuExtract-tiny-v1.5.Q5_K_M.gguf",
    repo_id="RichardErkhov/numind_-_NuExtract-tiny-v1.5-gguf",
)

In [None]:
import json

def predict(llm, texts, entities, batch_size=1, max_length=10_000, max_new_tokens=4_000):
    """Extract entity mentions from each text using a template-style prompt.

    For every input text a prompt is built from a JSON template whose keys are
    the requested labels, the model is called once, and the JSON that follows
    the ``<|output|>`` marker is parsed. Each extracted string is then located
    in the text it was extracted from to obtain character offsets.

    Args:
        llm: Callable invoked as ``llm(prompt, max_tokens=..., echo=True)``
            returning a mapping whose ``["choices"][0]["text"]`` holds the
            prompt followed by the completion.
        texts: Sequence of input strings.
        entities: Iterable of label names; each becomes a template key mapped
            to an empty list.
        batch_size: Kept for backward compatibility and not used. Prompts are
            sent to ``llm`` one at a time and every text is processed
            regardless of this value.
        max_length: Kept for backward compatibility and not used.
        max_new_tokens: Forwarded to ``llm`` as ``max_tokens``.

    Returns:
        A list with one entry per input text, in input order. Each entry is a
        list of dicts with keys ``"label"``, ``"text"``, ``"start"`` and
        ``"end"``. ``start``/``end`` delimit the first verbatim occurrence of
        the mention in its source text; both are ``-1`` when the mention does
        not occur verbatim.

    Raises:
        json.JSONDecodeError: If the text after the output marker is not valid
            JSON.
    """
    # Template: every requested label mapped to an empty list, pretty-printed.
    template = json.dumps({label: [] for label in entities}, indent=4)

    outputs = []
    # Iterate over the texts themselves so that (a) no input is skipped and
    # (b) offsets are computed against the text that produced the prediction
    # rather than against an unrelated module-level variable.
    for source_text in texts:
        prompt = f"""<|input|>\n### Template:\n{template}\n### Text:\n{source_text}\n\n<|output|>"""
        output = llm(
            prompt,
            max_tokens=max_new_tokens,
            echo=True
        )
        # echo=True returns the prompt in front of the completion; the segment
        # right after the first output marker is the generated JSON.
        json_str = output["choices"][0]["text"].split("<|output|>")[1]
        extracted = json.loads(json_str)

        output_entities = []
        for label, mentions in extracted.items():
            for mention in mentions:
                start = source_text.find(mention)
                # str.find returns -1 on a miss; do not derive a fake end
                # offset from it.
                end = start + len(mention) if start != -1 else -1
                output_entities.append({"label": label, "text": mention, "start": start, "end": end})
        outputs.append(output_entities)
    return outputs

# Sample passage used as the single input document for the demo call below.
text = """Libretto by Marius Petipa, based on the 1822 novella "Trilby, ou Le Lutin d'Argail" by Charles Nodier, first presented by the Ballet of the Moscow Imperial Bolshoi Theatre on January 25/February 6 (Julian/Gregorian calendar dates), 1870, in Moscow with Polina Karpakova as Trilby and Ludiia Geiten as Miranda and restaged by Petipa for the Imperial Ballet at the Imperial Bolshoi Kamenny Theatre on January 17–29, 1871 in St. Petersburg with Adèle Grantzow as Trilby and Lev Ivanov as Count Leopold."""
# Labels to extract; predict() turns each one into a key of the JSON template.
entities = ["person", "book", "location", "date", "male actor", "female actor", "character"]
# Bare last expression: the notebook renders the returned list of entity dicts.
predict(llm, [text], entities)


[[{'label': 'book',
   'text': "Trilby, ou Le Lutin d'Argail",
   'start': 54,
   'end': 82},
  {'label': 'location',
   'text': 'Moscow Imperial Bolshoi Theatre',
   'start': 140,
   'end': 171},
  {'label': 'location',
   'text': 'St. Petersburg with Adèle Grantzow as Trilby',
   'start': 422,
   'end': 466},
  {'label': 'location',
   'text': 'Imperial Bolshoi Kamenny Theatre',
   'start': 363,
   'end': 395},
  {'label': 'date', 'text': 'January 25/February 6', 'start': 175, 'end': 196},
  {'label': 'date', 'text': 'January 17–29', 'start': 399, 'end': 412},
  {'label': 'date', 'text': 'January 25/February 6', 'start': 175, 'end': 196},
  {'label': 'date', 'text': 'January 25/February 6', 'start': 175, 'end': 196},
  {'label': 'male actor',
   'text': 'Polina Karpakova',
   'start': 253,
   'end': 269},
  {'label': 'male actor', 'text': 'Ludiia Geiten', 'start': 284, 'end': 297},
  {'label': 'female actor', 'text': 'Ad’e Grantzow', 'start': -1, 'end': 12},
  {'label': 'female actor