In [None]:
!pip install openai

In [7]:
import os, json
from openai import OpenAI

# — Configuration —
# Index os.environ directly so a missing INFERENCE_API_KEY raises KeyError right
# here. With os.getenv the key would be None, and the OpenAI client then falls
# back to the OPENAI_API_KEY environment variable, which would send an unrelated
# credential to this third-party base_url.
client = OpenAI(
    base_url="https://api.inference.net/v1",
    api_key=os.environ["INFERENCE_API_KEY"],
)

In [8]:
# Sentences taken from classic NER demos.
# These are the example inputs for the notebook: the driver cell iterates over
# this list and passes each sentence to extract_entities().
sentences = [
    "Apple is looking at buying U.K. startup for $1 billion.",
    "Barack Obama was born in Hawaii.",
    "San Francisco considers banning sidewalk delivery robots.",
    "Amazon plans to open a new headquarters in Arlington by 2025.",
]

# Strict JSON schema for entities, assembled bottom-up:
# allowed labels -> one entity record -> the full response envelope.
_entity_labels = ["person", "organization", "location", "date", "money", "misc"]

# A single extracted entity: its text plus exactly one of the allowed labels.
_entity_record = {
    "type": "object",
    "properties": {
        "entity": {"type": "string"},
        "type": {"type": "string", "enum": _entity_labels},
    },
    "required": ["entity", "type"],
    "additionalProperties": False,
}

# Envelope handed to the chat-completions call as the `json_schema` value of
# `response_format`; the response must carry the text and a list of entities.
entity_schema = {
    "name": "entity_extraction_result",
    "strict": True,
    "schema": {
        "type": "object",
        "properties": {
            "text": {"type": "string"},
            "entities": {"type": "array", "items": _entity_record},
        },
        "required": ["text", "entities"],
        "additionalProperties": False,
    },
}

In [10]:
def extract_entities(
    text: str,
    model: str = "meta-llama/llama-3.2-3b-instruct/fp-16",
    temperature: float = 0.0,
) -> dict:
    """Extract named entities from ``text`` using a schema-constrained chat completion.

    Sends a system prompt describing the entity categories, followed by ``text``
    as the user message, through the module-level ``client`` and asks for a
    response that follows the module-level ``entity_schema``.

    Args:
        text: The sentence or passage to analyse.
        model: Model identifier forwarded to the chat-completions endpoint.
        temperature: Sampling temperature forwarded to the endpoint. Defaults
            to 0 to reduce run-to-run variation when the notebook is re-run.

    Returns:
        The parsed JSON object from the model. Its ``"text"`` key is always set
        to the exact ``text`` argument; the remaining keys (``"entities"``) are
        whatever the model produced under ``entity_schema``.

    Raises:
        ValueError: If the response carries no message content, if the content
            is not valid JSON (``json.JSONDecodeError`` is a ``ValueError``),
            or if the decoded JSON is not an object.
    """
    messages = [
        {
            "role": "system",
            "content": (
                "You are a named-entity recognition assistant. "
                "Your task is to identify and extract named entities from the given text. "
                "Extract entities of the following types:\n"
                "- PERSON: Names of people, including first names, last names, and full names\n"
                "- ORGANIZATION: Companies, institutions, agencies, government bodies, non-profits\n"
                "- LOCATION: Geographic locations including cities, countries, states, regions, landmarks\n"
                "- DATE: Dates, times, years, temporal expressions\n"
                "- MONEY: Monetary amounts, currencies, financial values\n"
                "- MISC: Other notable entities that don't fit the above categories but are significant\n\n"
                "Instructions:\n"
                "1. Read the input text carefully\n"
                "2. Identify all entities that match the categories above\n"
                "3. For each entity, determine its exact text span and category\n"
                "4. Return the original text and all identified entities in the specified JSON format\n"
                "5. Be precise - only extract clear, unambiguous entities\n"
                "6. If no entities are found, return an empty entities array\n\n"
                "Respond ONLY with valid JSON matching the given schema."
            ),
        },
        {"role": "user", "content": text},
    ]

    resp = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        response_format={"type": "json_schema", "json_schema": entity_schema},
    )

    # The content can be missing (None) or empty; json.loads(None) would fail
    # with an opaque TypeError, so report the condition explicitly instead.
    content = resp.choices[0].message.content
    if not content:
        raise ValueError("Model returned no content; cannot parse entities.")

    result = json.loads(content)
    if not isinstance(result, dict):
        raise ValueError(
            f"Expected a JSON object from the model, got {type(result).__name__}."
        )

    # The model's echo of the input is not trustworthy (it can drop or alter
    # characters, e.g. trailing punctuation), so report the caller's exact string.
    result["text"] = text
    return result

In [5]:
if __name__ == "__main__":
    # Divider printed after each sentence's report.
    divider = "-" * 30
    for sentence in sentences:
        extraction = extract_entities(sentence)
        print(f"Text: {extraction['text']}")
        # Flatten each entity record into an (entity text, label) pair for display.
        labelled = [(item['entity'], item['type']) for item in extraction['entities']]
        print(f"Entities: {labelled}")
        print(divider)
        

Text: Apple is looking at buying U.K. startup for $1 billion
Entities: [('U.K.', 'location'), ('$1 billion', 'money')]
------------------------------
Text: Barack Obama was born in Hawaii.
Entities: [('Barack Obama', 'person'), ('Hawaii', 'location')]
------------------------------
Text: San Francisco considers banning sidewalk delivery robots.
Entities: [('San Francisco', 'location')]
------------------------------
Text: Amazon plans to open a new headquarters in Arlington by 2025.
Entities: [('Arlington', 'location'), ('2025', 'date')]
------------------------------
