In [28]:
import os
from openai import OpenAI
from pydantic import BaseModel

In [30]:
# Fail fast if the OpenAI API key is missing or empty.
# .get() is used so an unset variable produces the explanatory message below
# instead of an opaque KeyError from the mapping lookup.
assert os.environ.get("OPENAI_API_KEY"), (
    "OPENAI_API_KEY is not set (or is empty); "
    "export it before running this notebook"
)

In [41]:
# Build the API client, authenticating with the key read from the environment.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [32]:
# Sample passage used as the NER input. It is spelled out line by line so the
# leading newline and the trailing spaces before each line break are explicit.
s = (
    "\n"
    "Samuel Harris Altman (born April 22, 1985) is an American entrepreneur \n"
    "and investor best known as the CEO of OpenAI since 2019 (he was briefly \n"
    "fired and reinstated in November 2023).\n"
)

print(s)


Samuel Harris Altman (born April 22, 1985) is an American entrepreneur 
and investor best known as the CEO of OpenAI since 2019 (he was briefly 
fired and reinstated in November 2023).



In [42]:
# Labels permitted for the "type" field of an entity.
entity_types = ["Person", "Organization", "Location", "DateTime"]

# Schema of a single entity record: the text span and its label, both required.
entity_schema = {
    "type": "object",
    "properties": {
        "name": {
            "type": "string",
            "description": "The actual name as specified in the text, e.g. a person's name, or the name of the country",
        },
        "type": {
            "type": "string",
            "description": "The entity type, such as 'Person' or 'Organization'",
            "enum": entity_types,
        },
    },
    "required": ["name", "type"],
    "additionalProperties": False,
}

# Full schema definition: an object holding one required "entities" array
# whose items follow entity_schema.
json_schema = {
    "name": "NamedEntities",
    "schema": {
        "type": "object",
        "properties": {
            "entities": {
                "type": "array",
                "description": "List of entity names and their corresponding types",
                "items": entity_schema,
            },
        },
        "required": ["entities"],
        "additionalProperties": False,
    },
    "strict": True,
}

print(json_schema)

{'name': 'NamedEntities', 'schema': {'type': 'object', 'properties': {'entities': {'type': 'array', 'description': 'List of entity names and their corresponding types', 'items': {'type': 'object', 'properties': {'name': {'type': 'string', 'description': "The actual name as specified in the text, e.g. a person's name, or the name of the country"}, 'type': {'type': 'string', 'description': "The entity type, such as 'Person' or 'Organization'", 'enum': ['Person', 'Organization', 'Location', 'DateTime']}}, 'required': ['name', 'type'], 'additionalProperties': False}}}, 'required': ['entities'], 'additionalProperties': False}, 'strict': True}


In [43]:
# Conversation for the request: the NER instructions as the system message,
# followed by the sample text `s` as the user message.
ner_messages = [
    {
        "role": "system",
        "content": """You are a Named Entity Recognition (NER) system.
                Your job is to identify and return all entity names and their 
                types for a given piece of text. You are to strictly conform
                only to the following entity types: Person, Location, Organization
                and DateTime. If uncertain about entity type, please ignore it.""",
    },
    {"role": "user", "content": s},
]

# Request a reply formatted according to the json_schema defined earlier.
ner_response_format = {"type": "json_schema", "json_schema": json_schema}

completion = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=ner_messages,
    response_format=ner_response_format,
)

In [44]:
# Show the message content of the first returned choice as-is.
raw_reply = completion.choices[0].message.content
print(raw_reply)

{"entities":[{"name":"Samuel Harris Altman","type":"Person"},{"name":"April 22, 1985","type":"DateTime"},{"name":"American","type":"Location"},{"name":"entrepreneur","type":"Organization"},{"name":"investor","type":"Organization"},{"name":"CEO","type":"Organization"},{"name":"OpenAI","type":"Organization"},{"name":"2019","type":"DateTime"},{"name":"November 2023","type":"DateTime"}]}


In [49]:
import pprint
import json

In [51]:
# Decode the first choice's JSON text and pretty-print the resulting object.
parsed_reply = json.loads(completion.choices[0].message.content)
pprint.pprint(parsed_reply, indent=4)

{   'entities': [   {'name': 'Samuel Harris Altman', 'type': 'Person'},
                    {'name': 'April 22, 1985', 'type': 'DateTime'},
                    {'name': 'American', 'type': 'Location'},
                    {'name': 'entrepreneur', 'type': 'Organization'},
                    {'name': 'investor', 'type': 'Organization'},
                    {'name': 'CEO', 'type': 'Organization'},
                    {'name': 'OpenAI', 'type': 'Organization'},
                    {'name': '2019', 'type': 'DateTime'},
                    {'name': 'November 2023', 'type': 'DateTime'}]}
