In [None]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment. The
# commented-out Groq configuration further down reads GROQ_API_BASE and
# GROQ_API_KEY through os.getenv.
load_dotenv()

True

In [2]:
import json
import os

import dspy

In [3]:
from pydantic import BaseModel, Field


class EntityRelationExtraction(BaseModel):
    # Container for the extraction result: a flat list of strings, each one
    # formatted as "subject | predicate | object".
    # Documented with `#` comments on purpose: pydantic copies a class
    # docstring into the generated JSON schema as `description`, which would
    # change the schema derived from this model.
    triples: list[str] = Field(
        description="The `subject | predicate | object` triples extracted from the text.",
    )

# JSON Schema for the model as a plain dict. Later cells interpolate it into
# the system prompt and pass it inside `response_format`.
schema = EntityRelationExtraction.model_json_schema()
schema

{'properties': {'triples': {'description': 'The `subject | predicate | object` triples extracted from the text.',
   'items': {'type': 'string'},
   'title': 'Triples',
   'type': 'array'}},
 'required': ['triples'],
 'title': 'EntityRelationExtraction',
 'type': 'object'}

In [4]:
# lm = dspy.LM(
#     "openai/llama-3-8b",
#     temperature=0,
#     cache=False,
#     api_base="http://0.0.0.0:8008/v1",
#     api_key="tgi",
# )

In [5]:
# SFT endpoint (presumably the supervised fine-tuned llama-3-8b checkpoint;
# confirm), served on local port 8208 and addressed through the `openai/`
# provider prefix.
lm = dspy.LM(
    model="openai/llama-3-8b",
    # Connection settings.
    api_base="http://0.0.0.0:8208/v1",
    api_key="tgi",  # looks like a placeholder for the local server; confirm
    # Sampling / caching: temperature 0, and no response cache so each call
    # is sent to the server.
    temperature=0,
    cache=False,
)

In [6]:
# lm = dspy.LM(
#     "openai/qwen-2.5-32b",
#     temperature=0,
#     cache=False,
#     api_base=os.getenv("GROQ_API_BASE"),
#     api_key=os.getenv("GROQ_API_KEY"),
# )

In [7]:
# lm = dspy.LM(
#     "hosted_vllm/llama-3-8b",
#     temperature=0,
#     cache=False,
#     api_base="http://0.0.0.0:8008/v1",
#     api_key="tgi",
# )

In [8]:
# Smoke test: send the raw sentences as a single user turn, with no system
# prompt and no response_format, and display the completion as returned.
text = "\n".join(
    [
        "Ankara is the capital of Turkey.",
        "Claude Shannon is the father of information theory.",
    ]
)
response = lm(messages=[{"role": "user", "content": text}])

response

['Ankara | capital | Turkey\nClaude Shannon | field | Information theory\nClaude Shannon | occupation | Father of information theory']

In [15]:
# Structured extraction via response_format {"type": "json_object", "value": schema}.
# The recorded output for this cell shows the server accepting this variant.
text = """
Ankara is the capital of Turkey.
Claude Shannon is the father of information theory.
""".strip()
response = lm(
    messages=[
        {
            "role": "system",
            # json.dumps renders the schema as real JSON. Interpolating the dict
            # directly would put its Python repr (single-quoted keys) under the
            # "JSON Schema:" heading.
            "content": f"You are an entity relation extraction model. You will be given a text and you will need to extract the entities and relations from the text.\n\nJSON Schema:\n{json.dumps(schema)}",
        },
        {"role": "user", "content": text},
    ],
    response_format={"type": "json_object", "value": schema},
)

# Parse the first completion into the pydantic model.
try:
    output = EntityRelationExtraction.model_validate_json(response[0])
    print(repr(output))
except Exception as e:
    # Broad catch keeps the cell running and prints the error together with
    # the raw completion for inspection.
    print(e)
    print(response[0])

EntityRelationExtraction(triples=['Ankara | is the capital of | Turkey', 'Claude Shannon | is the father of | information theory'])


In [8]:
# Structured extraction via the OpenAI-style `json_schema` response format.
# The payload nests {"name", "schema", "strict"} under the "json_schema" key.
# NOTE: the server behind the recorded error output rejects the `json_schema`
# type altogether (it lists only `json`, `json_object`, `regex`), so this cell
# needs a backend that implements OpenAI structured outputs.
# NOTE(review): OpenAI's strict mode also expects `additionalProperties: false`
# in the schema; confirm for the target backend.
text = """
Ankara is the capital of Turkey.
Claude Shannon is the father of information theory.
""".strip()
response = lm(
    messages=[
        {
            "role": "system",
            # json.dumps renders the schema as real JSON. Interpolating the dict
            # directly would put its Python repr (single-quoted keys) under the
            # "JSON Schema:" heading.
            "content": f"You are an entity relation extraction model. You will be given a text and you will need to extract the entities and relations from the text.\n\nJSON Schema:\n{json.dumps(schema)}",
        },
        {"role": "user", "content": text},
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": EntityRelationExtraction.__name__,
            "schema": schema,
            "strict": True,
        },
    },
)

# Parse the first completion into the pydantic model.
try:
    output = EntityRelationExtraction.model_validate_json(response[0])
    print(repr(output))
except Exception as e:
    # Broad catch keeps the cell running and prints the error together with
    # the raw completion for inspection.
    print(e)
    print(response[0])

BadRequestError: litellm.BadRequestError: OpenAIException - Failed to deserialize the JSON body into the target type: response_format.type: unknown variant `json_schema`, expected one of `json`, `json_object`, `regex` at line 1 column 656

In [11]:
# Structured extraction by passing the pydantic model class itself as
# `response_format`. The recorded error output shows the request arriving at
# the server with type `json_schema`, which that server rejected.
text = """
Ankara is the capital of Turkey.
Claude Shannon is the father of information theory.
""".strip()
response = lm(
    messages=[
        {
            "role": "system",
            # json.dumps renders the schema as real JSON. Interpolating the dict
            # directly would put its Python repr (single-quoted keys) under the
            # "JSON Schema:" heading.
            "content": f"You are an entity relation extraction model. You will be given a text and you will need to extract the entities and relations from the text.\n\nJSON Schema:\n{json.dumps(schema)}",
        },
        {"role": "user", "content": text},
    ],
    response_format=EntityRelationExtraction,
)

# Parse the first completion into the pydantic model.
try:
    output = EntityRelationExtraction.model_validate_json(response[0])
    print(output)
except Exception as e:
    # Broad catch keeps the cell running and prints the error together with
    # the raw completion for inspection.
    print(e)
    print(response[0])

BadRequestError: litellm.BadRequestError: Hosted_vllmException - Failed to deserialize the JSON body into the target type: response_format.type: unknown variant `json_schema`, expected one of `json`, `json_object`, `regex` at line 1 column 656

In [None]:
# text = """
# Ankara is the capital of Turkey.
# Claude Shannon is the father of information theory.
# """.strip()
# response = lm(
#     messages=[
#         {
#             "role": "system",
#             "content": f"You are an entity relation extraction model. You will be given a text and you will need to extract the entities and relations from the text.\n\nJSON Schema:\n{schema}",
#         },
#         {"role": "user", "content": text},
#     ],
#     tools= [
#         {
#             "type": "function",
#             "function": {
#                 "name": "save",
#                 "description": "Save extracted triples",
#                 "parameters": schema,
#             }
#         }
#     ],
#     tool_choice="save"
# )

# response

  Expected `str` but got `dict` with value `{'triples': ['Ankara', 'i..., 'information theory']}` - serialized value may not be as expected
  return self.__pydantic_serializer__.to_python(


[None]