In [None]:
from dotenv import load_dotenv

load_dotenv()

True

In [21]:
import json
import os

import dspy
from pydantic import BaseModel, Field


def jprint(obj):
    print(json.dumps(obj, indent=2))

In [22]:
class EntityRelationExtraction(BaseModel):
    triples: list[str] = Field(description="The `subject | predicate | object` triples extracted from the text.")

schema = EntityRelationExtraction.model_json_schema()
jprint(schema)

{
  "properties": {
    "triples": {
      "description": "The `subject | predicate | object` triples extracted from the text.",
      "items": {
        "type": "string"
      },
      "title": "Triples",
      "type": "array"
    }
  },
  "required": [
    "triples"
  ],
  "title": "EntityRelationExtraction",
  "type": "object"
}


In [4]:
# lm = dspy.LM(
#     "openai/llama-3-8b",
#     temperature=0,
#     cache=False,
#     api_base="http://0.0.0.0:8008/v1",
#     api_key="tgi",
# )

In [5]:
# # SFT
# lm = dspy.LM(
#     "openai/llama-3-8b",
#     temperature=0,
#     cache=False,
#     api_base="http://0.0.0.0:8208/v1",
#     api_key="tgi",
# )

In [6]:
# lm = dspy.LM(
#     "openai/qwen-2.5-32b",
#     temperature=0,
#     cache=False,
#     api_base=os.getenv("GROQ_API_BASE"),
#     api_key=os.getenv("GROQ_API_KEY"),
# )

In [7]:
lm = dspy.LM(
    "groq/llama-3.1-8b-instant",
    temperature=0,
    cache=False,
    api_base=os.getenv("GROQ_API_BASE"),
    api_key=os.getenv("GROQ_API_KEY"),
)

In [8]:
# lm = dspy.LM(
#     "hosted_vllm/llama-3-8b",
#     temperature=0,
#     cache=False,
#     api_base="http://0.0.0.0:8008/v1",
#     api_key="tgi",
# )

### No instructions

In [14]:
text = """
Ankara is the capital of Turkey.
Claude Shannon is the father of information theory.
""".strip()
response = lm(
    messages=[
        {"role": "user", "content": text},
    ],
)

response

['That\'s correct. \n\n1. Ankara is indeed the capital of Turkey. It has been the capital since 1923, when the Turkish government moved from Istanbul.\n\n2. Claude Shannon is widely regarded as the father of information theory. He was an American mathematician, electrical engineer, and cryptographer who made significant contributions to the field of information theory. His 1948 paper "A Mathematical Theory of Communication" laid the foundation for modern information theory and has had a lasting impact on the development of digital communication systems.']

### Instructions + schema but no response format

In [13]:
text = """
Ankara is the capital of Turkey.
Claude Shannon is the father of information theory.
""".strip()
response = lm(
    messages=[
        {
            "role": "system",
            "content": f"You are an entity relation extraction model. You will be given a text and you will need to extract the entities and relations from the text.\n\nJSON Schema:\n{schema}",
        },
        {"role": "user", "content": text},
    ],
)

print(response[0])

Here's the extracted entities and relations in JSON format:

{
  "properties": {
    "triples": {
      "description": "The `subject | predicate | object` triples extracted from the text.",
      "items": {
        "type": "string"
      },
      "title": "Triples",
      "type": "array"
    }
  },
  "required": [
    "triples"
  ],
  "title": "EntityRelationExtraction",
  "type": "object",
  "triples": [
    {
      "object": "Turkey",
      "predicate": "capital of",
      "subject": "Ankara"
    },
    {
      "object": "information theory",
      "predicate": "father of",
      "subject": "Claude Shannon"
    }
  ]
}


### Instructions + schema + response format = json_object

In [17]:
text = """
Ankara is the capital of Turkey.
Claude Shannon is the father of information theory.
""".strip()
response = lm(
    messages=[
        {
            "role": "system",
            "content": f"You are an entity relation extraction model. You will be given a text and you will need to extract the entities and relations from the text.\n\nJSON Schema:\n{schema}",
        },
        {"role": "user", "content": text},
    ],
    response_format={"type": "json_object", "value": schema},
)
try:
    output = EntityRelationExtraction.model_validate_json(response[0])
    print(repr(output))
except Exception as e:
    print(e)
    print(response[0])

2 validation errors for EntityRelationExtraction
triples.0
  Input should be a valid string [type=string_type, input_value=['Ankara', 'is the capital of', 'Turkey'], input_type=list]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type
triples.1
  Input should be a valid string [type=string_type, input_value=['Claude Shannon', 'is th...', 'information theory'], input_type=list]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type
{
   "triples": [
      [
         "Ankara",
         "is the capital of",
         "Turkey"
      ],
      [
         "Claude Shannon",
         "is the father of",
         "information theory"
      ]
   ]
}


### Instructions + response format = Pydantic

In [19]:
text = """
Ankara is the capital of Turkey.
Claude Shannon is the father of information theory.
""".strip()
response = lm(
    messages=[
        {
            "role": "system",
            "content": "You are an entity relation extraction model. You will be given a text and you will need to extract the entities and relations from the text.",
        },
        {"role": "user", "content": text},
    ],
    response_format=EntityRelationExtraction,
)

try:
    output = EntityRelationExtraction.model_validate_json(response[0])
    print(repr(output))
except Exception as e:
    print(e)
    print(response[0])

EntityRelationExtraction(triples=['Ankara, is the capital of, Turkey.', 'Claude Shannon, is the father of, information theory.'])


### Tool calling

In [20]:
text = """
Ankara is the capital of Turkey.
Claude Shannon is the father of information theory.
""".strip()
response = lm(
    messages=[
        {
            "role": "system",
            "content": f"You are an entity relation extraction model. You will be given a text and you will need to extract the entities and relations from the text.\n\nJSON Schema:\n{schema}",
        },
        {"role": "user", "content": text},
    ],
    tools= [
        {
            "type": "function",
            "function": {
                "name": "save",
                "description": "Save extracted triples",
                "parameters": schema,
            }
        }
    ],
    tool_choice="save"
)

response

BadRequestError: litellm.BadRequestError: GroqException - {"error":{"message":"'tool_choice' : value must be a string (auto or none) or an object like `{'type': 'function', 'function': {'name': 'my_function'}}`","type":"invalid_request_error"}}
