In [3]:
from dotenv import load_dotenv
import os
import sys
# Put the parent directory on the module search path.
# NOTE(review): presumably this is what makes the project-local `database`
# package importable from this notebook's folder — confirm the repo layout.
sys.path.append("..")

In [4]:
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this supplies the credentials used by ChatOpenAI
# and QdrantClient further down — confirm which variables are required.
load_dotenv()

True

In [5]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
from database.qdrant_client import QdrantClient
from typing import List, Dict, Any, Optional
import json

  from .autonotebook import tqdm as notebook_tqdm


2025-04-17 10:30:12,414 | [32mINFO[0m | nl-to-sparql | Logging initialized: console=INFO, file=DEBUG


In [4]:
# llm = ChatOpenAI(
#     model="gpt-4o-mini",
#     temperature=0.
# )

In [5]:
# prompt = ChatPromptTemplate.from_messages([
#     ("system", 
#      """You are a professional developer with experience in writing SPARQL for ontology file. Your task is to transform natural provided query to SPARQL based on the ontology code and previous SPARQL code (optional). Please follow the detailed instruction below:
#      - If query related to computation, first convert the value to string by STR and then convert to number
#      - If can not convert the query to SPARQL, the output is {{"query": "", "step": "query of that step"}}
#      **Provided query**:
#      {query}
#      **Ontology code**:
#      {ontology_code}
#      {sparql_code}
#      The output format must be in the following format:
#      {{"query": "SPARQL query", "step": "query of that step"}}
#      """
#     )
# ])

# # Define the input
# user_query = "Tìm thời gian bắt đầu của triều đại Triệu?"
# feedback = "[]"

# # Format into a list of messages
# messages = prompt.format_messages(query=user_query, ontology_code="""<!-- https://CHeVIE.vn/ontologies/TrieuRulingDynastyStartDate -->\n\n    <owl:NamedIndividual rdf:about="https://CHeVIE.vn/ontologies/TrieuRulingDynastyStartDate">\n        <rdf:type rdf:resource="http://www.w3.org/2006/time#Instant"/>\n        <time:inDateTime rdf:resource="https://CHeVIE.vn/ontologies/TrieuRulingDynastyStartDateDescription"/>\n        <rdfs:label xml:lang="en">Start Date of Triệu Ruling Dynasty</rdfs:label>\n    \t<rdfs:label xml:lang="vi">Ngày bắt đầu triều đại Triệu</rdfs:label>\n    </owl:NamedIndividual><!-- https://CHeVIE.vn/ontologies/TrieuRulingDynastyStartDateDescription -->\n\n    <owl:NamedIndividual rdf:about="https://CHeVIE.vn/ontologies/TrieuRulingDynastyStartDateDescription">\n        <rdf:type rdf:resource="http://www.w3.org/2006/time#DateTimeDescription"/>\n        <time:hasTRS rdf:resource="https://CHeVIE.vn/ontologies/GregorianCalendar"/>\n        <time:year rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">-210</time:year>\n        <rdfs:label xml:lang="en">Start Date of Triệu Ruling Dynasty Description</rdfs:label>\n    \t<rdfs:label xml:lang="vi">Mô tả ngày bắt đầu triều đại Triệu</rdfs:label>\n    </owl:NamedIndividual><!-- https://CHeVIE.vn/ontologies/TrieuRulingDynastyStartDateStatement -->\n\n    <owl:NamedIndividual rdf:about="https://CHeVIE.vn/ontologies/TrieuRulingDynastyStartDateStatement">\n        <rdf:type rdf:resource="https://CHeVIE.vn/ontologies/Statement"/>\n        <_start rdf:resource="https://CHeVIE.vn/ontologies/TrieuRulingDynastyStartDate"/>\n        <prov:wasDerivedFrom rdf:resource="https://CHeVIE.vn/ontologies/TrieuRulingDynastyPeriodReference"/>\n        <rdfs:label xml:lang="en">Statement about Start Date of Triệu Ruling Dynasty</rdfs:label>\n    \t<rdfs:label xml:lang="vi">Tuyên bố về ngày bắt đầu triều đại Triệu</rdfs:label>\n    </owl:NamedIndividual>""")

In [6]:
# ans = llm.invoke(messages)

In [7]:
# ans.content

In [6]:
class ResponseGenerationAgent:
    """
    Turn natural-language query steps into SPARQL queries with an LLM.

    For each step the agent looks up ontology fragments in the Qdrant
    collection ``collection_name``, builds a prompt from the step text, the
    requested SPARQL type, the fragments and (for "complex" steps) the queries
    generated so far, and parses the model's JSON reply.
    """

    def __init__(self):
        """
        Initialize the response generation agent
        """

        self.agent = ChatOpenAI(
            model="gpt-4o-mini",
            temperature=0.
        )

        self.qdrant_client = QdrantClient()

        # Extra LLM calls allowed per step when the reply is not valid JSON.
        self.num_retry = 2
        # Number of ontology fragments retrieved per step.
        self.top_k = 3
        self.collection_name = "ontology_embedding"

    def generate(self, steps: List[Dict[str, Any]]):
        """
        Generate one SPARQL query per step.

        Args:
            steps: List[Dict[str, Any]]: each item needs the keys "step"
                (natural-language text) and "sparql_type"; when "level" equals
                "complex" the queries generated for earlier steps are added to
                the prompt.

        Returns:
            The list of parsed JSON replies in step order (the prompt asks the
            model for {"query": ..., "step": ...}). When ``steps`` is empty an
            apology string is returned instead of a list.

        Raises:
            json.JSONDecodeError: a reply is still not valid JSON after
                ``num_retry`` additional attempts.
        """
        if not steps:
            return "I'm sorry, I couldn't generate a proper response based on the information available"

        previous_queries = []

        for step in steps:
            prompt = self._prepare_step_prompt(
                step_query=step["step"],
                step_query_type=step["sparql_type"],
                # A missing "level" is treated like a non-complex step.
                previous_queries=previous_queries if step.get("level") == "complex" else None
            )
            previous_queries.append(self._invoke_and_parse(prompt))

        return previous_queries

    def _invoke_and_parse(self, prompt):
        """
        Call the LLM and decode its reply as JSON, retrying on malformed output.

        Args:
            prompt: formatted messages passed unchanged to ``self.agent.invoke``

        Returns:
            The decoded JSON value of the first reply that parses

        Raises:
            json.JSONDecodeError: no reply parsed within 1 + ``num_retry`` calls
        """
        attempts = 1 + max(0, self.num_retry)
        last_error = None
        for _ in range(attempts):
            reply = self.agent.invoke(prompt)
            try:
                return json.loads(self._strip_code_fence(reply.content))
            except json.JSONDecodeError as error:
                last_error = error
        # Same exception type the unguarded json.loads call would have raised.
        raise last_error

    @staticmethod
    def _strip_code_fence(text):
        """
        Remove a surrounding Markdown code fence (``` or ```json) from a reply.

        Non-string input is returned unchanged so json.loads reports the error.
        """
        if not isinstance(text, str):
            return text
        cleaned = text.strip()
        if cleaned.startswith("```"):
            # Drop the whole opening fence line, which may carry a language tag.
            first_newline = cleaned.find("\n")
            cleaned = cleaned[first_newline + 1:] if first_newline != -1 else cleaned[3:]
            cleaned = cleaned.rstrip()
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]
        return cleaned.strip()

    def _prepare_step_prompt(self, step_query: str, step_query_type: str, previous_queries: Optional[List[Dict[str, Any]]]=None):
        """
        Build the chat messages for one step.

        Args:
            step_query: str: natural-language text of the step
            step_query_type: str: SPARQL query form requested from the model
            previous_queries: earlier generated queries to show the model;
                None or an empty list omits the "SparQL code" section

        Returns:
            The messages produced by ``ChatPromptTemplate.format_messages``
        """
        prompt = ChatPromptTemplate.from_messages([
            ("system", 
             """You are a professional developer with experience in writing SPARQL for ontology file. Your task is to transform natural provided query to SPARQL based on the ontology code, query type. Please follow the detailed instruction below:
             - If query related to computation or compare, first convert the value to string by STR and then convert to number. 
             - Please query number correctly not rdfs:label or rdfs:comment
             - If query need to find the numeric, please convert to get exactly number not reference
             - If can not convert the query to SPARQL, the output is {{"query": "", "step": "query of that step"}}
             **Output SPARQL type**:
             {sparql_type}
             **Provided query**:
             {query}
             **Ontology code**:
             {ontology_code}
             {sparql_code}
             The output format must be in the following format:
             {{"query": "SPARQL query", "step": "query of that step"}}
             """
            )
        ])
        # An empty history carries no information, so do not emit a bare "[]" section.
        if not previous_queries:
            sparql_code = ""
        else:
            sparql_code = "**SparQL code**:\n" + str(previous_queries)

        ontology_code = self._get_code_part(step_query)

        return prompt.format_messages(
            sparql_type=step_query_type,
            query=step_query,
            ontology_code=ontology_code,
            sparql_code=sparql_code
        )

    def _get_code_part(self, step_query: str) -> str:
        """
        Search code part in ontology for step query

        Args:
            step_query: str: query for each step

        Returns:
            The "code" payloads of the top ``top_k`` search hits, each stripped
            and joined with a newline; hits without a non-empty string "code"
            payload are skipped
        """
        search_results = self.qdrant_client.client.query_points(
            collection_name=self.collection_name,
            query=self.qdrant_client.default_model.encode(step_query),
            limit=self.top_k
        ).points

        fragments = []
        for search_result in search_results:
            payload = search_result.payload or {}
            code = payload.get("code")
            if isinstance(code, str) and code.strip():
                fragments.append(code.strip())
        # A newline keeps adjacent fragments from running into each other.
        return "\n".join(fragments)

In [7]:
# Build the agent; its constructor creates the ChatOpenAI model and the QdrantClient.
generator = ResponseGenerationAgent()

2025-04-17 10:30:17,600 | [32mINFO[0m | sentence_transformers.SentenceTransformer | Use pytorch device_name: cuda:0
2025-04-17 10:30:17,602 | [32mINFO[0m | sentence_transformers.SentenceTransformer | Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2025-04-17 10:30:23,973 | [32mINFO[0m | httpx | HTTP Request: GET http://172.26.33.199:2513 "HTTP/1.1 200 OK"


In [8]:
# Example plan: two simple lookups followed by a complex step that combines them.
# Note: a later cell reassigns `steps`.
steps = [
    {
        # "Find the start time of the Triệu dynasty"
        "step": "Tìm thời gian bắt đầu của triều đại Triệu",
        "sparql_type": "SELECT",
        "level": "simple",
    },
    {
        "step": "Find the end date of Triệu Ruling Dynasty",
        "sparql_type": "SELECT",
        "level": "simple",
    },
    {
        # "Compute the span between the start time and the end time"
        "step": "Tính khoảng thời gian giữa thời gian bắt đầu và thời gian kết thúc",
        "sparql_type": "SELECT",
        "level": "complex",
    },
]

In [9]:
# Example plan ending in an ASK comparison of two looked-up dates.
# Note: a later cell reassigns `steps`.
steps = [
    {
        # "Check the time of death of King Quang Trung"
        "step": "Kiểm tra thời gian mất của Vua Quang Trung",
        "sparql_type": "SELECT",
        "level": "simple",
    },
    {
        # "Check the founding time of the Triệu dynasty"
        "step": "Kiểm tra thời gian ra đời của triều đại Triệu",
        "sparql_type": "SELECT",
        "level": "simple",
    },
    {
        # "Compare King Quang Trung's time of death with the founding time of the Triệu dynasty"
        "step": "So sánh thời gian mất của Vua Quang Trung với thời gian ra đời của triều đại Triệu",
        "sparql_type": "ASK",
        "level": "complex",
    },
]

In [10]:
# Single-step plan: one simple SELECT lookup.
# Note: a later cell reassigns `steps`.
steps = [
    {
        "step": "Find the starting year of the Triệu dynasty",
        "sparql_type": "SELECT",
        "level": "simple",
    },
]

In [13]:
# Single-step plan: list the subclasses of Person with a simple SELECT.
steps = [
    {
        "step": "Find all subclasses of Person",
        "sparql_type": "SELECT",
        "level": "simple",
    },
]

In [14]:
# For each step: retrieve ontology context from Qdrant, then ask the LLM for a SPARQL query.
ans = generator.generate(steps)

Batches: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 38.14it/s]

2025-04-17 10:36:07,248 | [32mINFO[0m | httpx | HTTP Request: POST http://172.26.33.199:2513/collections/ontology_embedding/points/query "HTTP/1.1 200 OK"





2025-04-17 10:36:08,798 | [32mINFO[0m | httpx | HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [15]:
# Display the parsed result: one {"query", "step"} dict per input step.
ans

[{'query': 'SELECT ?subclass WHERE { ?subclass rdfs:subClassOf <https://CHeVIE.vn/ontologies/Person> . }',
  'step': 'Find all subclasses of Person'}]