In [8]:
!pip install llama-index
!pip install llama-index-embeddings-huggingface
%pip install ipython-ngql nebula3-python





[notice] A new release of pip is available: 24.0 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 24.0 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
import requests
from typing import Optional, List, Mapping, Any

from llama_index.core import SimpleDirectoryReader, SummaryIndex
from llama_index.core.callbacks import CallbackManager
from llama_index.core.llms import (
    CustomLLM,
    CompletionResponse,
    CompletionResponseGen,
    LLMMetadata,
)
from llama_index.core.llms.callbacks import llm_completion_callback
from llama_index.core import Settings


class LLM(CustomLLM):
    context_window: int = 3900
    num_output: int = 256
    model_name: str = "custom"
    api_url: str = "http://px101.prod.exalead.com:8110/v1/chat/completions"
    base_prompt=""
    headers = {
        'Authorization': 'Bearer vtYvpB9U+iUQwl0K0MZIj+Uo5u6kilAZJdgHGVBEhNc=',
        'Content-Type': 'application/json'
    }

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            context_window=self.context_window,
            num_output=self.num_output,
            model_name=self.model_name,
        )

    @llm_completion_callback()
    def complete(self,prompt: str, **kwargs: Any) -> CompletionResponse:
        full_prompt = self.base_prompt + prompt

        messages = [{"role": "user", "content": full_prompt}]
        payload = {
            "model":"meta-llama/Meta-Llama-3-8B-Instruct",
            "messages": messages,
            "max_tokens": 1000,
            "top_p": 1,
            "stop": ["string"],
            "response_format": {
                "type": "text", 
                "temperature": 0.7
            }
        }
        response = requests.post(self.api_url, headers=self.headers, json=payload)
        if response.status_code == 200:
            generated_response = response.json()['choices'][0]['message']['content'].strip()
            return CompletionResponse(text=generated_response)
        else:
            return CompletionResponse(text="Error: API request failed")

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, **kwargs: Any
    ) -> CompletionResponseGen:
        full_prompt = self.base_prompt + prompt

        messages = [{"role": "user", "content": full_prompt}]
        payload = {
            "model":"meta-llama/Meta-Llama-3-8B-Instruct",
            "messages": messages,
            "max_tokens": 1500,
            "top_p": 1,
            "stop": ["string"],
            "response_format": {
                "type": "text", 
                "temperature": 0.7
            }
        }
        response = requests.post(self.api_url, headers=self.headers, json=payload)
        if response.status_code == 200:
            generated_response = response.json()['choices'][0]['message']['content'].strip()
            for token in generated_response:
                yield CompletionResponse(text=token, delta=token)
        else:
            yield CompletionResponse(text="Error", delta="Error")

In [10]:
import numpy as np
from typing import List
from llama_index.core.embeddings import BaseEmbedding
import requests

class CustomAPIEmbeddings(BaseEmbedding):
    _api_key: str = "vtYvpB9U+iUQwl0K0MZIj+Uo5u6kilAZJdgHGVBEhNc="
    _embeddings_url: str = "http://px101.prod.exalead.com:8110/v1/embeddings"
    _headers = {
        'Authorization': 'Bearer ' + _api_key,
        'Content-Type': 'application/json'
    }

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @classmethod
    def class_name(cls) -> str:
        return "custom_api"

    def _aget_query_embedding(self, query: str) -> List[float]:
        return self._get_embeddings([query], "Represent a document for semantic search:")[0]

    def _get_query_embedding(self, query: str) -> List[float]:
        return self._get_embeddings([query], "Represent a document for semantic search:")[0]

    def _get_text_embedding(self, text: str) -> List[float]:
        return self._get_embeddings([text], "Represent a document for semantic search:")[0]

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        return self._get_embeddings(texts, "Represent a document for semantic search:")

    def _get_embeddings(self, texts: List[str], instruction: str) -> List[List[float]]:
        payload = {
            "model": "BAAI/bge-large-en-v1.5",
            "input": texts,
            "encoding_format": "float",
            "instruct": instruction,
        }
        response = requests.post(self._embeddings_url, headers=self._headers, json=payload)
        if response.status_code == 200:
            response_data = response.json()
            embeddings_list = [item['embedding'] for item in response_data['data']]
            return np.array(embeddings_list).tolist()  
        else:
            raise Exception(f"Failed to get embeddings: {response.status_code}, {response.text}")


In [11]:
from llama_index.core import Settings

Settings.llm = LLM()
# embed_model = CustomAPIEmbeddings(embed_batch_size=2)
Settings.embed_model = "local:BAAI/bge-large-en-v1.5"
# Settings.embed_model = embed_model
Settings.chunk_size = 512


In [37]:
import os
os.environ["NEBULA_USER"] = "root"
os.environ["NEBULA_PASSWORD"] = "nebula" 
os.environ[
    "NEBULA_ADDRESS"
] = "127.0.0.1:9669"  

space_name = "poc2"
edge_types, rel_prop_names = ["Created","Modified","LastModifiedBy","Responsible","Type"], ["Created","Modified","LastModifiedBy","Responsible","Type"]
tag_prop_names = ["label, comment", "label, comment, version"]  # Comma-separated strings

tags = ["class","ontology"] 

In [38]:
from llama_index.core import StorageContext
from llama_index.graph_stores.nebula import NebulaGraphStore

graph_store = NebulaGraphStore(
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
    tag_prop_names =tag_prop_names

)
storage_context = StorageContext.from_defaults(graph_store=graph_store)

In [39]:
# from llama_index.core import SimpleDirectoryReader, KnowledgeGraphIndex
# directory_path = r"C:\Users\AAI47\poc2\flask-api\code\rdf"

# documents = SimpleDirectoryReader(directory_path).load_data()
file_path = r"C:\Users\AAI47\poc2\flask-api\code\rdf\rdf.txt"

reader = SimpleDirectoryReader(input_files=[file_path])
documents = reader.load_data()

In [40]:

from llama_index.core import KnowledgeGraphIndex
from llama_index.core import download_loader

from llama_index.readers.wikipedia import WikipediaReader


print(documents)
kg_index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    max_triplets_per_chunk=5,
    include_embeddings=True,
)

[Document(id_='d82abb80-af47-4c78-9279-5ecd5b1bbd42', embedding=None, metadata={'file_path': 'C:\\Users\\AAI47\\poc2\\flask-api\\code\\rdf\\rdf.txt', 'file_name': 'rdf.txt', 'file_type': 'text/plain', 'file_size': 3609, 'creation_date': '2024-07-09', 'last_modified_date': '2024-07-09'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='\r\n** RDF Data Model and Ontology **\r\n    @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\r\n    @prefix prov: <http://www.w3.org/ns/prov#> .\r\n    @prefix dsqt: <http://www.3ds.com/RDF/Corpus/dsqt/> .\r\n    @prefix ds6w: <http://www.3ds.com/vocabularies/ds6w/> .\r\n    @prefix ds6w1: <http://www.w3.org/2002/07/ds6w/> .\r\n    @prefix owl: <http://www.w3.org/2002/07/owl#> .\r\n    @prefix pno: <http://www.3

Query failed. Query: INSERT VERTEX `class`(name)   VALUES "Ds6w":("Ds6w");INSERT VERTEX `class`(name)   VALUES "Owl:ontology":("Owl:ontology");INSERT EDGE `Created`(`Created`)   VALUES "Ds6w"->"Owl:ontology"@5675850276216886:("Is");, Param: {}Error message: Query failed. Query: INSERT VERTEX `class`(name)   VALUES "Ds6w":("Ds6w");INSERT VERTEX `class`(name)   VALUES "Owl:ontology":("Owl:ontology");INSERT EDGE `Created`(`Created`)   VALUES "Ds6w"->"Owl:ontology"@5675850276216886:("Is");, Param: {}Error message: SemanticError: No schema found for `class'
Query failed. Query: INSERT VERTEX `class`(name)   VALUES "Ds6w":("Ds6w");INSERT VERTEX `class`(name)   VALUES "Owl:ontology":("Owl:ontology");INSERT EDGE `Created`(`Created`)   VALUES "Ds6w"->"Owl:ontology"@5675850276216886:("Is");, Param: {}Error message: Query failed. Query: INSERT VERTEX `class`(name)   VALUES "Ds6w":("Ds6w");INSERT VERTEX `class`(name)   VALUES "Owl:ontology":("Owl:ontology");INSERT EDGE `Created`(`Created`)   VALUE

RetryError: RetryError[<Future at 0x1d2951c70d0 state=finished raised ValueError>]

In [None]:
# print(response)
query_engine = kg_index.as_query_engine(
    include_text=False, response_mode="tree_summarize"
)
response = query_engine.query(
    "write a sparql query from this nlq : search for posts created today ",
)
print(response)

Based on the provided context information and the given knowledge sequence, I'll attempt to write a SPARQL query to answer the query "search for posts created today".

First, let's analyze the given knowledge sequence:
```
('Ds6w:lastmodifiedby', 'Rdfs:domain', 'Rdf:unionof(vpmreference document swym:post swym:idea')
('Swym:post', 'Rdf:type', 'Owl:class')
```
We can infer that:

* `Ds6w:lastmodifiedby` is a property of `Rdf:unionof` (which is a union of `vpmreference`, `document`, `swym:post`, and `swym:idea`)
* `Swym:post` is a class (of type `Owl:class`)

Now, let's look at the kg_schema:
```
kg_schema: {'schema': "Node properties: [{'tag': 'Class', 'properties': [('label', 'tring'), ('comment', 'tring')]}, {'tag': 'Ontology', 'properties': [('label', 'tring'), ('comment', 'tring'), ('version', 'tring')]}, {'tag': 'entity', 'properties': [('name', 'tring')]}]\nEdge properties: [{'edge': 'Created', 'properties': [('time_details', 'tring')]}, {'edge': 'LastModifiedBy', 'properties': [(

In [None]:
# from llama_index.core.query_engine import KnowledgeGraphQueryEngine

# from llama_index.core import StorageContext
# from llama_index.graph_stores.nebula import NebulaGraphStore
# from llama_index.core import StorageContext

# query_engine = KnowledgeGraphQueryEngine(
#     storage_context=storage_context,
#     llm=LLM(),
#     verbose=True,
#     graph_query_synthesis_prompt="generate response", 

)

SyntaxError: unmatched ')' (738992439.py, line 13)

In [None]:
# response = query_engine.query(
#     "Tell me about Peter Quill?",
# )


ValidationError: 1 validation error for LLMPredictStartEvent
template
  value is not a valid dict (type=type_error.dict)