In [1]:
import sys
import os
import IPython
from IPython.display import Markdown, display               

In [3]:
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_ollama import ChatOllama
from langchain_openai import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings

from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader

from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.chains import RetrievalQA,  ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

persist_directory = 'chroma/chromaOnto2/'
embeddings = OllamaEmbeddings(model="mxbai-embed-large") # mxbai-embed-large, llama3.2


LLM = ChatOllama(model="llama3.2", temperature=0.8)
LLM.invoke("Hello world!")


AIMessage(content="Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?", additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-07-04T14:27:23.5205003Z', 'done': True, 'done_reason': 'stop', 'total_duration': 9896982900, 'load_duration': 4477301000, 'prompt_eval_count': 28, 'prompt_eval_duration': 1674091200, 'eval_count': 26, 'eval_duration': 3744539100, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-a0b44cbd-fef3-4e52-a3d2-7f964a634a9d-0', usage_metadata={'input_tokens': 28, 'output_tokens': 26, 'total_tokens': 54})

In [4]:
from langchain_community.document_loaders import TomlLoader

loader = TomlLoader("OntoRaster.toml")
docs = loader.load()

In [5]:
len(docs)

1

In [6]:
doc = docs[0]
doc.page_content

'{"title": "OntoRaster v2024 Demonstration", "tabGroups": [{"name": "RasSPARQL Functions", "tabs": [{"name": "Q1. Get Dimension", "query": "#####################################################################\\n#### Query 1. Find the dimension of user-specific Raster dataset ####\\n#####################################################################\\n\\nPREFIX :\\t<https://github.com/aghoshpro/OntoRaster/>\\nPREFIX rasdb:\\t<https://github.com/aghoshpro/RasterDataCube/>\\n\\nSELECT ?rasterName ?dimension {\\n?gridCoverage a :Raster .\\n?gridCoverage rasdb:rasterName ?rasterName .\\nFILTER (CONTAINS(?rasterName, \'\')) # Also try \'Munich\', \'Sweden\', \'Bavaria\', \'Tyrol\'\\nBIND (rasdb:rasDimension(?rasterName) AS ?dimension)\\n}\\n"}, {"name": "Q2. Cell Operation", "query": "#################################################################################################################\\n#### Query 2. Perform element-wise operation over raster array cells with user-specific ope

In [6]:
all_page_text=[p.page_content for p in docs]
joined_page_text=" ".join(all_page_text)

# Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1500,chunk_overlap = 150)
splits = text_splitter.split_text(joined_page_text)

In [7]:
len(joined_page_text)

40776

In [8]:
splits[0]

'{"title": "OntoRaster v2024 Demonstration", "tabGroups": [{"name": "RasSPARQL Functions", "tabs": [{"name": "Q1. Get Dimension", "query": "#####################################################################\\n#### Query 1. Find the dimension of user-specific Raster dataset ####\\n#####################################################################\\n\\nPREFIX :\\t<https://github.com/aghoshpro/OntoRaster/>\\nPREFIX rasdb:\\t<https://github.com/aghoshpro/RasterDataCube/>\\n\\nSELECT ?rasterName ?dimension {\\n?gridCoverage a :Raster .\\n?gridCoverage rasdb:rasterName ?rasterName .\\nFILTER (CONTAINS(?rasterName, \'\')) # Also try \'Munich\', \'Sweden\', \'Bavaria\', \'Tyrol\'\\nBIND (rasdb:rasDimension(?rasterName) AS ?dimension)\\n}\\n"}, {"name": "Q2. Cell Operation", "query": "#################################################################################################################\\n#### Query 2. Perform element-wise operation over raster array cells with user-specific ope

In [9]:
vectordb = Chroma.from_texts(
    texts=splits,
    embedding=embeddings,
    persist_directory=persist_directory,
    collection_name="OntoRaster"
)

In [16]:
# Build prompt
from langchain.prompts import PromptTemplate
template = """Use the following pieces of context to write a valid SPARQL query with PREFIX declarations as an answer to the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Keep the answer as concise as possible. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template,)

# Run chain
qa_chain = RetrievalQA.from_chain_type(LLM,
                                       retriever=vectordb.as_retriever(),
                                       return_source_documents=True,
                                       chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})

In [15]:
question = "Find the dimension of user-specific Raster dataset"
result = qa_chain.invoke({"query": question})
print(result["result"])

SELECT ?rasterName ?dimension {\n?gridCoverage a :Raster .\n?gridCoverage rasdb:rasterName ?rasterName .\nFILTER (CONTAINS(?rasterName, '')) # Also try 'Munich', 'Sweden', 'Bavaria', 'Tyrol'\nBIND (rasdb:rasDimension(?rasterName) AS ?dimension)\n}\n}


In [11]:
question = "Find the dimension of user-specific Raster dataset over Bavaria"
result = qa_chain.invoke({"query": question})
print(result["result"])

SELECT ?rasterName ?dimension {\n?gridCoverage a :Raster .\n?gridCoverage rasdb:rasterName ?rasterName .\nFILTER (CONTAINS(?rasterName, 'Bavaria')) \nBIND (rasdb:rasDimension(?rasterName) AS ?dimension)\n}


In [12]:
question = "Find spatial average temperature over Munich Region and corresponding raster dataset"
result = qa_chain.invoke({"query": question})
print(result["result"])

I don't know the answer to this question.


In [17]:
question = "Find spatial average temperature over Bolzano Region and corresponding raster dataset"
result = qa_chain.invoke({"query": question})
print('\n')
print(result["result"])



I can help with that. Here is a SPARQL query based on the context provided:
```
PREFIX : <https://github.com/aghoshpro/OntoRaster/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rasdb: <https://github.com/aghoshpro/RasterDataCube/>

SELECT ?regionName ?tempK ?regionWkt ?regionWktLabel ?regionWktColor
{
  ?region a :Region_ITALY .
  ?region rdfs:label ?regionName .
  ?region geo:asWKT ?regionWkt .
  BIND (?regionName AS ?regionWktLabel) .
  ?gridCoverage a :Raster .
  ?gridCoverage rasdb:rasterName ?rasterName .
  FILTER (?regionName = 'Bolzano') .
  FILTER (CONTAINS(?rasterName, 'Tyrol')) .
  BIND ('2023-03-03T00:00:00+00:00'^^xsd:dateTime AS ?timeStamp) .
  BIND (rasdb:rasSpatialAverage(?timeStamp, ?regionWkt, ?rasterName) AS ?tempK) .
  FILTER (?tempK > 250) .
  BIND (
    IF(?tempK < 260, "blue" ,
       IF(?tempK < 265, "#008AFF",
          IF(?tempK < 270, "magenta",
             IF(?tempK < 275, "red",
          