In [1]:
# Imports & Configuration
import os
import re

import requests
import google.generativeai as genai
from rdflib import Graph


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# Initialize Knowledge Graph
# Loads the Turtle file from the current working directory into an in-memory graph.
kg = Graph()
kg.parse("model_card.ttl", format="turtle")

# Configure Gemini API
# The key is read from the environment so it is never stored in the notebook.
# NOTE(review): an API key was previously committed in this cell; treat that key
# as compromised and revoke/rotate it.
GTOKEN = os.environ.get("GOOGLE_API_KEY")
if not GTOKEN:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it before running this notebook."
    )
genai.configure(api_key=GTOKEN)
gemini = genai.GenerativeModel('gemini-1.5-pro')

# GraphDB Connection Settings
# Overridable through the GRAPHDB_ENDPOINT environment variable; the default is
# the repository URL this notebook was originally developed against.
GRAPHDB_ENDPOINT = os.environ.get(
    "GRAPHDB_ENDPOINT",
    "http://Vishals-MacBook-Air.local:7200/repositories/thesis",
)

# Prefix declarations interpolated at the top of the LLM prompt.
PREFIXES = """
PREFIX mcro: <http://purl.obolibrary.org/obo/mcro.owl#>
PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
"""

# Cell 3: Text to SPARQL Conversion
def generate_sparql(question: str, max_terms: int = 10) -> str:
    """Convert a natural-language question into a SPARQL query via Gemini.

    The prompt is assembled from the module-level ``PREFIXES`` string and a
    list of class/property terms queried from the module-level ``kg`` graph,
    then sent to the module-level ``gemini`` model.

    Args:
        question: The natural-language question to translate.
        max_terms: Maximum number of schema terms listed in the prompt.

    Returns:
        The query text found inside the reply's code fence, stripped of
        surrounding whitespace.

    Raises:
        ValueError: If the model reply contains no fenced code block.
    """
    # Get schema context information. The rdf:/rdfs: prefixes are declared in
    # the query itself so it does not depend on the graph's namespace bindings.
    classes_props = kg.query("""
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT DISTINCT ?resource ?label WHERE {
            { ?resource a rdfs:Class }
            UNION
            { ?resource a rdf:Property }
            OPTIONAL { ?resource rdfs:label ?label }
        }
    """)

    # Build context string from ontology terms: the label when one exists,
    # otherwise the fragment after '#' in the resource IRI.
    terms = [
        f"- {row.label or row.resource.split('#')[-1]} ({row.resource})"
        for row in classes_props
    ]
    context = "\n".join(terms[:max_terms])

    prompt = f"""
    {PREFIXES}
    
    Convert this question to SPARQL using the following schema terms:
    {context}
    
    Question: {question}
    
    Rules:
    1. Use SELECT DISTINCT for model queries
    2. mcro:Model is the base class
    3. Use FILTER with xsd:decimal for numeric comparisons
    4. Always include prefixes
    5. Return only valid SPARQL within ```sparql blocks
    
    SPARQL:
    """

    response = gemini.generate_content(prompt)
    return _extract_sparql(response.text)


def _extract_sparql(reply: str) -> str:
    """Return the query inside the first code fence of a model reply."""
    # Preferred form: an explicitly labelled ```sparql fence.
    match = re.search(r"```sparql(.*?)```", reply, re.DOTALL | re.IGNORECASE)
    if match is None:
        # Fallback: first fence of any kind, skipping an optional language-tag line.
        match = re.search(r"```(?:[A-Za-z]*[ \t]*\r?\n)?(.*?)```", reply, re.DOTALL)
    if match is None:
        raise ValueError(
            f"Model reply contained no fenced SPARQL block: {reply[:200]!r}"
        )
    return match.group(1).strip()


In [3]:

# SPARQL Execution
def run_sparql(query: str, timeout: float = 10) -> dict:
    """Execute a SPARQL query against the GraphDB endpoint.

    The query is POSTed as the raw request body to ``GRAPHDB_ENDPOINT`` and a
    SPARQL JSON result is requested via the ``Accept`` header.

    Args:
        query: SPARQL query text.
        timeout: Value forwarded to ``requests.post`` as ``timeout``.

    Returns:
        The decoded JSON body when the response is ``ok``. Otherwise a dict of
        the form ``{"error": <message>}``, where the message is the response
        text or the string form of whatever exception was raised.
    """
    try:
        response = requests.post(
            GRAPHDB_ENDPOINT,
            headers={
                "Accept": "application/sparql-results+json",
                "Content-Type": "application/sparql-query"
            },
            # Encode explicitly so non-ASCII characters in the query are sent
            # as UTF-8 rather than in whatever encoding the HTTP stack picks
            # for a str body.
            data=query.encode("utf-8"),
            timeout=timeout
        )
        return response.json() if response.ok else {"error": response.text}
    except Exception as e:
        # Deliberately broad: callers expect an error payload, never an exception.
        return {"error": str(e)}


In [4]:

# Results to Natural Language
def format_results(results: dict) -> str:
    """Render a SPARQL JSON result, or an error payload, as readable text.

    Args:
        results: Either ``{"error": ...}`` or a dict in the SPARQL JSON results
            layout (``head.vars``, ``results.bindings``, or ``boolean``).

    Returns:
        ``"Error: ..."`` for an error payload, ``"Answer: ..."`` for an ASK
        result, ``"No results found"`` when there are no bindings, otherwise a
        bulleted listing with one line per binding row. Each line shows the
        primary variable's value (the part after the last ``#``) followed by
        any other bound variables in parentheses.
    """
    if "error" in results:
        return f"Error: {results['error']}"

    # ASK queries carry a top-level "boolean" instead of result bindings.
    if "boolean" in results:
        return f"Answer: {results['boolean']}"

    bindings = (results.get('results') or {}).get('bindings') or []
    if not bindings:
        return "No results found"

    # Prefer the projection order declared in head.vars; without it, fall back
    # to the first variable bound in any row.
    head_vars = (results.get('head') or {}).get('vars') or []
    if head_vars:
        primary_var = head_vars[0]
    else:
        primary_var = next((name for row in bindings for name in row), None)

    output = []
    for row in bindings:
        # A row can omit the primary variable (e.g. it came from an OPTIONAL).
        cell = row.get(primary_var)
        entity = cell['value'].split('#')[-1] if cell else "(unbound)"
        details = [
            f"{k}: {v['value']}"
            for k, v in row.items()
            if k != primary_var
        ]
        output.append(f"- {entity}" + (f" ({', '.join(details)})" if details else ""))

    return f"Query Results ({len(output)} items):\n" + "\n".join(output)

# Natural-language question that is passed to generate_sparql() in the next cell.
question = "Show all model with vit"


In [5]:
# Generate SPARQL query
sparql = generate_sparql(question)
print("Generated SPARQL query:")
print(sparql)

# Execute the query
results = run_sparql(sparql)

# Display results. format_results() supplies its own "Query Results (N items):"
# heading for result listings, so only a blank separator line is printed here
# instead of a second, duplicate heading.
print()
print(format_results(results))

Generated SPARQL query:
PREFIX mcro: <http://purl.obolibrary.org/obo/mcro.owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT ?model
WHERE {
    ?model a mcro:Model .
    ?model ?p ?o .
    FILTER(REGEX(STR(?o), "vit", "i"))
}

Query Results:
Query Results (1 items):
- openaiclipvitlargepatch14
