In [None]:
import re
import os
import requests
import google.generativeai as genai
from typing import Dict, List, Optional, Tuple
from getpass import getpass
from rdflib import Graph

# Configuration class for better organization
class Config:
    """Static settings shared by the Gemini client and the SPARQL query code.

    Attributes are plain class-level constants; the class is never
    instantiated in this file.
    """

    # URL that SPARQL queries are POSTed to (GraphDB repository "thesis").
    GRAPHDB_ENDPOINT = "http://Vishals-MacBook-Air.local:7200/repositories/thesis"

    # Prefix declarations placed in front of queries and shown to the model.
    # The value starts and ends with a newline, one declaration per line.
    PREFIXES = "\n" + "\n".join((
        "PREFIX mcro: <http://purl.obolibrary.org/obo/MCRO_>",
        "PREFIX prov1: <https://www.w3.org/ns/prov#>",
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
        "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
        "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>",
    )) + "\n"

    @staticmethod
    def get_api_key():
        """Return the Gemini API key.

        The GEMINI_API_KEY environment variable wins when it is set to a
        non-empty value; otherwise the user is prompted via getpass so the
        key is not echoed to the terminal.
        """
        env_key = os.getenv("GEMINI_API_KEY")
        if env_key:
            return env_key
        return getpass("Enter Gemini API key: ")

# Gemini setup. This runs at import time: Config.get_api_key() reads
# GEMINI_API_KEY from the environment and otherwise prompts via getpass, so
# loading this module without that variable set waits for interactive input.
genai.configure(api_key=Config.get_api_key())
# Module-level model handle; KnowledgeGraphQuerySystem.generate_sparql calls
# gemini.generate_content() on it.
gemini = genai.GenerativeModel('gemini-2.0-flash')

class KnowledgeGraphQuerySystem:
    """Answer natural-language questions by generating and running SPARQL.

    A question is turned into a SPARQL query by the module-level Gemini
    model, the query is POSTed to ``Config.GRAPHDB_ENDPOINT`` and the JSON
    result is rendered as a bullet list.
    """

    # A fenced code block: either tagged ``sparql`` (any case) or untagged
    # (fence followed directly by whitespace). Other language tags are not
    # accepted so that the tag never ends up inside the extracted query.
    _FENCE_RE = re.compile(r"```(?:sparql|(?=\s))(.*?)```", re.DOTALL | re.IGNORECASE)

    # The query prologue: leading PREFIX / BASE declarations and comment lines.
    _PROLOGUE_RE = re.compile(
        r"^\s*(?:(?:PREFIX\s+[^\s:]*:\s*<[^>]*>|BASE\s*<[^>]*>|#[^\n]*)\s*)*",
        re.IGNORECASE,
    )

    def __init__(self):
        """Create the HTTP session reused for every endpoint request."""
        self.session = requests.Session()

    def get_schema_context(self) -> str:
        """Return known schema patterns from your Turtle file"""
        return """
        # Known Model Properties
        mcro:hasUseCase - links to use case information
        mcro:hasTrainingData - links to training data details
        mcro:hasModelArchitecture - links to architecture information
        mcro:hasLimitation - links to limitations
        
        # Common Text Value Pattern
        ?section prov1:hasTextValue ?text
        
        # Example Model Structure:
        mcro:Falconsainsfwimagedetection a mcro:Model ;
            mcro:hasUseCase mcro:Falconsainsfwimagedetection-UseCase .
            
        mcro:Falconsainsfwimagedetection-UseCase prov1:hasTextValue "NSFW Image Classification..." .
        """

    @staticmethod
    def _ensure_prefixes(query: str) -> str:
        """Prepend every Config.PREFIXES declaration the query does not declare itself."""
        missing = []
        for declaration in Config.PREFIXES.strip().splitlines():
            label = declaration.split()[1]  # e.g. "mcro:"
            if not re.search(r"PREFIX\s+" + re.escape(label), query, re.IGNORECASE):
                missing.append(declaration)
        if not missing:
            return query
        return "\n".join(missing) + "\n" + query

    def generate_sparql(self, question: str) -> str:
        """Convert a natural-language question to a SPARQL query string.

        The model is asked up to three times. A reply counts as usable when
        it contains a fenced code block (tagged ``sparql`` or untagged).

        Args:
            question: The user's question, inserted verbatim into the prompt.

        Returns:
            The extracted query with any missing ``Config.PREFIXES``
            declarations prepended, or ``""`` when no attempt produced a
            fenced query.
        """
        prompt = f"""{Config.PREFIXES}
        
        Your task: Convert questions to SPARQL using this knowledge graph structure:
        
        # Knowledge Graph Patterns
        {self.get_schema_context()}
        
        # Important Rules
        1. Always include these prefixes at the top:
           PREFIX mcro: <http://purl.obolibrary.org/obo/MCRO_>
           PREFIX prov1: <https://www.w3.org/ns/prov#>
           PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        2. Use proper path patterns:
           ?model -> mcro:hasProperty -> ?section -> prov1:hasTextValue -> ?value
        3. Return ONLY the SPARQL query in triple backticks
        
        # Example 1: List all models
        ```sparql
        PREFIX mcro: <http://purl.obolibrary.org/obo/MCRO_>
        SELECT ?model WHERE {{
          ?model a mcro:Model .
        }}
        ```
        
        # Question: {question}
        
        # SPARQL:
        """

        for attempt in range(3):  # Retry mechanism
            try:
                response = gemini.generate_content(prompt)
                match = self._FENCE_RE.search(response.text)
            except Exception as e:
                # Deliberately broad: any client/API failure (including a
                # blocked response whose .text raises) just costs one attempt.
                print(f"Gemini Error: {e}")
                continue
            if match:
                # The model may declare only some of the prefixes it uses,
                # so add whichever known declarations are missing.
                return self._ensure_prefixes(match.group(1).strip())
            print("Failed to extract SPARQL from response. Retrying...")
        return ""

    def validate_sparql(self, query: str) -> Tuple[bool, str]:
        """Run basic sanity checks on a SPARQL query.

        The prologue (PREFIX/BASE declarations and comment lines) is skipped
        before checking, so a query that starts with PREFIX lines is judged
        by its actual query form, and a prefix declaration alone does not
        count as a reference to the mcro namespace.

        Args:
            query: Full SPARQL text, with or without a prologue.

        Returns:
            ``(True, "")`` when the query is a SELECT that references the
            mcro namespace, otherwise ``(False, reason)``.
        """
        body = query[self._PROLOGUE_RE.match(query).end():]
        if not body.lower().startswith("select"):
            return False, "Missing SELECT clause"
        if "mcro:" not in body and "MCRO_" not in body:
            return False, "Query does not reference mcro: namespace"
        return True, ""

    def _execute_query(self, query: str) -> dict:
        """POST a SPARQL query to the configured endpoint.

        Args:
            query: Complete SPARQL query text.

        Returns:
            The decoded SPARQL JSON result on success, otherwise a dict with
            a single ``"error"`` key describing the HTTP status or exception.
        """
        headers = {
            "Accept": "application/sparql-results+json",
            "Content-Type": "application/sparql-query",
            "User-Agent": "Gemini-KG-Query-System"
        }

        try:
            response = self.session.post(
                Config.GRAPHDB_ENDPOINT,
                headers=headers,
                # Encode explicitly: a str body is encoded as Latin-1 by
                # http.client and fails on any character outside that range.
                data=query.encode("utf-8"),
                timeout=15
            )
            if response.ok:
                return response.json()
            return {"error": f"{response.status_code}: {response.text}"}
        except Exception as e:
            # Deliberately broad: callers print the returned error instead of
            # crashing on network problems or a non-JSON response.
            return {"error": str(e)}

    @staticmethod
    def _display_value(term: dict) -> str:
        """Return the text shown for one SPARQL JSON term.

        IRIs are shortened to the part after the last ``#``; literals and
        blank nodes are returned unchanged so text containing ``#`` is not cut.
        """
        value = term['value']
        if term.get('type', 'uri') != 'uri':
            return value
        # Fall back to the full IRI if nothing follows the '#'.
        return value.rsplit('#', 1)[-1] or value

    def format_results(self, results: dict) -> str:
        """Render a SPARQL JSON result (or an error dict) as display text.

        The first projected variable (``head.vars[0]``, falling back to the
        first key of the first row) names each bullet; the remaining bound
        variables are listed in parentheses. A row where the primary
        variable is unbound is shown as ``(unbound)``.

        Args:
            results: Dict returned by ``_execute_query``.

        Returns:
            A human-readable, multi-line summary string.
        """
        if "error" in results:
            return f"❌ Error: {results['error']}"

        bindings = results.get('results', {}).get('bindings', [])
        if not bindings:
            return "🔍 No results found matching your query"

        # Unbound variables are omitted from a row, so take the primary
        # variable from the header when it is available.
        projected = results.get('head', {}).get('vars') or list(bindings[0])
        primary_var = projected[0]

        output = []
        for row in bindings:
            if primary_var in row:
                entity = self._display_value(row[primary_var])
            else:
                entity = "(unbound)"
            details = [
                f"{k}: {self._display_value(v)}"
                for k, v in row.items()
                if k != primary_var and 'value' in v
            ]
            output.append(f"• {entity}" + (f" ({', '.join(details)})" if details else ""))

        return f"📊 Found {len(output)} results:\n" + "\n".join(output)

    def test_query(self):
        """Print and run a fixed query listing up to five mcro:Model instances."""
        test = f"""
        {Config.PREFIXES}
        SELECT ?model WHERE {{
            ?model a mcro:Model .
        }} LIMIT 5
        """
        print("🔍 Executing test SPARQL query:")
        print(test)  # ← Shows the SPARQL query
        results = self._execute_query(test)
        print(self.format_results(results))

    def interactive_query(self):
        """Run the question/answer loop until the user quits.

        Typing ``exit`` or ``quit``, closing stdin, or pressing Ctrl-C at the
        prompt ends the loop. Blank input is ignored.
        """
        print("🧠 Knowledge Graph Query System (type 'exit' to quit)")
        while True:
            try:
                question = input("\n❓ Question: ").strip()
            except (EOFError, KeyboardInterrupt):
                # End the session cleanly instead of surfacing a traceback.
                print()
                break
            if question.lower() in ('exit', 'quit'):
                break
            if not question:
                # Nothing to ask; avoid a pointless model call.
                continue

            print("🧠 Generating SPARQL...")
            sparql = self.generate_sparql(question)

            if not sparql:
                print("⚠️ Failed to generate valid SPARQL query")
                continue

            print("\n📄 Generated SPARQL Query:")
            print(sparql)  # ← Shows the generated SPARQL
            print("\n🔍 Executing query...")
            results = self._execute_query(sparql)
            print(self.format_results(results))

# Main execution: when run as a script, first run the fixed test query
# (KnowledgeGraphQuerySystem.test_query) and print its results, then hand
# control to the interactive question loop until the user quits.
if __name__ == "__main__":
    kg_system = KnowledgeGraphQuerySystem()
    print("\n🧪 Running test query...")
    kg_system.test_query()
    print("\n🔁 Starting interactive mode...")
    kg_system.interactive_query()


🧪 Running test query...
🔍 Executing test SPARQL query:

        
PREFIX mcro: <http://purl.obolibrary.org/obo/MCRO_>
PREFIX prov1: <https://www.w3.org/ns/prov#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

        SELECT ?model WHERE {
            ?model a mcro:Model .
        } LIMIT 5
        
📊 Found 5 results:
• http://purl.obolibrary.org/obo/MCRO_Falconsainsfwimagedetection
• http://purl.obolibrary.org/obo/MCRO_Phi2GGUF
• http://purl.obolibrary.org/obo/MCRO_YOLOv8DetectionModel
• http://purl.obolibrary.org/obo/MCRO_allMiniLML6v2
• http://purl.obolibrary.org/obo/MCRO_allmpnetbasev2

🔁 Starting interactive mode...
🧠 Knowledge Graph Query System (type 'exit' to quit)
🧠 Generating SPARQL...

📄 Generated SPARQL Query:
PREFIX mcro: <http://purl.obolibrary.org/obo/MCRO_>
PREFIX prov1: <https://www.w3.org/ns/prov#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT 