In [2]:
from rdflib import Graph, Namespace, RDF, RDFS, OWL, Literal
from rdflib.namespace import XSD
import requests

# Define namespaces
VOC = Namespace("http://bvdheiden.nl/data/#voc/")
DATA = Namespace("http://bvdheiden.nl/data/#")

# Create a new graph for the ontology
g = Graph()
g.bind("voc", VOC)
g.bind("data", DATA)
g.bind("owl", OWL)
g.bind("rdf", RDF)
g.bind("rdfs", RDFS)

# Define Resource class
g.add((VOC.Resource, RDF.type, OWL.Class))
g.add((VOC.Resource, RDFS.label, Literal("Resource")))

# Define Team class
g.add((VOC.Team, RDF.type, OWL.Class))
g.add((VOC.Team, RDFS.label, Literal("Team")))

# Define containedIn property (Resource -> Resource)
g.add((VOC.containedIn, RDF.type, OWL.ObjectProperty))
g.add((VOC.containedIn, RDFS.label, Literal("contained in")))
g.add((VOC.containedIn, RDFS.domain, VOC.Resource))
g.add((VOC.containedIn, RDFS.range, VOC.Resource))

# Define ownedBy property (Resource -> Team)
g.add((VOC.ownedBy, RDF.type, OWL.ObjectProperty))
g.add((VOC.ownedBy, RDFS.label, Literal("owned by")))
g.add((VOC.ownedBy, RDFS.domain, VOC.Resource))
g.add((VOC.ownedBy, RDFS.range, VOC.Team))

# Serialize the graph to Turtle format
ttl_data = g.serialize(format='turtle')

# Create dataset in Fuseki (if it doesn't exist)
fuseki_url = "http://localhost:3030"
dataset_name = "ontology"

# Try to create the dataset
create_url = f"{fuseki_url}/$/datasets"
create_payload = {
    "dbName": dataset_name,
    "dbType": "tdb2"
}

try:
    response = requests.post(create_url, data=create_payload, auth=('admin', 'admin'))
    if response.status_code == 200:
        print(f"Dataset '{dataset_name}' created successfully")
    elif response.status_code == 409:
        print(f"Dataset '{dataset_name}' already exists")
    else:
        print(f"Response: {response.status_code} - {response.text}")
except Exception as e:
    print(f"Error creating dataset: {e}")

# Upload the ontology to Fuseki
upload_url = f"{fuseki_url}/{dataset_name}/data"
headers = {'Content-Type': 'text/turtle'}

try:
    response = requests.post(upload_url, data=ttl_data, headers=headers, auth=('admin', 'admin'))
    if response.status_code in [200, 201]:
        print(f"Ontology uploaded successfully to {dataset_name}")
    else:
        print(f"Error uploading: {response.status_code} - {response.text}")
except Exception as e:
    print(f"Error uploading ontology: {e}")

print("\nOntology created with:")
print(f"- Resource class")
print(f"- Team class")
print(f"- containedIn property (Resource -> Resource)")
print(f"- ownedBy property (Resource -> Team)")
print(f"\nNamespace: {VOC}")

Dataset 'ontology' created successfully
Ontology uploaded successfully to ontology

Ontology created with:
- Resource class
- Team class
- containedIn property (Resource -> Resource)
- ownedBy property (Resource -> Team)

Namespace: http://bvdheiden.nl/data/#voc/


In [10]:
from rdflib import Graph, Namespace, RDF, RDFS, Literal
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
import requests
from litellm import embedding

# Define namespaces
VOC = Namespace("http://bvdheiden.nl/data/#voc/")
DATA = Namespace("http://bvdheiden.nl/data/#")

# Resources to add
resources = [
    "user-authentication-service",
    "customer-profile-service",
    "product-catalog-service",
    "product-search-service",
    "product-recommendation-service",
    "pricing-and-promotions-service",
    "shopping-cart-service",
    "checkout-service",
    "order-management-service",
    "payment-gateway-service",
    "shipping-and-tracking-service",
    "inventory-management-service",
    "warehouse-fulfillment-service",
    "notification-service",
    "review-and-ratings-service",
    "analytics-and-reporting-service",
    "fraud-detection-service",
    "loyalty-and-rewards-service",
    "cms-content-service",
    "api-gateway-service"
]

# Teams to add
teams = ["alpha", "beta", "charlie", "delta"]

# Create graph for data
g = Graph()
g.bind("voc", VOC)
g.bind("data", DATA)
g.bind("rdfs", RDFS)

# Add teams
for team in teams:
    team_uri = DATA[f"team-{team}"]
    g.add((team_uri, RDF.type, VOC.Team))
    g.add((team_uri, RDFS.label, Literal(f"Team {team}")))

# Add resources and distribute ownership equally across teams
for idx, resource in enumerate(resources):
    resource_uri = DATA[resource]
    g.add((resource_uri, RDF.type, VOC.Resource))
    g.add((resource_uri, RDFS.label, Literal(resource)))
    
    # Assign to team (round-robin distribution)
    team_idx = idx % len(teams)
    team_uri = DATA[f"team-{teams[team_idx]}"]
    g.add((resource_uri, VOC.ownedBy, team_uri))

# Serialize to Turtle
ttl_data = g.serialize(format='turtle')

# Upload data to Fuseki using POST
fuseki_url = "http://localhost:3030"
dataset_name = "ontology"
upload_url = f"{fuseki_url}/{dataset_name}/data"
headers = {'Content-Type': 'text/turtle'}

try:
    response = requests.post(upload_url, data=ttl_data, headers=headers, auth=('admin', 'admin'))
    if response.status_code in [200, 201, 204]:
        print(f"Data uploaded successfully to Fuseki")
        print(f"\nAdded {len(teams)} teams:")
        for team in teams:
            print(f"  - Team {team}")
        print(f"\nAdded {len(resources)} resources, distributed across teams:")
        for idx, resource in enumerate(resources):
            team_idx = idx % len(teams)
            print(f"  - {resource} -> Team {teams[team_idx]}")
    else:
        print(f"Error uploading to Fuseki: {response.status_code} - {response.text}")
except Exception as e:
    print(f"Error uploading data to Fuseki: {e}")

# Now add data to Qdrant
print("\n" + "="*80)
print("Adding data to Qdrant...")

# Connect to Qdrant
qdrant_client = QdrantClient(host="localhost", port=6333)

# Helper function to generate embeddings via LiteLLM
def get_embedding(text):
    """Generate embedding using LiteLLM with Ollama's nomic-embed-text model"""
    response = embedding(
        model='ollama/nomic-embed-text',
        input=[text],
        api_base='http://localhost:11434'
    )
    return response.data[0]['embedding']

# Get embedding size from first embedding
sample_embedding = get_embedding("test")
vector_size = len(sample_embedding)
print(f"Using embedding model: nomic-embed-text via LiteLLM (dimension: {vector_size})")

# Create collection
collection_name = "ontology_entities"

try:
    # Try to create collection
    qdrant_client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE)
    )
    print(f"Collection '{collection_name}' created successfully")
except Exception as e:
    print(f"Collection may already exist or error occurred: {e}")

# Prepare points for Qdrant
points = []

# Add teams to Qdrant
point_id = 1
for team in teams:
    team_name = f"Team {team}"
    team_uri = str(DATA[f"team-{team}"])
    
    print(f"  Generating embedding for Team: {team_name}")
    embedding_vec = get_embedding(team_name)
    
    point = PointStruct(
        id=point_id,
        vector=embedding_vec,
        payload={
            "uri": team_uri,
            "label": team_name,
            "type": "Team",
            "rdf_type": str(VOC.Team)
        }
    )
    points.append(point)
    point_id += 1

# Add resources to Qdrant
for idx, resource in enumerate(resources):
    resource_uri = str(DATA[resource])
    
    print(f"  Generating embedding for Resource: {resource}")
    embedding_vec = get_embedding(resource)
    
    point = PointStruct(
        id=point_id,
        vector=embedding_vec,
        payload={
            "uri": resource_uri,
            "label": resource,
            "type": "Resource",
            "rdf_type": str(VOC.Resource)
        }
    )
    points.append(point)
    point_id += 1

# Upload points to Qdrant
try:
    qdrant_client.upsert(
        collection_name=collection_name,
        points=points
    )
    print(f"\n✓ Successfully uploaded {len(points)} points to Qdrant")
    print(f"  - {len(teams)} teams")
    print(f"  - {len(resources)} resources")
    print(f"  - Embeddings generated via LiteLLM (Ollama nomic-embed-text)")
except Exception as e:
    print(f"Error uploading to Qdrant: {e}")

print("="*80)

Data uploaded successfully to Fuseki

Added 4 teams:
  - Team alpha
  - Team beta
  - Team charlie
  - Team delta

Added 20 resources, distributed across teams:
  - user-authentication-service -> Team alpha
  - customer-profile-service -> Team beta
  - product-catalog-service -> Team charlie
  - product-search-service -> Team delta
  - product-recommendation-service -> Team alpha
  - pricing-and-promotions-service -> Team beta
  - shopping-cart-service -> Team charlie
  - checkout-service -> Team delta
  - order-management-service -> Team alpha
  - payment-gateway-service -> Team beta
  - shipping-and-tracking-service -> Team charlie
  - inventory-management-service -> Team delta
  - warehouse-fulfillment-service -> Team alpha
  - notification-service -> Team beta
  - review-and-ratings-service -> Team charlie
  - analytics-and-reporting-service -> Team delta
  - fraud-detection-service -> Team alpha
  - loyalty-and-rewards-service -> Team beta
  - cms-content-service -> Team charlie
 

In [14]:
from rdflib import Graph, Namespace, RDF, RDFS, Literal
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct
import requests
from litellm import embedding

# Define namespaces
VOC = Namespace("http://bvdheiden.nl/data/#voc/")
DATA = Namespace("http://bvdheiden.nl/data/#")

# Define endpoints for each service
service_endpoints = {
    "user-authentication-service": [
        "/api/v1/auth/login",
        "/api/v1/auth/logout",
        "/api/v1/auth/refresh-token",
        "/api/v1/auth/validate-session"
    ],
    "customer-profile-service": [
        "/api/v1/profile/get",
        "/api/v1/profile/update",
        "/api/v1/profile/delete"
    ],
    "product-catalog-service": [
        "/api/v1/products/list",
        "/api/v1/products/{id}",
        "/api/v1/categories/list"
    ],
    "product-search-service": [
        "/api/v1/search/products",
        "/api/v1/search/filters",
        "/api/v1/search/suggestions"
    ],
    "product-recommendation-service": [
        "/api/v1/recommendations/personalized",
        "/api/v1/recommendations/trending",
        "/api/v1/recommendations/similar"
    ],
    "pricing-and-promotions-service": [
        "/api/v1/pricing/calculate",
        "/api/v1/promotions/active",
        "/api/v1/promotions/validate-coupon"
    ],
    "shopping-cart-service": [
        "/api/v1/cart/add",
        "/api/v1/cart/remove",
        "/api/v1/cart/get",
        "/api/v1/cart/clear"
    ],
    "checkout-service": [
        "/api/v1/checkout/initiate",
        "/api/v1/checkout/validate",
        "/api/v1/checkout/complete"
    ],
    "order-management-service": [
        "/api/v1/orders/create",
        "/api/v1/orders/{id}",
        "/api/v1/orders/list",
        "/api/v1/orders/cancel"
    ],
    "payment-gateway-service": [
        "/api/v1/payment/process",
        "/api/v1/payment/refund",
        "/api/v1/payment/status"
    ],
    "shipping-and-tracking-service": [
        "/api/v1/shipping/calculate-cost",
        "/api/v1/shipping/create-label",
        "/api/v1/tracking/status",
        "/api/v1/tracking/history"
    ],
    "inventory-management-service": [
        "/api/v1/inventory/check-availability",
        "/api/v1/inventory/reserve",
        "/api/v1/inventory/release"
    ],
    "warehouse-fulfillment-service": [
        "/api/v1/fulfillment/create-pick-list",
        "/api/v1/fulfillment/pack-order",
        "/api/v1/fulfillment/ship-order"
    ],
    "notification-service": [
        "/api/v1/notifications/send-email",
        "/api/v1/notifications/send-sms",
        "/api/v1/notifications/push"
    ],
    "review-and-ratings-service": [
        "/api/v1/reviews/create",
        "/api/v1/reviews/list",
        "/api/v1/ratings/calculate-average"
    ],
    "analytics-and-reporting-service": [
        "/api/v1/analytics/track-event",
        "/api/v1/reports/sales",
        "/api/v1/reports/customer-behavior",
        "/api/v1/reports/inventory"
    ],
    "fraud-detection-service": [
        "/api/v1/fraud/analyze-transaction",
        "/api/v1/fraud/check-risk-score"
    ],
    "loyalty-and-rewards-service": [
        "/api/v1/loyalty/points-balance",
        "/api/v1/loyalty/earn-points",
        "/api/v1/loyalty/redeem-points"
    ],
    "cms-content-service": [
        "/api/v1/content/pages/{slug}",
        "/api/v1/content/banners",
        "/api/v1/content/articles"
    ],
    "api-gateway-service": [
        "/api/v1/gateway/route",
        "/api/v1/gateway/health"
    ]
}

# Create graph for endpoints
g = Graph()
g.bind("voc", VOC)
g.bind("data", DATA)
g.bind("rdfs", RDFS)

# Add endpoints to graph with containedIn relationship
all_endpoints = []
for service, endpoints in service_endpoints.items():
    service_uri = DATA[service]
    
    for endpoint_path in endpoints:
        # Create a safe ID for the endpoint
        endpoint_id = f"{service}-{endpoint_path.replace('/', '-').replace('{', '').replace('}', '')}"
        endpoint_uri = DATA[endpoint_id]
        
        # Add endpoint as a Resource
        g.add((endpoint_uri, RDF.type, VOC.Resource))
        g.add((endpoint_uri, RDFS.label, Literal(endpoint_path)))
        
        # Add containedIn relationship (endpoint is contained in service)
        g.add((endpoint_uri, VOC.containedIn, service_uri))
        
        all_endpoints.append((endpoint_id, endpoint_path, service))

# Serialize to Turtle
ttl_data = g.serialize(format='turtle')

# Upload data to Fuseki
fuseki_url = "http://localhost:3030"
dataset_name = "ontology"
upload_url = f"{fuseki_url}/{dataset_name}/data"
headers = {'Content-Type': 'text/turtle'}

try:
    response = requests.post(upload_url, data=ttl_data, headers=headers, auth=('admin', 'admin'))
    if response.status_code in [200, 201, 204]:
        print(f"✓ Endpoint data uploaded successfully to Fuseki")
        print(f"  Added {len(all_endpoints)} endpoints across {len(service_endpoints)} services")
    else:
        print(f"Error uploading to Fuseki: {response.status_code} - {response.text}")
except Exception as e:
    print(f"Error uploading endpoint data to Fuseki: {e}")

# Now add endpoints to Qdrant
print("\n" + "="*80)
print("Adding endpoints to Qdrant...")

# Connect to Qdrant
qdrant_client = QdrantClient(host="localhost", port=6333)
collection_name = "ontology_entities"

# Helper function to generate embeddings
def get_embedding(text):
    """Generate embedding using LiteLLM with Ollama's nomic-embed-text model"""
    response = embedding(
        model='ollama/nomic-embed-text',
        input=[text],
        api_base='http://localhost:11434'
    )
    return response.data[0]['embedding']

# Prepare points for Qdrant
points = []
point_id = 1000  # Start from 1000 to avoid conflicts with existing IDs

for endpoint_id, endpoint_path, service in all_endpoints:
    endpoint_uri = str(DATA[endpoint_id])
    
    # Create a descriptive text for better semantic search
    # Combine service name and endpoint path
    embedding_text = f"{service} {endpoint_path}"
    
    print(f"  Generating embedding for: {endpoint_path} (in {service})")
    embedding_vec = get_embedding(embedding_text)
    
    point = PointStruct(
        id=point_id,
        vector=embedding_vec,
        payload={
            "uri": endpoint_uri,
            "label": endpoint_path,
            "type": "Resource",
            "rdf_type": str(VOC.Resource),
            "service": service
        }
    )
    points.append(point)
    point_id += 1

# Upload points to Qdrant
try:
    qdrant_client.upsert(
        collection_name=collection_name,
        points=points
    )
    print(f"\n✓ Successfully uploaded {len(points)} endpoint points to Qdrant")
    print(f"  - All endpoints have 'containedIn' relationships to their services")
    print(f"  - Embeddings generated via LiteLLM (Ollama nomic-embed-text)")
except Exception as e:
    print(f"Error uploading endpoints to Qdrant: {e}")

print("="*80)
print("\nSummary by service:")
for service, endpoints in service_endpoints.items():
    print(f"  {service}: {len(endpoints)} endpoints")

✓ Endpoint data uploaded successfully to Fuseki
  Added 63 endpoints across 20 services

Adding endpoints to Qdrant...
  Generating embedding for: /api/v1/auth/login (in user-authentication-service)
  Generating embedding for: /api/v1/auth/logout (in user-authentication-service)
  Generating embedding for: /api/v1/auth/refresh-token (in user-authentication-service)
  Generating embedding for: /api/v1/auth/validate-session (in user-authentication-service)
  Generating embedding for: /api/v1/profile/get (in customer-profile-service)
  Generating embedding for: /api/v1/profile/update (in customer-profile-service)
  Generating embedding for: /api/v1/profile/delete (in customer-profile-service)
  Generating embedding for: /api/v1/products/list (in product-catalog-service)
  Generating embedding for: /api/v1/products/{id} (in product-catalog-service)
  Generating embedding for: /api/v1/categories/list (in product-catalog-service)
  Generating embedding for: /api/v1/search/products (in produc

In [22]:
from litellm import completion, embedding
from qdrant_client import QdrantClient
import requests
import json
from IPython.display import display, Markdown

# Configuration
fuseki_url = "http://localhost:3030"
dataset_name = "ontology"
sparql_endpoint = f"{fuseki_url}/{dataset_name}/sparql"
qdrant_client = QdrantClient(host="localhost", port=6333)
collection_name = "ontology_entities"

# Ontology description for context
ontology_description = """
Ontology Schema:
- Namespace: http://bvdheiden.nl/data/#voc/
- Classes: Resource, Team
- Properties:
  * containedIn: Resource -> Resource (describes containment relationships)
  * ownedBy: Resource -> Team (describes ownership relationships)
- Data namespace: http://bvdheiden.nl/data/#
"""

def get_embedding_vector(text):
    """Generate embedding using LiteLLM with Ollama's nomic-embed-text model"""
    response = embedding(
        model='ollama/nomic-embed-text',
        input=[text],
        api_base='http://localhost:11434'
    )
    return response.data[0]['embedding']

def search_entities_in_qdrant(query_text, limit=5):
    """Search for entities in Qdrant using semantic similarity"""
    print(f"  Generating embedding for query: '{query_text}'")
    query_vector = get_embedding_vector(query_text)
    
    print(f"  Searching Qdrant collection '{collection_name}'...")
    search_results = qdrant_client.search(
        collection_name=collection_name,
        query_vector=query_vector,
        limit=limit
    )
    
    entities = []
    for result in search_results:
        entity = {
            'uri': result.payload['uri'],
            'label': result.payload['label'],
            'type': result.payload['type'],
            'score': result.score
        }
        entities.append(entity)
        print(f"    Found: {entity['label']} (type: {entity['type']}, score: {entity['score']:.4f})")
    
    return entities

def classify_intent(question):
    """Classify the user's question intent"""
    prompt = f"""Classify the following question into one of these intents:
- query_resource_containment: Questions about what resources are contained in other resources
- query_resource_ownership: Questions about which team owns resources or which resources belong to teams

Question: {question}

Respond with ONLY the intent name, nothing else."""

    print(f"  Calling LiteLLM for intent classification...")
    response = completion(
        model="ollama/gpt-oss:20b",
        messages=[{"role": "user", "content": prompt}],
        api_base="http://localhost:11434"
    )
    
    intent = response.choices[0].message.content.strip()
    print(f"  LLM Response: {intent}")
    return intent

def extract_entity_references(question):
    """Extract entity names/references from the question"""
    prompt = f"""Extract the key entity names or references from this question.
Return them as a comma-separated list of search terms.

Question: {question}

Examples:
"Which resources are owned by Team alpha?" -> "Team alpha"
"What does the payment service contain?" -> "payment service"

Return only the search terms, nothing else."""

    response = completion(
        model="ollama/gpt-oss:20b",
        messages=[{"role": "user", "content": prompt}],
        api_base="http://localhost:11434"
    )
    
    terms = response.choices[0].message.content.strip()
    return [term.strip() for term in terms.split(',')]

def generate_sparql_query(question, intent, entities):
    """Generate SPARQL query based on question, intent, and resolved entities"""
    entities_context = "\n".join([
        f"  - {e['label']} (type: {e['type']}, URI: <{e['uri']}>)"
        for e in entities
    ])
    
    prompt = f"""Given this ontology:
{ontology_description}

Intent: {intent}
Question: {question}

Relevant entities found via semantic search:
{entities_context}

Generate a valid SPARQL query to answer this question. Use PREFIX voc: <http://bvdheiden.nl/data/#voc/> and PREFIX data: <http://bvdheiden.nl/data/#>

Important rules:
- Use the exact URIs from the entities list above when referencing specific entities
- For ownership queries, use the ownedBy property
- For containment queries, use the containedIn property
- Return only the SPARQL query, no explanations
- Use SELECT query format
"""

    print(f"  Calling LiteLLM for SPARQL generation...")
    response = completion(
        model="ollama/gpt-oss:20b",
        messages=[{"role": "user", "content": prompt}],
        api_base="http://localhost:11434"
    )
    
    sparql_query = response.choices[0].message.content.strip()
    print(f"  LLM Response received (length: {len(sparql_query)} chars)")
    
    # Clean up the query if it's wrapped in code blocks
    if sparql_query.startswith("```"):
        lines = sparql_query.split("\n")
        sparql_query = "\n".join(lines[1:-1]) if len(lines) > 2 else sparql_query
    
    return sparql_query

def validate_sparql(sparql_query):
    """Basic validation of SPARQL query"""
    # Check for basic SPARQL keywords
    required_keywords = ["SELECT", "WHERE"]
    query_upper = sparql_query.upper()
    
    for keyword in required_keywords:
        if keyword not in query_upper:
            print(f"  Validation failed: Missing keyword '{keyword}'")
            return False, f"Missing required keyword: {keyword}"
    
    print(f"  All required keywords found")
    return True, "Valid"

def execute_sparql(sparql_query):
    """Execute SPARQL query against Fuseki"""
    headers = {
        'Content-Type': 'application/sparql-query',
        'Accept': 'application/sparql-results+json'
    }
    
    print(f"  Sending request to: {sparql_endpoint}")
    print(f"  Headers: {headers}")
    print(f"  Auth: admin/***")
    
    response = requests.post(
        sparql_endpoint,
        data=sparql_query,
        headers=headers,
        auth=('admin', 'admin')
    )
    
    print(f"  Response status code: {response.status_code}")
    print(f"  Response headers: {dict(response.headers)}")
    
    if response.status_code == 200:
        result_json = response.json()
        print(f"  Response body (JSON):")
        print(f"    {json.dumps(result_json, indent=4)}")
        return result_json
    else:
        print(f"  Error response body: {response.text}")
        raise Exception(f"SPARQL query failed: {response.status_code} - {response.text}")

def interpret_results(question, results):
    """Interpret SPARQL results and generate natural language answer"""
    results_str = json.dumps(results, indent=2)
    
    prompt = f"""Given the following question and SPARQL query results, provide a clear, natural language answer.

Question: {question}

SPARQL Results:
{results_str}

Provide a concise answer in natural language. If there are no results, say so clearly."""

    print(f"  Calling LiteLLM for result interpretation...")
    response = completion(
        model="ollama/gpt-oss:20b",
        messages=[{"role": "user", "content": prompt}],
        api_base="http://localhost:11434"
    )
    
    answer = response.choices[0].message.content.strip()
    print(f"  LLM Response received (length: {len(answer)} chars)")
    return answer

def answer_question(question):
    """Main function to process a question and return an answer"""
    print(f"Question: {question}\n")
    print("="*80)
    
    # Step 1: Extract entity references from question
    print("\nStep 1: Extracting entity references from question...")
    search_terms = extract_entity_references(question)
    print(f"Search terms: {search_terms}\n")
    
    # Step 2: Search for entities in Qdrant
    print("Step 2: Searching for entities in Qdrant...")
    all_entities = []
    for term in search_terms:
        entities = search_entities_in_qdrant(term, limit=3)
        all_entities.extend(entities)
    
    # Remove duplicates based on URI
    unique_entities = []
    seen_uris = set()
    for entity in all_entities:
        if entity['uri'] not in seen_uris:
            unique_entities.append(entity)
            seen_uris.add(entity['uri'])
    
    print(f"\nFound {len(unique_entities)} unique entities\n")
    
    # Step 3: Classify intent
    print("Step 3: Classifying intent...")
    intent = classify_intent(question)
    print(f"Intent: {intent}\n")
    
    # Step 4: Generate SPARQL query with resolved entities
    print("Step 4: Generating SPARQL query with resolved entity URIs...")
    sparql_query = generate_sparql_query(question, intent, unique_entities)
    print(f"SPARQL Query:\n{sparql_query}\n")
    
    # Step 5: Validate SPARQL query
    print("Step 5: Validating SPARQL query...")
    is_valid, validation_msg = validate_sparql(sparql_query)
    if not is_valid:
        return f"Invalid SPARQL query: {validation_msg}"
    print(f"Validation: {validation_msg}\n")
    
    # Step 6: Execute query
    print("Step 6: Executing query against Fuseki...")
    try:
        results = execute_sparql(sparql_query)
        num_results = len(results.get('results', {}).get('bindings', []))
        print(f"  Results retrieved: {num_results} rows\n")
    except Exception as e:
        print(f"  Exception occurred: {e}")
        return f"Error executing query: {e}"
    
    # Step 7: Interpret results
    print("Step 7: Interpreting results...")
    answer = interpret_results(question, results)
    
    print("="*80)
    print(f"\n")
    
    # Display final answer in markdown format
    markdown_output = f"""## Question
{question}

## Answer
{answer}
"""
    display(Markdown(markdown_output))
    
    return answer

# Example usage
question = "Which resources are owned by Team alpha?"
answer_question(question)

Question: Which resources are owned by Team alpha?


Step 1: Extracting entity references from question...
Search terms: ['Team alpha']

Step 2: Searching for entities in Qdrant...
  Generating embedding for query: 'Team alpha'
  Searching Qdrant collection 'ontology_entities'...
    Found: Team alpha (type: Team, score: 1.0000)
    Found: Team beta (type: Team, score: 0.7464)
    Found: Team delta (type: Team, score: 0.6340)

Found 3 unique entities

Step 3: Classifying intent...
  Calling LiteLLM for intent classification...


  search_results = qdrant_client.search(


  LLM Response: query_resource_ownership
Intent: query_resource_ownership

Step 4: Generating SPARQL query with resolved entity URIs...
  Calling LiteLLM for SPARQL generation...
  LLM Response received (length: 156 chars)
SPARQL Query:
PREFIX voc: <http://bvdheiden.nl/data/#voc/>
PREFIX data: <http://bvdheiden.nl/data/#>

SELECT ?resource
WHERE {
  ?resource voc:ownedBy data:team-alpha .
}

Step 5: Validating SPARQL query...
  All required keywords found
Validation: Valid

Step 6: Executing query against Fuseki...
  Sending request to: http://localhost:3030/ontology/sparql
  Headers: {'Content-Type': 'application/sparql-query', 'Accept': 'application/sparql-results+json'}
  Auth: admin/***
  Response status code: 200
  Response headers: {'Date': 'Sat, 25 Oct 2025 12:55:17 GMT', 'Vary': 'Accept-Encoding, Origin', 'Fuseki-Request-Id': '50', 'Cache-Control': 'must-revalidate,no-cache,no-store', 'Pragma': 'no-cache', 'Content-Type': 'application/sparql-results+json; charset=utf-8', 'Conte

## Question
Which resources are owned by Team alpha?

## Answer
Team alpha owns the following resources:

- Fraud‑Detection Service  
- Order‑Management Service  
- Product‑Recommendation Service  
- User‑Authentication Service  
- Warehouse‑Fulfillment Service


'Team\u202falpha owns the following resources:\n\n- Fraud‑Detection Service  \n- Order‑Management Service  \n- Product‑Recommendation Service  \n- User‑Authentication Service  \n- Warehouse‑Fulfillment Service'

In [15]:
question = "How many endpoints are int he order management service?"
answer_question(question)

Question: How many endpoints are int he order management service?


Step 1: Extracting entity references from question...
Search terms: ['endpoints', 'order management service']

Step 2: Searching for entities in Qdrant...
  Generating embedding for query: 'endpoints'
  Searching Qdrant collection 'ontology_entities'...
    Found: /api/v1/loyalty/points-balance (type: Resource, score: 0.4818)
    Found: /api/v1/loyalty/earn-points (type: Resource, score: 0.4778)
    Found: /api/v1/loyalty/redeem-points (type: Resource, score: 0.4744)
  Generating embedding for query: 'order management service'
  Searching Qdrant collection 'ontology_entities'...
    Found: order-management-service (type: Resource, score: 0.9541)
    Found: /api/v1/orders/list (type: Resource, score: 0.7889)
    Found: inventory-management-service (type: Resource, score: 0.7830)

Found 6 unique entities

Step 3: Classifying intent...
  Calling LiteLLM for intent classification...


  search_results = qdrant_client.search(


  LLM Response: query_resource_containment
Intent: query_resource_containment

Step 4: Generating SPARQL query with resolved entity URIs...
  Calling LiteLLM for SPARQL generation...
  LLM Response received (length: 193 chars)
SPARQL Query:
PREFIX voc: <http://bvdheiden.nl/data/#voc/>
PREFIX data: <http://bvdheiden.nl/data/#>

SELECT (COUNT(?endpoint) AS ?count)
WHERE {
  ?endpoint voc:containedIn data:order-management-service .
}

Step 5: Validating SPARQL query...
  All required keywords found
Validation: Valid

Step 6: Executing query against Fuseki...
  Sending request to: http://localhost:3030/ontology/sparql
  Headers: {'Content-Type': 'application/sparql-query', 'Accept': 'application/sparql-results+json'}
  Auth: admin/***
  Response status code: 200
  Response headers: {'Date': 'Sat, 25 Oct 2025 12:26:07 GMT', 'Vary': 'Accept-Encoding, Origin', 'Fuseki-Request-Id': '43', 'Cache-Control': 'must-revalidate,no-cache,no-store', 'Pragma': 'no-cache', 'Content-Type': 'application/sp

'There are 4 endpoints in the order management service.'

In [24]:
question = "Which endpoints are owned by team charlie, either directly or indirectly?"
answer_question(question)

Question: Which endpoints are owned by team charlie, either directly or indirectly?


Step 1: Extracting entity references from question...
Search terms: ['team charlie']

Step 2: Searching for entities in Qdrant...
  Generating embedding for query: 'team charlie'
  Searching Qdrant collection 'ontology_entities'...
    Found: Team charlie (type: Team, score: 1.0000)
    Found: Team delta (type: Team, score: 0.6221)
    Found: Team alpha (type: Team, score: 0.6116)

Found 3 unique entities

Step 3: Classifying intent...
  Calling LiteLLM for intent classification...


  search_results = qdrant_client.search(


  LLM Response: query_resource_ownership
Intent: query_resource_ownership

Step 4: Generating SPARQL query with resolved entity URIs...
  Calling LiteLLM for SPARQL generation...
  LLM Response received (length: 210 chars)
SPARQL Query:
PREFIX voc: <http://bvdheiden.nl/data/#voc/>
PREFIX data: <http://bvdheiden.nl/data/#>

SELECT DISTINCT ?resource WHERE {
  ?resource (voc:containedIn)* ?ownedRes .
  ?ownedRes voc:ownedBy data:team-charlie .
}

Step 5: Validating SPARQL query...
  All required keywords found
Validation: Valid

Step 6: Executing query against Fuseki...
  Sending request to: http://localhost:3030/ontology/sparql
  Headers: {'Content-Type': 'application/sparql-query', 'Accept': 'application/sparql-results+json'}
  Auth: admin/***
  Response status code: 200
  Response headers: {'Date': 'Sat, 25 Oct 2025 12:59:20 GMT', 'Vary': 'Accept-Encoding, Origin', 'Fuseki-Request-Id': '52', 'Cache-Control': 'must-revalidate,no-cache,no-store', 'Pragma': 'no-cache', 'Content-Type': 'a

## Question
Which endpoints are owned by team charlie, either directly or indirectly?

## Answer
The endpoints owned by **team Charlie** (directly or indirectly) are:

- **CMS Content Service**  
  - `http://bvdheiden.nl/data/#cms-content-service`  
  - `http://bvdheiden.nl/data/#cms-content-service--api-v1-content-articles`  
  - `http://bvdheiden.nl/data/#cms-content-service--api-v1-content-banners`  
  - `http://bvdheiden.nl/data/#cms-content-service--api-v1-content-pages-slug`

- **Product Catalog Service**  
  - `http://bvdheiden.nl/data/#product-catalog-service`  
  - `http://bvdheiden.nl/data/#product-catalog-service--api-v1-categories-list`  
  - `http://bvdheiden.nl/data/#product-catalog-service--api-v1-products-id`  
  - `http://bvdheiden.nl/data/#product-catalog-service--api-v1-products-list`

- **Review & Ratings Service**  
  - `http://bvdheiden.nl/data/#review-and-ratings-service`  
  - `http://bvdheiden.nl/data/#review-and-ratings-service--api-v1-ratings-calculate-average`  
  - `http://bvdheiden.nl/data/#review-and-ratings-service--api-v1-reviews-create`  
  - `http://bvdheiden.nl/data/#review-and-ratings-service--api-v1-reviews-list`

- **Shipping & Tracking Service**  
  - `http://bvdheiden.nl/data/#shipping-and-tracking-service`  
  - `http://bvdheiden.nl/data/#shipping-and-tracking-service--api-v1-shipping-calculate-cost`  
  - `http://bvdheiden.nl/data/#shipping-and-tracking-service--api-v1-shipping-create-label`  
  - `http://bvdheiden.nl/data/#shipping-and-tracking-service--api-v1-tracking-history`  
  - `http://bvdheiden.nl/data/#shipping-and-tracking-service--api-v1-tracking-status`

- **Shopping Cart Service**  
  - `http://bvdheiden.nl/data/#shopping-cart-service`  
  - `http://bvdheiden.nl/data/#shopping-cart-service--api-v1-cart-add`  
  - `http://bvdheiden.nl/data/#shopping-cart-service--api-v1-cart-clear`  
  - `http://bvdheiden.nl/data/#shopping-cart-service--api-v1-cart-get`  
  - `http://bvdheiden.nl/data/#shopping-cart-service--api-v1-cart-remove`

These are all the endpoints that belong to team Charlie.


'The endpoints owned by **team\u202fCharlie** (directly or indirectly) are:\n\n- **CMS Content Service**  \n  - `http://bvdheiden.nl/data/#cms-content-service`  \n  - `http://bvdheiden.nl/data/#cms-content-service--api-v1-content-articles`  \n  - `http://bvdheiden.nl/data/#cms-content-service--api-v1-content-banners`  \n  - `http://bvdheiden.nl/data/#cms-content-service--api-v1-content-pages-slug`\n\n- **Product Catalog Service**  \n  - `http://bvdheiden.nl/data/#product-catalog-service`  \n  - `http://bvdheiden.nl/data/#product-catalog-service--api-v1-categories-list`  \n  - `http://bvdheiden.nl/data/#product-catalog-service--api-v1-products-id`  \n  - `http://bvdheiden.nl/data/#product-catalog-service--api-v1-products-list`\n\n- **Review & Ratings Service**  \n  - `http://bvdheiden.nl/data/#review-and-ratings-service`  \n  - `http://bvdheiden.nl/data/#review-and-ratings-service--api-v1-ratings-calculate-average`  \n  - `http://bvdheiden.nl/data/#review-and-ratings-service--api-v1-rev

In [25]:
question = "Which team has ownership of the most endpoints. Either directly, or indirectly."
answer_question(question)

Question: Which team has ownership of the most endpoints. Either directly, or indirectly.


Step 1: Extracting entity references from question...
Search terms: ['team', 'endpoints']

Step 2: Searching for entities in Qdrant...
  Generating embedding for query: 'team'
  Searching Qdrant collection 'ontology_entities'...
    Found: Team alpha (type: Team, score: 0.7359)
    Found: Team beta (type: Team, score: 0.7124)
    Found: Team delta (type: Team, score: 0.7023)
  Generating embedding for query: 'endpoints'
  Searching Qdrant collection 'ontology_entities'...
    Found: /api/v1/loyalty/points-balance (type: Resource, score: 0.4818)
    Found: /api/v1/loyalty/earn-points (type: Resource, score: 0.4778)
    Found: /api/v1/loyalty/redeem-points (type: Resource, score: 0.4744)

Found 6 unique entities

Step 3: Classifying intent...
  Calling LiteLLM for intent classification...


  search_results = qdrant_client.search(


  LLM Response: query_resource_ownership
Intent: query_resource_ownership

Step 4: Generating SPARQL query with resolved entity URIs...
  Calling LiteLLM for SPARQL generation...
  LLM Response received (length: 252 chars)
SPARQL Query:
PREFIX voc: <http://bvdheiden.nl/data/#voc/>
PREFIX data: <http://bvdheiden.nl/data/#>

SELECT ?team (COUNT(DISTINCT ?res) AS ?count)
WHERE {
  ?res voc:containedIn* ?parent .
  ?parent voc:ownedBy ?team .
}
GROUP BY ?team
ORDER BY DESC(?count)
LIMIT 1

Step 5: Validating SPARQL query...
  All required keywords found
Validation: Valid

Step 6: Executing query against Fuseki...
  Sending request to: http://localhost:3030/ontology/sparql
  Headers: {'Content-Type': 'application/sparql-query', 'Accept': 'application/sparql-results+json'}
  Auth: admin/***
  Response status code: 200
  Response headers: {'Date': 'Sat, 25 Oct 2025 13:06:29 GMT', 'Vary': 'Accept-Encoding, Origin', 'Fuseki-Request-Id': '57', 'Cache-Control': 'must-revalidate,no-cache,no-store'

## Question
Which team has ownership of the most endpoints. Either directly, or indirectly.

## Answer
Team Charlie owns the most endpoints (22 in total).


'Team Charlie owns the most endpoints (22 in total).'