# SPARQL Agent: Basic Usage

This notebook demonstrates basic usage of SPARQL Agent for querying knowledge graphs.

## Setup

First, install SPARQL Agent:

In [None]:
# Install if needed: uncomment the line below and run this cell once.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# %pip install sparql-agent

In [None]:
import os
from sparql_agent import SPARQLAgent
from sparql_agent.formatting import ResultFormatter
import pandas as pd

# Set API key (optional for basic queries)
# os.environ['ANTHROPIC_API_KEY'] = 'your-key-here'

## Example 1: Basic Query

Query UniProt for protein information:

In [None]:
# SPARQL text: proteins with their label and organism, restricted to
# organisms whose scientific name is "Homo sapiens" (first 10 matches).
human_protein_query = """
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>

SELECT ?protein ?name ?organism
WHERE {
    ?protein a up:Protein ;
             rdfs:label ?name ;
             up:organism ?organism .
    ?organism a up:Taxon ;
              up:scientificName "Homo sapiens" .
}
LIMIT 10
"""

# Agent bound to the public UniProt endpoint; later cells reuse `agent`.
agent = SPARQLAgent(endpoint="https://sparql.uniprot.org/sparql")

# Run the query and report how many results came back.
human_proteins = agent.execute_sparql(human_protein_query)
print(f"Found {len(human_proteins)} proteins")

# Preview the results as a DataFrame (the last expression is rendered by Jupyter).
human_proteins_df = pd.DataFrame(human_proteins)
human_proteins_df.head()

## Example 2: Schema Discovery

Check that the endpoint is reachable and discover what data it exposes:

In [None]:
from sparql_agent.discovery import ConnectivityChecker

checker = ConnectivityChecker("https://sparql.uniprot.org/sparql")

# Only ask for statistics once the endpoint has answered the connectivity check.
if checker.check_connectivity():
    print("✓ Endpoint is accessible")

    # Get statistics
    stats = checker.get_statistics()

    # The ',' thousands separator is only valid for numbers: applying it to
    # the 'N/A' fallback string raises ValueError, so format conditionally.
    triple_count = stats.get('triple_count')
    if triple_count is None:
        triple_count = 'N/A'
    elif isinstance(triple_count, (int, float)):
        triple_count = f"{triple_count:,}"

    print("\nEndpoint Statistics:")
    print(f"  Total triples: {triple_count}")
    print(f"  Classes: {stats.get('class_count', 'N/A')}")
    print(f"  Properties: {stats.get('property_count', 'N/A')}")
else:
    # Make the failure visible instead of silently printing nothing.
    print("✗ Endpoint is not accessible")

## Example 3: Working with Ontologies

Use the Ontology Lookup Service (OLS4) to search for ontology terms, here in the Experimental Factor Ontology (EFO):

In [None]:
from sparql_agent.ontology import OLSClient

ols = OLSClient()

# Search for terms
terms = ols.search("diabetes", ontology="efo", limit=5)

print("Diabetes-related terms in EFO:")
for term in terms:
    # A term may have no definition: cover both a missing key and an explicit
    # None/empty value (slicing None would raise TypeError).
    # NOTE(review): assumes the definition is a string — confirm against OLSClient.
    definition = term.get('definition') or 'N/A'

    # Append the ellipsis only when the text was actually cut off.
    if len(definition) > 100:
        definition = f"{definition[:100]}..."

    print(f"\n{term['label']}")
    print(f"  IRI: {term['iri']}")
    print(f"  Definition: {definition}")

## Example 4: Formatting Results

Format query results in different ways:

In [None]:
formatter = ResultFormatter()

# Get some data. Uses `agent` (the UniProt agent created in Example 1).
# The PREFIX declarations make the query self-contained: `up:` and `rdfs:`
# are otherwise undeclared, which is a syntax error in standard SPARQL.
sparql = """
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?protein ?name
WHERE {
    ?protein a up:Protein ;
             rdfs:label ?name .
    FILTER(CONTAINS(?name, "insulin"))
}
LIMIT 5
"""

results = agent.execute_sparql(sparql)

# Format as table
print("Table format:")
print(formatter.to_table(results))

# Convert to DataFrame and render it with Jupyter's rich display
df = formatter.to_dataframe(results)
display(df)

## Example 5: Multiple Endpoints

Query different endpoints:

In [None]:
# Each query declares the prefixes it uses so that it does not depend on
# prefixes being predefined by the endpoint or injected by the agent.

# UniProt
uniprot = SPARQLAgent(endpoint="https://sparql.uniprot.org/sparql")
uniprot_results = uniprot.execute_sparql(
    "PREFIX up: <http://purl.uniprot.org/core/> "
    "SELECT ?p WHERE { ?p a up:Protein } LIMIT 5"
)
print(f"UniProt: {len(uniprot_results)} results")

# Wikidata
wikidata = SPARQLAgent(endpoint="https://query.wikidata.org/sparql")
wikidata_results = wikidata.execute_sparql(
    "PREFIX wd: <http://www.wikidata.org/entity/> "
    "PREFIX wdt: <http://www.wikidata.org/prop/direct/> "
    "SELECT ?item WHERE { ?item wdt:P31 wd:Q7187 } LIMIT 5"
)
print(f"Wikidata: {len(wikidata_results)} results")

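## Example 6: Error Handling

Catch timeouts and endpoint failures explicitly, so that a failing query is reported instead of stopping the notebook: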

In [None]:
# Import the package's TimeoutError under an alias: importing it under its own
# name would shadow Python's builtin TimeoutError for the rest of the session.
from sparql_agent.core import EndpointError
from sparql_agent.core import TimeoutError as QueryTimeoutError

try:
    # This might timeout or fail
    results = agent.execute_sparql(
        "SELECT * WHERE { ?s ?p ?o }"  # Very broad query
    )
except QueryTimeoutError as e:
    print(f"Query timeout: {e}")
except EndpointError as e:
    print(f"Endpoint error: {e}")
except Exception as e:
    # Catch-all kept on purpose so the demo reports any other failure
    # instead of aborting a "Run All".
    print(f"Error: {e}")

## Next Steps

- Explore [biomedical queries notebook](biomedical_queries.ipynb)
- Try [federated queries notebook](advanced_federation.ipynb)
- Read the [tutorials](https://github.com/yourusername/sparql-agent/docs/tutorials)