# Query-Test for Wikidata and DBpedia

In [1]:
pip install requests SPARQLWrapper pandas

Collecting requests
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting SPARQLWrapper
  Using cached SPARQLWrapper-2.0.0-py3-none-any.whl.metadata (2.0 kB)
Collecting pandas
  Downloading pandas-2.2.3-cp313-cp313-win_amd64.whl.metadata (19 kB)
Collecting charset-normalizer<4,>=2 (from requests)
  Downloading charset_normalizer-3.4.0-cp313-cp313-win_amd64.whl.metadata (34 kB)
Collecting idna<4,>=2.5 (from requests)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting urllib3<3,>=1.21.1 (from requests)
  Using cached urllib3-2.2.3-py3-none-any.whl.metadata (6.5 kB)
Collecting certifi>=2017.4.17 (from requests)
  Using cached certifi-2024.8.30-py3-none-any.whl.metadata (2.2 kB)
Collecting rdflib>=6.1.1 (from SPARQLWrapper)
  Using cached rdflib-7.0.0-py3-none-any.whl.metadata (11 kB)
Collecting numpy>=1.26.0 (from pandas)
  Downloading numpy-2.1.2-cp313-cp313-win_amd64.whl.metadata (59 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-

In [2]:
from SPARQLWrapper import SPARQLWrapper, JSON
import json
import pandas as pd

In [3]:
# Wikidata SPARQL Endpoint
WIKIDATA_ENDPOINT = "https://skynet.coypu.org/sparql"
# DBpedia SPARQL Endpoint
DBPEDIA_ENDPOINT = "http://localhost:8890/sparql"

def query_sparql(endpoint, query):
    """Send a SPARQL query to the specified endpoint and return the results."""
    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    try:
        results = sparql.query().convert()
        return results
    except Exception as e:
        print(f"Query failed: {e}")
        return None

In [5]:
import requests

# Load the LC-QuAD 2.0 dataset (sample from GitHub)
url = "https://raw.githubusercontent.com/AskNowQA/LC-QuAD2.0/refs/heads/master/dataset/train.json"
response = requests.get(url)
lc_quad_data = response.json()

# Inspect the first question
print(json.dumps(lc_quad_data[0], indent=2))


{
  "NNQT_question": "What is the {periodical literature} for {mouthpiece} of {Delta Air Lines}",
  "uid": 19719,
  "subgraph": "simple question right",
  "template_index": 65,
  "question": "What periodical literature does Delta Air Lines use as a moutpiece?",
  "sparql_wikidata": " select distinct ?obj where { wd:Q188920 wdt:P2813 ?obj . ?obj wdt:P31 wd:Q1002697 } ",
  "sparql_dbpedia18": "select distinct ?obj where { ?statement <http://www.w3.org/1999/02/22-rdf-syntax-ns#subject> <http://wikidata.dbpedia.org/resource/Q188920> . ?statement <http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate> <http://www.wikidata.org/entity/P2813> . ?statement <http://www.w3.org/1999/02/22-rdf-syntax-ns#object> ?obj . ?obj <http://www.wikidata.org/entity/P31> <http://wikidata.dbpedia.org/resource/Q1002697> } ",
  "template": " <S P ?O ; ?O instanceOf Type>",
  "answer": [],
  "template_id": 1,
  "paraphrased_question": "What is Delta Air Line's periodical literature mouthpiece?"
}


In [20]:
def extract_and_run_queries(lc_quad_data):
    results_list = []

    for entry in lc_quad_data:
        # Extract the question and SPARQL queries based on the structure of your data
        question = entry.get("question", "No question provided")
        sparql_wikidata = entry.get("sparql_wikidata", None)
        sparql_dbpedia = entry.get("sparql_dbpedia18", None)

        # Test both SPARQL queries (Wikidata and DBpedia)
        if sparql_wikidata:
            result_wikidata = query_sparql(WIKIDATA_ENDPOINT, sparql_wikidata)
            success_wikidata = result_wikidata is not None
            results_list.append({
                "Question": question,
                "SPARQL_Query": sparql_wikidata,
                "Endpoint": "Wikidata",
                "Success": success_wikidata,
                "Result": result_wikidata if success_wikidata else "Error"
            })

        if sparql_dbpedia:
            result_dbpedia = query_sparql(DBPEDIA_ENDPOINT, sparql_dbpedia)
            success_dbpedia = result_dbpedia is not None
            results_list.append({
                "Question": question,
                "SPARQL_Query": sparql_dbpedia,
                "Endpoint": "DBpedia",
                "Success": success_dbpedia,
                "Result": result_dbpedia if success_dbpedia else "Error"
            })
    
    # Convert list of results to a DataFrame at once (efficient)
    results_df = pd.DataFrame(results_list)
    return results_df

# Load the LC-QuAD 2.0 dataset (sample from GitHub)
url = "https://raw.githubusercontent.com/AskNowQA/LC-QuAD2.0/refs/heads/master/dataset/train.json"
response = requests.get(url)
lc_quad_data = response.json()

# Run the queries and get the results
results_df = extract_and_run_queries(lc_quad_data)

# Save results to a CSV file
results_df.to_csv("lc_quad_sparql_results.csv", index=False)

# Optionally, save as JSON
results_df.to_json("lc_quad_sparql_results.json", orient="records")

# Print the first few rows of the results DataFrame
print(results_df.head())


Query failed: EndPointNotFound: It was not possible to connect to the given endpoint: check it is correct. 

Response:
b'HTTP ERROR 404 Not Found\nURI: http://skynet.coypu.org/sparql?query=+select+distinct+%3Fobj+where+%7B+wd%3AQ188920+wdt%3AP2813+%3Fobj+.+%3Fobj+wdt%3AP31+wd%3AQ1002697+%7D+&format=json&output=json&results=json\nSTATUS: 404\nMESSAGE: Not Found\n'
Query failed: EndPointNotFound: It was not possible to connect to the given endpoint: check it is correct. 

Response:
b'HTTP ERROR 404 Not Found\nURI: http://skynet.coypu.org/sparql?query=SELECT+%3Fanswer+WHERE+%7B+wd%3AQ169794+wdt%3AP26+%3FX+.+%3FX+wdt%3AP22+%3Fanswer%7D&format=json&output=json&results=json\nSTATUS: 404\nMESSAGE: Not Found\n'
Query failed: EndPointNotFound: It was not possible to connect to the given endpoint: check it is correct. 

Response:
b'HTTP ERROR 404 Not Found\nURI: http://skynet.coypu.org/sparql?query=ASK+WHERE+%7B+wd%3AQ174843+wdt%3AP106+wd%3AQ1804811+.+wd%3AQ174843+wdt%3AP106+wd%3AQ33231+%7D&form

KeyboardInterrupt: 

In [24]:
# Test endpoints
from SPARQLWrapper import SPARQLWrapper, JSON

# Define the SPARQL endpoints
WIKIDATA_ENDPOINT = "https://skynet.coypu.org/sparql"
DBPEDIA_ENDPOINT = "http://localhost:8890/sparql"

def query_sparql(endpoint, query):
    """Send a SPARQL query to the specified endpoint and return the results."""
    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    sparql.setTimeout(60)  # Timeout of 60 seconds
    try:
        results = sparql.query().convert()
        return results
    except Exception as e:
        print(f"Query failed: {e}")
        return None

# # Test a simple query on Wikidata
# test_query_wikidata = """
# SELECT ?item WHERE {
#   ?item wdt:P31 wd:Q5.
# } LIMIT 1
# """
# print("Testing Wikidata...")
# wikidata_result = query_sparql(WIKIDATA_ENDPOINT, test_query_wikidata)
# print(wikidata_result)

# Test a simple query on DBpedia with prefix correction
test_query_dbpedia = """
PREFIX dbo: <http://dbpedia.org/ontology/>
SELECT ?entity WHERE {
  ?entity rdf:type ?type.
} LIMIT 10
"""

print("Testing DBpedia...")
dbpedia_result = query_sparql(DBPEDIA_ENDPOINT, test_query_dbpedia)
print(dbpedia_result)


Testing DBpedia...
{'head': {'link': [], 'vars': ['entity']}, 'results': {'distinct': False, 'ordered': True, 'bindings': [{'entity': {'type': 'uri', 'value': 'http://www.openlinksw.com/virtrdf-data-formats#default-iid'}}, {'entity': {'type': 'uri', 'value': 'http://www.openlinksw.com/virtrdf-data-formats#default-iid-nullable'}}, {'entity': {'type': 'uri', 'value': 'http://www.openlinksw.com/virtrdf-data-formats#default-iid-nonblank'}}, {'entity': {'type': 'uri', 'value': 'http://www.openlinksw.com/virtrdf-data-formats#default-iid-nonblank-nullable'}}, {'entity': {'type': 'uri', 'value': 'http://www.openlinksw.com/virtrdf-data-formats#default'}}, {'entity': {'type': 'uri', 'value': 'http://www.openlinksw.com/virtrdf-data-formats#default-nullable'}}, {'entity': {'type': 'uri', 'value': 'http://www.openlinksw.com/virtrdf-data-formats#sql-varchar'}}, {'entity': {'type': 'uri', 'value': 'http://www.openlinksw.com/virtrdf-data-formats#sql-varchar-nullable'}}, {'entity': {'type': 'uri', 'val

## Wikidata testing the end point after finding right query on the website

In [26]:
from SPARQLWrapper import SPARQLWrapper, JSON

# Define the Skynet Coypu SPARQL endpoint
WIKIDATA_ENDPOINT = "https://skynet.coypu.org/sparql"

def query_sparql(endpoint, query):
    """Send a SPARQL query to the specified endpoint and return the results."""
    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)  # Force JSON return format
    sparql.setMethod('GET')       # Use GET method (not POST)
    sparql.setTimeout(60)         # Timeout of 60 seconds
    try:
        results = sparql.query().convert()
        return results
    except Exception as e:
        print(f"Query failed: {e}")
        return None

# Test a simple query on Wikidata via Skynet Coypu
test_query_wikidata = """
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?item ?itemLabel
WHERE
{
  ?item wdt:P31 wd:Q146.  # Must be a cat
  OPTIONAL { ?item rdfs:label ?itemLabel. }  # Get label
  FILTER(LANG(?itemLabel) = "en" || LANG(?itemLabel) = "")
} LIMIT 10
"""

print("Testing Wikidata via Skynet Coypu...")
wikidata_result = query_sparql(WIKIDATA_ENDPOINT, test_query_wikidata)
print(wikidata_result)

Testing Wikidata via Skynet Coypu...
Query failed: EndPointNotFound: It was not possible to connect to the given endpoint: check it is correct. 

Response:
b'HTTP ERROR 404 Not Found\nURI: http://skynet.coypu.org/sparql?query=%0APREFIX+wdt%3A+%3Chttp%3A//www.wikidata.org/prop/direct/%3E%0APREFIX+wd%3A+%3Chttp%3A//www.wikidata.org/entity/%3E%0APREFIX+rdfs%3A+%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0A%0ASELECT+%3Fitem+%3FitemLabel%0AWHERE%0A%7B%0A++%3Fitem+wdt%3AP31+wd%3AQ146.++%23+Must+be+a+cat%0A++OPTIONAL+%7B+%3Fitem+rdfs%3Alabel+%3FitemLabel.+%7D++%23+Get+label%0A++FILTER%28LANG%28%3FitemLabel%29+%3D+%22en%22+%7C%7C+LANG%28%3FitemLabel%29+%3D+%22%22%29%0A%7D+LIMIT+10%0A&format=json&output=json&results=json\nSTATUS: 404\nMESSAGE: Not Found\n'
None


In [29]:
import sys
import pandas as pd
from typing import List, Dict
from SPARQLWrapper import SPARQLWrapper, JSON

class WikiDataQueryResults:
    """
    A class that can be used to query data from Wikidata using SPARQL and return the results as a Pandas DataFrame or a list
    of values for a specific key.
    """
    def __init__(self, query: str):
        """
        Initializes the WikiDataQueryResults object with a SPARQL query string.
        :param query: A SPARQL query string.
        """
        self.user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
        self.endpoint_url = "https://query.wikidata.org/sparql"
        self.sparql = SPARQLWrapper(self.endpoint_url, agent=self.user_agent)
        self.sparql.setQuery(query)
        self.sparql.setReturnFormat(JSON)

    def __transform2dicts(self, results: List[Dict]) -> List[Dict]:
        """
        Helper function to transform SPARQL query results into a list of dictionaries.
        :param results: A list of query results returned by SPARQLWrapper.
        :return: A list of dictionaries, where each dictionary represents a result row and has keys corresponding to the
        variables in the SPARQL SELECT clause.
        """
        new_results = []
        for result in results:
            new_result = {}
            for key in result:
                new_result[key] = result[key]['value']
            new_results.append(new_result)
        return new_results

    def _load(self) -> List[Dict]:
        """
        Helper function that loads the data from Wikidata using the SPARQLWrapper library, and transforms the results into
        a list of dictionaries.
        :return: A list of dictionaries, where each dictionary represents a result row and has keys corresponding to the
        variables in the SPARQL SELECT clause.
        """
        results = self.sparql.queryAndConvert()['results']['bindings']
        results = self.__transform2dicts(results)
        return results

    def load_as_dataframe(self) -> pd.DataFrame:
        """
        Executes the SPARQL query and returns the results as a Pandas DataFrame.
        :return: A Pandas DataFrame representing the query results.
        """
        results = self._load()
        return pd.DataFrame.from_dict(results)
    
data_extracter = WikiDataQueryResults(query)
df = data_extracter.load_as_dataframe()
print(df.head())

NameError: name 'query' is not defined

NameError: name 'query' is not defined