In [1]:
import sys
import pandas as pd
from typing import List, Dict
from SPARQLWrapper import SPARQLWrapper, JSON

class WikiDataQueryResults:
    """
    Runs a SPARQL query against the Wikidata Query Service and exposes the result rows as a Pandas DataFrame.

    The query is bound once at construction time; every call to ``load_as_dataframe`` (or ``_load``) sends it to the
    endpoint again.
    """
    def __init__(self, query: str):
        """
        Initializes the WikiDataQueryResults object with a SPARQL query string.
        :param query: A SPARQL query string.
        """
        # The agent string identifies this client to the endpoint; it embeds the interpreter's major.minor version.
        self.user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
        self.endpoint_url = "https://query.wikidata.org/sparql"
        self.sparql = SPARQLWrapper(self.endpoint_url, agent=self.user_agent)
        self.sparql.setQuery(query)
        self.sparql.setReturnFormat(JSON)

    def __transform2dicts(self, results: List[Dict]) -> List[Dict]:
        """
        Helper function to flatten SPARQL result bindings into plain row dictionaries.
        :param results: The ``bindings`` list of a SPARQL JSON response; each item maps a variable name to a dictionary
        that holds the bound value under the key ``'value'``.
        :return: A list with one dictionary per result row, mapping each variable bound in that row to its ``'value'``.
        Every other entry of the per-variable dictionary is dropped.
        :raises KeyError: If a per-variable dictionary has no ``'value'`` entry.
        """
        return [
            {variable: cell['value'] for variable, cell in row.items()}
            for row in results
        ]

    def _fetch(self) -> Dict:
        """
        Helper function that sends the query to the endpoint and returns the converted response unchanged.
        :return: Whatever ``SPARQLWrapper.queryAndConvert`` returns for the configured JSON return format.
        """
        return self.sparql.queryAndConvert()

    def _load(self) -> List[Dict]:
        """
        Helper function that loads the data from Wikidata using the SPARQLWrapper library, and transforms the results into
        a list of dictionaries.
        :return: A list of dictionaries, one per result row, keyed by the variables bound in that row.
        :raises KeyError: If the response has no ``results``/``bindings`` section.
        """
        return self.__transform2dicts(self._fetch()['results']['bindings'])

    def load_as_dataframe(self) -> pd.DataFrame:
        """
        Executes the SPARQL query and returns the results as a Pandas DataFrame.

        Columns derived from the result rows keep the order pandas gives them. Variables that the response declares in
        ``head.vars`` but that no row binds are appended as empty columns, so an empty result set (or a variable that is
        never bound) still yields a frame with every selected column instead of silently dropping it.
        :return: A Pandas DataFrame with one row per result binding.
        :raises KeyError: If the response has no ``results``/``bindings`` section.
        """
        response = self._fetch()
        rows = self.__transform2dicts(response['results']['bindings'])
        frame = pd.DataFrame(rows)

        # 'head' is read defensively: a response without it behaves exactly as before.
        declared = response.get('head', {}).get('vars', [])
        unbound = [name for name in declared if name not in frame.columns]
        if unbound:
            # Appending (rather than reordering) leaves the columns callers already rely on untouched.
            frame = frame.reindex(columns=[*frame.columns, *unbound])
        return frame

In [2]:
# SPARQL query sent to the Wikidata Query Service. Reading the WHERE clause top to bottom:
#   - ?city is an instance of (P31) city (Q515), or of any transitive subclass (P279*) of it
#   - ?city has country (P17) United States (Q30)
#   - ?location is the coordinate location (P625), returned as a "Point(longitude latitude)" literal
#   - ?founding_date is the inception date (P571)
# The label service fills the *Label variables in English. ?location is a literal rather than an item,
# so ?locationLabel simply repeats the ?location value.
# All four triple patterns are mandatory: a city lacking a coordinate or an inception statement is left out.
query = """
SELECT ?city ?cityLabel ?location ?locationLabel ?founding_date
WHERE {
  ?city wdt:P31/wdt:P279* wd:Q515.
  ?city wdt:P17 wd:Q30.
  ?city wdt:P625 ?location.
  ?city wdt:P571 ?founding_date.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
"""

In [4]:
# Bind the query to a client object; the request itself is only sent by load_as_dataframe(),
# which contacts the live endpoint, so this cell needs network access and its rows can change over time.
data_extracter = WikiDataQueryResults(query)
df = data_extracter.load_as_dataframe()
# Bare last expression so the notebook renders the frame. Cells hold the raw 'value' fields from the response
# (e.g. founding_date is ISO-8601 text, not a parsed datetime).
df

Unnamed: 0,city,founding_date,location,cityLabel,locationLabel
0,http://www.wikidata.org/entity/Q62,1776-06-29T00:00:00Z,Point(-122.416388888 37.7775),San Francisco,Point(-122.416388888 37.7775)
1,http://www.wikidata.org/entity/Q65,1781-09-04T00:00:00Z,Point(-118.24368 34.05223),Los Angeles,Point(-118.24368 34.05223)
2,http://www.wikidata.org/entity/Q5917,1909-02-17T00:00:00Z,Point(-117.999722222 33.692777777),Huntington Beach,Point(-117.999722222 33.692777777)
3,http://www.wikidata.org/entity/Q16552,1769-07-16T00:00:00Z,Point(-117.1625 32.715),San Diego,Point(-117.1625 32.715)
4,http://www.wikidata.org/entity/Q16553,1777-11-29T00:00:00Z,Point(-121.872777777 37.304166666),San Jose,Point(-121.872777777 37.304166666)
...,...,...,...,...,...
5866,http://www.wikidata.org/entity/Q49244,1869-01-01T00:00:00Z,Point(-117.881388888 33.740833333),Santa Ana,Point(-117.881388888 33.740833333)
5867,http://www.wikidata.org/entity/Q214164,1869-01-01T00:00:00Z,Point(-122.1925 47.614444444),Bellevue,Point(-122.1925 47.614444444)
5868,http://www.wikidata.org/entity/Q484678,1866-01-01T00:00:00Z,Point(-122.268055555 37.870277777),Berkeley,Point(-122.268055555 37.870277777)
5869,http://www.wikidata.org/entity/Q913543,1868-01-01T00:00:00Z,Point(-121.763611111 36.92),Watsonville,Point(-121.763611111 36.92)
