## Import Libraries

In [1]:
import sys
import pandas as pd
import seaborn as sn
import sklearn as sk
import matplotlib as mb
from typing import List, Dict
from SPARQLWrapper import SPARQLWrapper, JSON

## Extract data from Wikidata

In [2]:
class WikiDataQueryResults:
    """
    Runs a SPARQL query against the Wikidata query endpoint and returns the results as a Pandas DataFrame.
    """
    def __init__(self, query: str):
        """
        Initializes the WikiDataQueryResults object with a SPARQL query string. The query is only configured here;
        it is not sent until one of the load methods is called.
        :param query: A SPARQL query string.
        """
        # The user agent string embeds the running interpreter's major.minor version.
        self.user_agent = f"WDQS-example Python/{sys.version_info[0]}.{sys.version_info[1]}"
        self.endpoint_url = "https://query.wikidata.org/sparql"
        self.sparql = SPARQLWrapper(self.endpoint_url, agent=self.user_agent)
        self.sparql.setQuery(query)
        self.sparql.setReturnFormat(JSON)

    def __transform2dicts(self, results: List[Dict]) -> List[Dict]:
        """
        Helper function that flattens SPARQL result rows into plain dictionaries. Does not read instance state.
        :param results: A list of result rows, each mapping a variable name to a dictionary with a 'value' entry.
        :return: A list of dictionaries, one per row, mapping each variable name present in that row to its 'value'.
        Every other entry of the per-variable dictionary is discarded.
        """
        return [
            {variable: cell['value'] for variable, cell in row.items()}
            for row in results
        ]

    def _load(self) -> List[Dict]:
        """
        Helper function that executes the query through SPARQLWrapper and flattens the returned rows.
        :return: A list of dictionaries, one per result row, mapping variable names to their string values.
        """
        # The converted JSON response keeps the rows under results -> bindings.
        bindings = self.sparql.queryAndConvert()['results']['bindings']
        return self.__transform2dicts(bindings)

    def load_as_dataframe(self) -> pd.DataFrame:
        """
        Executes the SPARQL query and returns the results as a Pandas DataFrame.
        :return: A Pandas DataFrame with one row per result row and one column per variable name.
        """
        # The rows are a list of dicts, which the DataFrame constructor accepts directly;
        # DataFrame.from_dict is documented for dict input.
        return pd.DataFrame(self._load())

In [None]:
# SPARQL query text passed unchanged to WikiDataQueryResults. It selects ?country
# items that satisfy two wdt: constraints (P31 -> Q6256 and P30 -> Q46), uses the
# wikibase:label service to populate ?countryLabel (auto language, then "en"),
# and limits the result to 10 rows.
# NOTE(review): P31/Q6256 and P30/Q46 presumably mean "instance of: country" and
# "continent: Europe" — confirm against Wikidata.
query = """
SELECT ?country ?countryLabel WHERE {
  ?country wdt:P31 wd:Q6256 .
  ?country wdt:P30 wd:Q46 .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
LIMIT 10
"""

# Configure the query wrapper, then execute the query (the request is made by
# load_as_dataframe, not by the constructor) and print the resulting DataFrame.
wd_query = WikiDataQueryResults(query)
df = wd_query.load_as_dataframe()
print(df)


                              country countryLabel
0  http://www.wikidata.org/entity/Q20       Norway
1  http://www.wikidata.org/entity/Q27      Ireland
2  http://www.wikidata.org/entity/Q28      Hungary
3  http://www.wikidata.org/entity/Q29        Spain
4  http://www.wikidata.org/entity/Q31      Belgium
5  http://www.wikidata.org/entity/Q32   Luxembourg
6  http://www.wikidata.org/entity/Q33      Finland
7  http://www.wikidata.org/entity/Q34       Sweden
8  http://www.wikidata.org/entity/Q35      Denmark
9  http://www.wikidata.org/entity/Q36       Poland
