In [1]:
import pandas as pd
import SPARQLWrapper as sw

In [2]:
import requests

ModuleNotFoundError: No module named 'requests'

In [3]:
# SPARQL query for the Wikidata Query Service. It selects, for every matching ?city, its coordinate
# (?location) and ?founding_date, plus English labels supplied by the wikibase:label service.
# NOTE(review): the identifiers presumably mean P31/P279* = "instance of" (any subclass of) Q515 "city",
# P17 = country, Q30 = United States, P625 = coordinate location, P571 = inception - confirm on wikidata.org.
# There is no LIMIT clause, so the full result set is returned.
query = """
SELECT ?city ?cityLabel ?location ?locationLabel ?founding_date
WHERE {
  ?city wdt:P31/wdt:P279* wd:Q515.
  ?city wdt:P17 wd:Q30.
  ?city wdt:P625 ?location.
  ?city wdt:P571 ?founding_date.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
"""

In [5]:
import sys
import pandas as pd
from typing import List, Dict
from SPARQLWrapper import SPARQLWrapper, JSON

class WikiDataQueryResults:
    """
    Runs a SPARQL query against the Wikidata Query Service and exposes the result rows as a Pandas DataFrame.

    The query is configured once in the constructor and is sent to the endpoint every time a loading method is
    called; results are not cached between calls.
    """
    def __init__(self, query: str):
        """
        Initializes the WikiDataQueryResults object with a SPARQL query string.
        No request is sent here; the query is only registered on the SPARQLWrapper client.
        :param query: A SPARQL query string.
        """
        # Agent string handed to SPARQLWrapper; it embeds the running interpreter's major.minor version.
        self.user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
        self.endpoint_url = "https://query.wikidata.org/sparql"
        self.sparql = SPARQLWrapper(self.endpoint_url, agent=self.user_agent)
        self.sparql.setQuery(query)
        self.sparql.setReturnFormat(JSON)

    def __transform2dicts(self, results: List[Dict]) -> List[Dict]:
        """
        Helper function to flatten SPARQL JSON bindings into plain dictionaries.
        :param results: The ``results.bindings`` list of the response; each binding maps a variable name to a
        dictionary that carries the bound term under the key ``'value'``.
        :return: A list with one dictionary per result row, mapping each bound variable name to its ``'value'``.
        Variables that are unbound in a row are simply absent from that row's dictionary.
        """
        return [
            {variable: term['value'] for variable, term in binding.items()}
            for binding in results
        ]

    def _run_query(self) -> Dict:
        """
        Helper function that sends the configured query to the endpoint.
        :return: The converted response as returned by ``SPARQLWrapper.queryAndConvert()``.
        """
        return self.sparql.queryAndConvert()

    def _load(self) -> List[Dict]:
        """
        Helper function that loads the data from Wikidata using the SPARQLWrapper library, and transforms the results into
        a list of dictionaries.
        :return: A list of dictionaries, where each dictionary represents a result row and has keys corresponding to the
        variables bound in that row.
        """
        return self.__transform2dicts(self._run_query()['results']['bindings'])

    def load_as_dataframe(self) -> pd.DataFrame:
        """
        Executes the SPARQL query and returns the results as a Pandas DataFrame.

        Columns appear in the order in which variables are first seen in the result rows. Variables that the response
        header declares (``head.vars``) but that are not bound in any row are appended afterwards, so every selected
        variable is always available as a column - including when the query returns no rows at all.
        :return: A Pandas DataFrame representing the query results.
        """
        response = self._run_query()
        rows = self.__transform2dicts(response['results']['bindings'])
        # The header may be missing on non-standard responses; fall back to the observed keys only.
        declared = response.get('head', {}).get('vars', [])
        observed = [variable for row in rows for variable in row]
        # dict.fromkeys de-duplicates while preserving first-seen order.
        columns = list(dict.fromkeys(observed + list(declared)))
        return pd.DataFrame(rows, columns=columns)

In [6]:
# Wrap the query defined above; the constructor only configures the SPARQLWrapper client,
# the request itself is sent later by load_as_dataframe().
data_extracter = WikiDataQueryResults(query)

In [7]:
# Bare expression: shows the default object repr, since the class defines no __repr__.
data_extracter

<__main__.WikiDataQueryResults at 0x116f6134320>

In [8]:
# Sends the query to the Wikidata endpoint (network access required) and collects the rows in a DataFrame.
df = data_extracter.load_as_dataframe()
# Plain-text preview of the first rows; every value is the raw string taken from the response.
print(df.head())

                                    city         founding_date  \
0     http://www.wikidata.org/entity/Q62  1776-06-29T00:00:00Z   
1     http://www.wikidata.org/entity/Q65  1781-09-04T00:00:00Z   
2  http://www.wikidata.org/entity/Q16552  1769-07-16T00:00:00Z   
3  http://www.wikidata.org/entity/Q16553  1777-11-29T00:00:00Z   
4  http://www.wikidata.org/entity/Q16739  1888-01-01T00:00:00Z   

                             location      cityLabel  \
0        Point(-122.419444444 37.775)  San Francisco   
1          Point(-118.24368 34.05223)    Los Angeles   
2             Point(-117.1625 32.715)      San Diego   
3  Point(-121.872777777 37.304166666)       San Jose   
4  Point(-118.195555555 33.768055555)     Long Beach   

                        locationLabel  
0        Point(-122.419444444 37.775)  
1          Point(-118.24368 34.05223)  
2             Point(-117.1625 32.715)  
3  Point(-121.872777777 37.304166666)  
4  Point(-118.195555555 33.768055555)  


In [12]:
from faker import Faker
# Shared Faker generator used by the cells below; no seed is set here, so generated values vary between runs.
fake = Faker()

In [16]:
# Sample a generated person name from the Faker instance.
fake.name()

'Kristy Mcguire'

In [36]:
# Sample a generated company name from the Faker instance.
fake.company()


'Jones-Johnson'

In [7]:
import networkx as nx
import json
import random
from faker import Faker



In [8]:
# SPARQL query selecting pairs of ?company and ?ceo (with English labels), capped at 20 rows by LIMIT.
# NOTE(review): P169 is presumably Wikidata's "chief executive officer" property - confirm on wikidata.org.
# This rebinds the name `query`, replacing the US-cities query assigned in an earlier cell.
query = """
        SELECT ?ceo ?ceoLabel ?company ?companyLabel WHERE {
          ?company wdt:P169 ?ceo.
          SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
        }
        LIMIT 20
        """
