In [12]:
import sys

import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON

from Code.UtilityFunctions.get_data_path import get_path

In [2]:
def wikidata_query(sparql_query: str) -> pd.DataFrame:
    """Run a SPARQL query against the Wikidata Query Service and tabulate it.

    Adapted from
    https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Cats

    Parameters
    ----------
    sparql_query : str
        Complete SPARQL query text to send to the endpoint.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``results["results"]["bindings"]``, flattened
        with ``pd.json_normalize``; nested binding fields become dotted
        column names (e.g. ``city.value`` for a projected ``?city``).
    """
    # Needs the module-level ``import sys``; the agent string carries the
    # major.minor version of the running interpreter.
    user_agent = f"WDQS-example Python/{sys.version_info[0]}.{sys.version_info[1]}"

    sparql = SPARQLWrapper("https://query.wikidata.org/sparql", agent=user_agent)
    sparql.setQuery(sparql_query)
    sparql.setReturnFormat(JSON)

    results = sparql.query().convert()
    return pd.json_normalize(results["results"]["bindings"])

# Cities in the USA

In [32]:
# Fetch U.S. cities together with their state and most recent population figure.
#
# Wikidata identifiers used in the query:
# P31 instance of
# P131 located in the administrative territorial entity
# P279 subclass of
# P585 point in time
# P1082 population
# Q515 city
# Q35657 U.S. state
#
# How the query works:
# - ?city is an instance of Q515 or of any (transitive) subclass of it.
# - ?city must be located in some entity (?county) that is itself located in a
#   U.S. state, i.e. exactly two P131 hops from city to state.
# - Only population statements carrying a P585 qualifier are matched; the
#   FILTER NOT EXISTS keeps those for which the city has no population
#   statement with a later date.
#
# NOTE(review): a city whose P131 points directly at the state would not match
# the two-hop pattern; the Canada cities query handles that case with a UNION.
# Confirm whether skipping such cities here is intended.

query_all_us_cities = """
SELECT DISTINCT ?city ?cityLabel ?state ?population
WHERE {
    ?city wdt:P31/wdt:P279* wd:Q515 .
    ?city wdt:P131 ?county .
    ?county wdt:P131 ?state .
    ?state wdt:P31 wd:Q35657 .
    ?city p:P1082 ?statement .
    ?statement ps:P1082 ?population .
    ?statement pq:P585 ?date .
    FILTER NOT EXISTS {
        ?city p:P1082/pq:P585 ?date2 .
        FILTER(?date2 > ?date)
    }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
"""

# Sends the query to the Wikidata endpoint; result is one DataFrame row per binding.
result_us_cities = wikidata_query(query_all_us_cities)

In [None]:
# Reduce the raw bindings to their ``.value`` columns under readable names.
us_city_renames = {"city.value": "CityCode", "cityLabel.value": "City", "state.value": "StateCode", "population.value": "Population"}
# Rename first, then select by the *new* names: this returns a fresh frame
# (no in-place mutation of a column subset, hence no SettingWithCopyWarning)
# and keeps the cell re-runnable, because rename ignores source columns that
# are already gone after a previous run.
result_us_cities = result_us_cities.rename(columns=us_city_renames)[list(us_city_renames.values())]

In [None]:
# Write the U.S. city table to the path returned by get_path, without the index column.
result_us_cities.to_csv(get_path("usCities.csv"), index=False)

# States in the USA

In [34]:
# Fetch every Wikidata item that is a direct instance of "U.S. state",
# together with its label.
#
# Wikidata identifiers used in the query:
# P31 instance of
# Q35657 U.S. state

query_all_us_states = """
SELECT ?state ?stateLabel
WHERE {
  ?state wdt:P31 wd:Q35657 .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
"""

# Sends the query to the Wikidata endpoint; result is one DataFrame row per binding.
result_us_states = wikidata_query(query_all_us_states)

In [35]:
# Reduce the raw bindings to their ``.value`` columns under readable names.
us_state_renames = {"state.value": "StateCode", "stateLabel.value": "State"}
# Rename first, then select by the *new* names: this returns a fresh frame
# (no in-place mutation of a column subset, hence no SettingWithCopyWarning)
# and keeps the cell re-runnable, because rename ignores source columns that
# are already gone after a previous run.
result_us_states = result_us_states.rename(columns=us_state_renames)[list(us_state_renames.values())]

In [36]:
# Write the U.S. state table to the path returned by get_path. index=False
# matches the usCities.csv export and keeps the meaningless positional index
# out of the file as an unnamed first column.
result_us_states.to_csv(path_or_buf=get_path("usStates.csv"), index=False)

# Cities in Canada

In [37]:
# Fetch Canadian cities together with their province and most recent
# population figure.
#
# Wikidata identifiers used in the query:
# P31 instance of
# P131 located in the administrative territorial entity
# P279 subclass of
# P585 point in time
# P1082 population
# Q515 city
# Q11828004 province of Canada
#
# How the query works:
# - ?city is an instance of Q515 or of any (transitive) subclass of it.
# - The UNION accepts a city that is located either in an intermediate entity
#   (?county) which is located in a province, or directly in a province.
# - Only population statements carrying a P585 qualifier are matched; the
#   FILTER NOT EXISTS keeps those for which the city has no population
#   statement with a later date.
# - DISTINCT (as in the U.S. cities query) collapses identical rows, which
#   can otherwise arise when a city matches through more than one class path
#   or through both UNION branches.

query_all_canada_cities = """
SELECT DISTINCT ?city ?cityLabel ?province ?population
WHERE {
    ?city wdt:P31/wdt:P279* wd:Q515 .
    {
        ?city wdt:P131 ?county .
        ?county wdt:P131 ?province .
        ?province wdt:P31 wd:Q11828004 .
    }
    UNION
    {
        ?city wdt:P131 ?province .
        ?province wdt:P31 wd:Q11828004 .
    }
    ?city p:P1082 ?statement .
    ?statement ps:P1082 ?population .
    ?statement pq:P585 ?date .
    FILTER NOT EXISTS {
        ?city p:P1082/pq:P585 ?date2 .
        FILTER(?date2 > ?date)
    }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
"""

# Sends the query to the Wikidata endpoint; result is one DataFrame row per binding.
result_can_cities = wikidata_query(query_all_canada_cities)

In [38]:
# Reduce the raw bindings to their ``.value`` columns under readable names.
can_city_renames = {"city.value": "CityCode", "cityLabel.value": "City", "province.value": "ProvinceCode", "population.value": "Population"}
# Rename first, then select by the *new* names: this returns a fresh frame
# (no in-place mutation of a column subset, hence no SettingWithCopyWarning)
# and keeps the cell re-runnable, because rename ignores source columns that
# are already gone after a previous run.
result_can_cities = result_can_cities.rename(columns=can_city_renames)[list(can_city_renames.values())]

In [39]:
# Write the Canadian city table to the path returned by get_path. index=False
# matches the usCities.csv export and keeps the meaningless positional index
# out of the file as an unnamed first column.
result_can_cities.to_csv(path_or_buf=get_path("canadaCities.csv"), index=False)

# Provinces of Canada

In [40]:
# Fetch every Wikidata item that is a direct instance of Q11828004,
# together with its label.
#
# Wikidata identifiers used in the query:
# P31 instance of
# Q11828004 province of Canada

query_all_canada_provinces = """
SELECT ?province ?provinceLabel
WHERE {
  ?province wdt:P31 wd:Q11828004 .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
"""

# Sends the query to the Wikidata endpoint; result is one DataFrame row per binding.
result_can_provinces = wikidata_query(query_all_canada_provinces)

In [41]:
# Reduce the raw bindings to their ``.value`` columns under readable names.
can_province_renames = {"province.value": "ProvinceCode", "provinceLabel.value": "Province"}
# The mapping was previously built but never applied, so the exported CSV kept
# the raw "province.value"/"provinceLabel.value" headers. Rename first, then
# select by the *new* names: this returns a fresh frame and keeps the cell
# re-runnable, because rename ignores source columns that are already gone.
result_can_provinces = result_can_provinces.rename(columns=can_province_renames)[list(can_province_renames.values())]

In [42]:
# Write the Canadian province table to the path returned by get_path.
# index=False matches the usCities.csv export and keeps the meaningless
# positional index out of the file as an unnamed first column.
result_can_provinces.to_csv(path_or_buf=get_path("canadaProvinces.csv"), index=False)