In [1]:
import requests

# SPARQL query to fetch all cities in Germany
query = """
SELECT ?city ?cityLabel WHERE {
  ?city wdt:P31/wdt:P279* wd:Q515;
        wdt:P17 wd:Q183.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""

# Wikidata endpoint URL
url = "https://query.wikidata.org/sparql"

# Make the request to the Wikidata SPARQL endpoint
response = requests.get(url, params={'query': query, 'format': 'json'})
data = response.json()
nr=1
# Extract and print city names
for item in data['results']['bindings']:
    nr += 1
    print(nr, item['cityLabel']['value'])
    if nr > 10: 
        break; 

2 Zwickau
3 Plauen
4 Görlitz
5 Neu-Ulm
6 Freital
7 Dippoldiswalde
8 Pirna
9 Sebnitz
10 Freising
11 Dachau


In [2]:
import requests
import datetime
import pandas as pd

def query_wikidata(sparql):
    url = "https://query.wikidata.org/sparql"
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'}
    data = requests.get(url, headers=headers, params={'format': 'json', 'query': sparql})
    return data.json()

def get_us_presidents(last_n_years=80):
    current_year = datetime.datetime.now().year
    start_year = current_year - last_n_years
    
    sparql = """
    SELECT ?president ?presidentLabel ?startTerm ?endTerm WHERE {
        ?president p:P39 ?statement.
        ?statement ps:P39 wd:Q11696; pq:P580 ?startTerm.
        OPTIONAL {?statement pq:P582 ?endTerm.}
        FILTER(YEAR(?startTerm) >= """ + str(start_year) + """)
        SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
    ORDER BY ?startTerm
    """
    return query_wikidata(sparql)

def get_us_economic_data(year, property_code):
    # Common function to get economic data like GDP or unemployment rate
    sparql = f"""
    SELECT ?year ?value WHERE {{
        ?us wdt:{property_code} ?value;
            p:{property_code} [pq:P585 ?date].
        BIND(YEAR(?date) AS ?year)
        FILTER(?year = {year})
        FILTER(?value > 0)
    }}
    LIMIT 1
    """
    results = query_wikidata(sparql)
    data = results.get('results', {}).get('bindings', [])
    if data:
        return data[0]['value']['value']
    return None

def main():
    presidents_data = get_us_presidents()
    data_list = []

    for president in presidents_data['results']['bindings']:
        name = president['presidentLabel']['value']
        start_year = president['startTerm']['value'][:4]
        end_year = president['endTerm']['value'][:4] if 'endTerm' in president else datetime.datetime.now().year
        
        # GDP and unemployment rate queries
        start_gdp = get_us_economic_data(start_year, "P2131")  # GDP
        end_gdp = get_us_economic_data(end_year, "P2131")
        start_unemployment = get_us_economic_data(start_year, "P1198")  # Unemployment rate
        end_unemployment = get_us_economic_data(end_year, "P1198")

        data_list.append({
            "President": name,
            "Start Year": start_year,
            "Start GDP": start_gdp,
            "End Year": end_year,
            "End GDP": end_gdp,
            "Start Unemployment Rate": start_unemployment,
            "End Unemployment Rate": end_unemployment
        })

    df = pd.DataFrame(data_list)
    print(df)
    df.to_csv("us_presidents_economic_data.csv", index=False)

if __name__ == "__main__":
    main()



               President Start Year         Start GDP End Year  \
0        Harry S. Truman       1945              None     1953   
1   Dwight D. Eisenhower       1953              None     1961   
2        John F. Kennedy       1961  23909289978.5861     1963   
3      Lyndon B. Johnson       1963       12162100000     1969   
4          Richard Nixon       1969  1485994387.49716     1974   
5            Gerald Ford       1974   75931656814.657     1977   
6           Jimmy Carter       1977  3012914131.16971     1981   
7          Ronald Reagan       1981  3012914131.16971     1989   
8      George H. W. Bush       1989   789629629.62963     1993   
9           Bill Clinton       1993  13039352743.9616     2001   
10        George W. Bush       2001  13039352743.9616     2009   
11          Barack Obama       2009   789629629.62963     2017   
12          Donald Trump       2017  47168303744.1329     2021   
13            Phil Baker       2018         238308749     2024   
14        

# Next we read in all villages in Austria

In [3]:
# Function to extract the Wikidata entity ID from the URL
def extract_entity_id(url):
    return url.split('/')[-1]

In [4]:
import requests
import pandas as pd

def query_wikidata(sparql_query):
    """
    Sends a SPARQL query to the Wikidata endpoint and returns the results as a DataFrame.

    Args:
    sparql_query (str): A SPARQL query string.

    Returns:
    DataFrame: Results of the query as a pandas DataFrame.
    """
    url = "https://query.wikidata.org/sparql"
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
        "Accept": "application/sparql-results+json"
    }
    response = requests.get(url, headers=headers, params={'query': sparql_query, 'format': 'json'})
    if response.status_code == 200:
        data = response.json()
        results = data['results']['bindings']
        return pd.json_normalize(results)
    else:
        print(f"Failed to fetch data: HTTP {response.status_code}")
        return pd.DataFrame()


# SPARQL query to fetch village name and current mayor in Austria
sparql_query = """
    SELECT ?village ?villageLabel  WHERE {
      ?village wdt:P31 wd:Q532; # instance of a village
               wdt:P17 wd:Q40. # located in Austria
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
    ORDER BY ?villageLabel
    """

villagesAUT = query_wikidata(sparql_query)
if not villagesAUT.empty:
        # Adding the 'entity' column by applying the function to each row in 'village.value'
    villagesAUT['entity'] = villagesAUT['village.value'].apply(extract_entity_id)
      #  for row in results:
         #   row['property'] = "test"
         #   print(results['village.value'].iloc[0].split('/')[-1])
        #    for iteri in results.index: 
           #  print(results[iteri]['village']['value'].split('/')[-1])
              
    print("Data retrieved successfully.")
    print(villagesAUT.iloc[150:160])
else:
    print("No data found.")

Data retrieved successfully.
    village.type                              village.value villageLabel.type  \
150          uri   http://www.wikidata.org/entity/Q57245580           literal   
151          uri   http://www.wikidata.org/entity/Q69472508           literal   
152          uri   http://www.wikidata.org/entity/Q54854521           literal   
153          uri   http://www.wikidata.org/entity/Q15107518           literal   
154          uri  http://www.wikidata.org/entity/Q118369915           literal   
155          uri   http://www.wikidata.org/entity/Q20826873           literal   
156          uri     http://www.wikidata.org/entity/Q856760           literal   
157          uri   http://www.wikidata.org/entity/Q18616739           literal   
158          uri     http://www.wikidata.org/entity/Q670057           literal   
159          uri     http://www.wikidata.org/entity/Q865764           literal   

                    villageLabel.value villageLabel.xml:lang      entity  
150 

# Get all wikidata info about some entity

In [5]:
import requests

def fetch_wikidata_entity(entity_id):
    """Fetches all data for a given Wikidata entity ID."""
    url = f"https://www.wikidata.org/wiki/Special:EntityData/{entity_id}.json"
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Failed to retrieve data: HTTP {response.status_code}")
        return None

def display_entity_data(entity_data):
    """Displays information about the Wikidata entity."""
    if entity_data:
        entities = entity_data.get('entities', {})
        if entity_id in entities:
            entity = entities[entity_id]
            print(f"Information for entity {entity_id}:")
            labels = entity.get('labels', {}).get('en', {}).get('value', 'No label')
            descriptions = entity.get('descriptions', {}).get('en', {}).get('value', 'No description')
            print(f"Label: {labels}")
            print(f"Description: {descriptions}")

            # Output claims (properties and values)
            claims = entity.get('claims', {})
            for prop, values in claims.items():
                for value in values:
                    mainsnak = value.get('mainsnak', {})
                    datavalue = mainsnak.get('datavalue', {}).get('value', 'No value')
                    print(f"Property {prop} has value {datavalue}")
        else:
            print("Entity ID not found in the response.")
    else:
        print("No data to display.")

# Example usage
entity_id = villagesAUT.iloc[160]['entity']
 
print(entity_id)
entity_data = fetch_wikidata_entity(entity_id)
display_entity_data(entity_data)

Q118369964
Information for entity Q118369964:
Label: Birnberg
Description: locality and village in Haus in Liezen District
Property P31 has value {'entity-type': 'item', 'numeric-id': 3257686, 'id': 'Q3257686'}
Property P31 has value {'entity-type': 'item', 'numeric-id': 532, 'id': 'Q532'}
Property P17 has value {'entity-type': 'item', 'numeric-id': 40, 'id': 'Q40'}
Property P131 has value {'entity-type': 'item', 'numeric-id': 666283, 'id': 'Q666283'}
Property P625 has value {'latitude': 47.411732, 'longitude': 13.728155, 'altitude': None, 'precision': 1e-06, 'globe': 'http://www.wikidata.org/entity/Q2'}
Property P373 has value Birnberg
Property P8384 has value 15780


# Try to find official website

In [7]:
import requests

def get_village_website(entity_code):
    # Endpoint URL for the Wikidata API
    url = 'https://www.wikidata.org/w/api.php'
    
    # Parameters for the API call
    params = {
        'action': 'wbgetentities',
        'ids': entity_code,
        'format': 'json',
        'props': 'claims'
    }
    
    # Making the request to the Wikidata API
    response = requests.get(url, params=params)
    data = response.json()
    
    # Accessing the claims section from the response
    claims = data['entities'][entity_code]['claims']
    
    # The property code for 'official website' on Wikidata is P856
    if 'P856' in claims:
        website_claim = claims['P856'][0]  # Taking the first claim for the official website
        website_url = website_claim['mainsnak']['datavalue']['value']
        return website_url
    else:
        return "No official website listed for this entity."

# Example usage
entity_code = 'Q674282'  # Pyhra

print(get_village_website(entity_code))

# Create an empty DataFrame with specified column names
df = pd.DataFrame(columns=['village', 'website'])

# Display the empty DataFrame
print(df)
for iteri in range(400,420): 
    entity_code = villagesAUT.iloc[iteri]['entity']
    website = get_village_website(entity_code)
    print(f'village ',villagesAUT.iloc[iteri]['villageLabel.value']," site: ", website, entity_code)

http://www.pyhra.gv.at/
Empty DataFrame
Columns: [village, website]
Index: []
village  Mitschig  site:  No official website listed for this entity. Q1939781
village  Mittelberg  site:  No official website listed for this entity. Q1728318
village  Mitteldorf  site:  No official website listed for this entity. Q1940114
village  Mitterberg  site:  No official website listed for this entity. Q1940542
village  Mitteregg  site:  No official website listed for this entity. Q44085990
village  Mitterndorf  site:  No official website listed for this entity. Q67013263
village  Mixnitz  site:  No official website listed for this entity. Q112589607
village  Modriach  site:  No official website listed for this entity. Q695603
village  Moosbach  site:  No official website listed for this entity. Q15835595
village  Mönchhof  site:  No official website listed for this entity. Q21880463
village  Mösern  site:  No official website listed for this entity. Q1958167
village  Mühltal  site:  No official webs