# Open Data Zurich API

### https://www.zuerich.com/en/api/v2/data


In [1]:
import logging
import pandas as pd
import requests

In [2]:
# Notebook-wide settings: never truncate the column list when a frame is
# displayed, and expose a module-level logger for the cells below.
pd.options.display.max_columns = None
logger = logging.getLogger(__name__)

### data fetching functions


In [3]:
def fetch_zurich_data(dataset_id: int, timeout: float = 30.0) -> pd.DataFrame:
    """
    Fetch one dataset from the Zürich Open Data API v2.0 as a pandas DataFrame.

    Args:
        dataset_id: The ID of the dataset to fetch; sent as the ``id`` query
            parameter of the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook
            indefinitely.

    Returns:
        pd.DataFrame: DataFrame built from the decoded JSON payload.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
        ValueError: If the payload cannot be turned into a DataFrame or the
            resulting DataFrame is empty.
    """
    # basicConfig only installs a handler when the root logger has none, so
    # repeated calls are harmless; it is what makes the INFO lines visible.
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    base_url = "https://www.zuerich.com/en/api/v2/data"
    params = {"id": dataset_id}

    try:
        logger.info("Fetching data from API for dataset %s", dataset_id)
        response = requests.get(base_url, params=params, timeout=timeout)
        # Turn 4xx/5xx answers into exceptions instead of parsing an error body.
        response.raise_for_status()

        df = pd.DataFrame(response.json())

        if df.empty:
            raise ValueError("Received empty dataset from API")

        logger.info("Successfully fetched %s records", len(df))
        return df

    # Every failure is logged under a category-specific message and then
    # re-raised unchanged so the caller decides whether to continue.
    except requests.RequestException as e:
        logger.error("API request failed: %s", e)
        raise
    except ValueError as e:
        logger.error("Data validation failed: %s", e)
        raise
    except Exception as e:
        logger.error("Unexpected error: %s", e)
        raise

## fetch data


### dataset overview


In [4]:
# An empty dict is passed where a numeric dataset id normally goes. In the
# recorded run this returned 162 rows, which the following cells treat as the
# dataset catalogue (id / parent / name).
# NOTE(review): presumably this works because the HTTP client drops the `id`
# query parameter for an empty mapping — confirm against the requests docs.
dataset_overview = fetch_zurich_data({})

INFO:__main__:Fetching data from API for dataset {}
INFO:__main__:Successfully fetched 162 records


In [5]:
# Cast the id columns to integers and add `name_en`, the English entry of the
# `name` field (empty string when `name` is not a dict or has no "en" key).
dataset_overview = dataset_overview.astype({"id": int, "parent": int}).assign(
    name_en=lambda frame: frame["name"].apply(
        lambda label: label.get("en", "") if isinstance(label, dict) else ""
    )
)

In [6]:
# all datasets: distinct English names, ordered by dataset id
overview_by_id = dataset_overview.sort_values("id")
print(overview_by_id["name_en"].unique().tolist())

['Accommodation', 'Attractions', 'Place', 'Gastronomy', 'Events', 'Hotels', 'Apartments', 'B&Bs', 'Hostels', 'Campsites', 'Architecture', 'Vantage Points', 'Monuments', 'Works of Art', 'Squares & Streets', 'Churches', 'Shopping', 'Culture', 'Sport', 'Wellness', 'Nature', 'Nightlife', 'Restaurants', 'Coffee Houses & Tea Rooms', 'Bars & Lounges', 'Online Bookings', 'Exhibitions', 'Parties', 'Festivals', 'Comedy', 'Sports Events', 'Fashion & Accessoires', 'Swissness', 'Watches & Jewelry', 'Souvenirs & Gifts', 'Food & Delicacy', 'Markets', 'Movie', 'Music', 'Stages', 'Museums', 'Walks', 'Hikes', 'Bike Tours', 'Mountain Biking', 'Swimming', 'Inline Skating', 'Running', 'Summer Tobogganing', 'Climbing', 'Golf', 'Waterskiing/Wakeboarding', 'Motor Boat Hire', 'Sailing', 'SUP Stand Up Paddling', 'Pedalos', 'Skiing/Snowboarding', 'Tobogganing', 'Ski Touring', 'Snowshoe Trekking', 'Ice Skating', 'Cross-Country Skiing', 'Mountains', 'Water', 'Parks & Gardens', 'Zoos & Animals', 'Clubs & Discos', '

In [7]:
# all parent datasets: rows whose id is referenced in some row's `parent` column
parent_ids = dataset_overview["parent"].unique().tolist()
is_parent = dataset_overview["id"].isin(parent_ids)
parent_rows = dataset_overview[is_parent].sort_values("id")
print(parent_rows["name_en"].unique().tolist())

['Accommodation', 'Attractions', 'Place', 'Gastronomy', 'Events', 'Shopping', 'Culture', 'Sport', 'Nature', 'Nightlife', 'Restaurants', 'Coffee Houses & Tea Rooms', 'Bars & Lounges', 'Stages', 'Museums', 'Clubs & Discos', 'Cuisine', 'Meal', 'Swiss Specialties', 'Italian', 'Asian', 'Atmosphere', 'Products', 'Restaurant Features']


In [8]:
# set datasets to include
# English dataset names whose records are fetched in the next step. The values
# match the parent datasets printed by the previous cell.
include_datasets = [
    "Accommodation", "Attractions", "Place", "Gastronomy", "Events",
    "Shopping", "Culture", "Sport", "Nature", "Nightlife",
    "Restaurants", "Coffee Houses & Tea Rooms", "Bars & Lounges",
    "Stages", "Museums", "Clubs & Discos",
    "Cuisine", "Meal", "Swiss Specialties", "Italian", "Asian",
    "Atmosphere", "Products", "Restaurant Features",
]

In [9]:
# Fetch every selected dataset and stack the results into one frame, tagging
# each row with the dataset id it was requested under.
selected_ids = (
    dataset_overview.loc[dataset_overview["name_en"].isin(include_datasets), "id"]
    .unique()
    .tolist()
)

# Collect the per-dataset frames and concatenate once at the end; calling
# pd.concat inside the loop re-copies everything gathered so far each time.
fetched_frames = []
for dataset_id in selected_ids:
    try:
        df = fetch_zurich_data(dataset_id=dataset_id)
        df["dataset_id"] = dataset_id
        fetched_frames.append(df)
    except Exception as e:
        # Best effort: a failing dataset (e.g. the HTTP 500 answers seen in the
        # recorded run) is logged and skipped so the others are still loaded.
        logger.error(f"Error: {e!s}")

# pd.concat rejects an empty list, so fall back to an empty frame when every
# request failed. The original row indices are kept (no ignore_index).
data = pd.concat(fetched_frames) if fetched_frames else pd.DataFrame()

INFO:__main__:Fetching data from API for dataset 71
INFO:__main__:Successfully fetched 189 records
INFO:__main__:Fetching data from API for dataset 1471
INFO:__main__:Successfully fetched 191 records
INFO:__main__:Fetching data from API for dataset 188
ERROR:__main__:API request failed: 500 Server Error: Internal Server Error for url: https://www.zuerich.com/en/api/v2/data?id=188
ERROR:__main__:Error: 500 Server Error: Internal Server Error for url: https://www.zuerich.com/en/api/v2/data?id=188
INFO:__main__:Fetching data from API for dataset 72
INFO:__main__:Successfully fetched 109 records
INFO:__main__:Fetching data from API for dataset 103
INFO:__main__:Successfully fetched 223 records
INFO:__main__:Fetching data from API for dataset 135
INFO:__main__:Successfully fetched 19 records
INFO:__main__:Fetching data from API for dataset 102
ERROR:__main__:API request failed: 500 Server Error: Internal Server Error for url: https://www.zuerich.com/en/api/v2/data?id=102
ERROR:__main__:Erro

## data cleaning


In [10]:
def _dict_field(value, key):
    """Return ``value.get(key, "")`` for dict values and "" for anything else."""
    if isinstance(value, dict):
        return value.get(key, "")
    return ""


# Work on a copy of the fetched data and keep the first row per identifier.
data_cleaned = data.copy().drop_duplicates(subset="identifier", keep="first")

# Flatten the nested fields needed downstream into plain columns.
data_cleaned["name_en"] = data_cleaned["name"].apply(_dict_field, args=("en",))
for coordinate in ("latitude", "longitude"):
    data_cleaned[coordinate] = data_cleaned["geoCoordinates"].apply(
        _dict_field, args=(coordinate,)
    )

## store data


In [11]:
# Keep only the exported columns, give them plain names, order the rows by
# type and name, then write the result to CSV without the index.
export_columns = ["identifier", "@type", "name_en", "latitude", "longitude"]
column_renames = {"name_en": "name", "@type": "type"}

output_data = data_cleaned.filter(items=export_columns)
output_data = output_data.rename(columns=column_renames)
output_data = output_data.sort_values(by=["type", "name"])
output_data.to_csv("../local_data/tourism.csv", index=False)

In [12]:
# Preview the first five rows of the exported table.
output_data.head(n=5)

Unnamed: 0,identifier,type,name,latitude,longitude
71,88375,BarOrPub,2. Akt Restaurant & Bar,47.36983,8.533639
72,88366,BarOrPub,4. Akt Restaurant & Bar,47.389523,8.521725
178,2033,BarOrPub,67 Sportsbar,47.176646,8.50768
37,326061,BarOrPub,Acid,47.377111,8.526034
55,168166,BarOrPub,Aelpli Bar,47.371387,8.543551
