## Setup

In [48]:
import json

from pyproj import Transformer

In [34]:
# Load the raw building dataset (GeoJSON) into memory.
# The encoding is passed explicitly so the read does not depend on the
# platform's locale default (e.g. cp1252 on Windows), which could fail on or
# mis-decode any non-ASCII text in the file. JSON interchange files are UTF-8.

with open('../data/buildings.geojson', 'r', encoding='utf-8') as file:
    data = json.load(file)

## Data Exploration

In [35]:
# Size of the raw dataset: number of entries in the GeoJSON 'features' list.

raw_features = data['features']
total_features = len(raw_features)
print(f'{total_features} building entries in the dataset')

393297 building entries in the dataset


In [36]:
# Count the entries whose buildingClass is 1252 (Behälter, Silos und Lagergebäude).
# Features without a 'buildingClass' property simply do not match (.get returns None).

count_1252 = len([
    feature
    for feature in data['features']
    if feature['properties'].get('buildingClass') == 1252
])

print(f'{count_1252} entries with buildingClass 1252 (Behälter, Silos und Lagergebäude)')

4766 entries with buildingClass 1252 (Behälter, Silos und Lagergebäude)


In [38]:
# Of the class-1252 buildings, how many have the status 1004 (Bestehend)?

count = 0
for feature in data['features']:
    properties = feature['properties']
    # Skip anything that fails either criterion; what remains is counted.
    if properties.get('buildingClass') != 1252:
        continue
    if properties.get('buildingStatus') != 1004:
        continue
    count += 1

print(f'{count} buildings with class 1252 and status 1004 (Bestehend)')


4031 buildings with class 1252 and status 1004 (Bestehend)


Combined with the previous result, this means that of the 4'766 entries with the correct class, 4'031 currently exist (status 1004) and are technically "available" as of now.

In [40]:
# How many of those buildings also have the buildingCategory 1060
# (Gebäude ohne Wohnnutzung)?

# (buildingClass, buildingStatus, buildingCategory) a feature must carry to count.
wanted = (1252, 1004, 1060)

count = sum(
    (
        feature['properties'].get('buildingClass'),
        feature['properties'].get('buildingStatus'),
        feature['properties'].get('buildingCategory'),
    ) == wanted
    for feature in data['features']
)

print(f'{count} buildings with class 1252, status 1004 and category 1060 (Gebäude ohne Wohnnutzung)')


3605 buildings with class 1252, status 1004 and category 1060 (Gebäude ohne Wohnnutzung)


This means we are left with 3'605 possible storage locations in the canton of Zurich.

## Data Preprocessing

In [41]:
# The dataset only contains entries for "ZH", so the 'canton' property is
# redundant; drop it from every feature that has it.

for feature in data['features']:
    # pop with a default is a no-op when the key is absent
    feature['properties'].pop('canton', None)

In [42]:
# Keep only the features that match all three criteria used in the exploration
# above: buildingClass 1252, buildingStatus 1004 and buildingCategory 1060.

filtered_features = []
for feature in data['features']:
    properties = feature['properties']
    matches = (
        properties.get('buildingClass') == 1252
        and properties.get('buildingStatus') == 1004
        and properties.get('buildingCategory') == 1060
    )
    if matches:
        filtered_features.append(feature)

# Replace the feature list in place; the rest of the GeoJSON object is untouched.
data['features'] = filtered_features

In [43]:
# Transform spatial data from EPSG:2056 (mainly used in Switzerland) to
# EPSG:4326 (global standard, longitude/latitude in degrees).
# always_xy=True pins the axis order to (x, y) in and (lon, lat) out.

transformer = Transformer.from_crs("EPSG:2056", "EPSG:4326", always_xy=True)

for feature in data['features']:
    x, y = feature['geometry']['coordinates']
    # This cell rewrites `data` in place, so running it a second time would feed
    # lon/lat values back into the transformer as if they were EPSG:2056 and
    # corrupt them. EPSG:2056 coordinates are in the millions of metres, so a
    # pair that already fits the lon/lat value range has been converted before
    # and is left alone.
    if -180 <= x <= 180 and -90 <= y <= 90:
        continue
    lon, lat = transformer.transform(x, y)
    feature['geometry']['coordinates'] = [lon, lat]

# NOTE(review): the source file's top-level members are not visible from here.
# If it carries a legacy 'crs' member, that member would still advertise the old
# CRS after reprojection and mislead readers that honour it, so drop it.
# This is a no-op when the key is absent.
data.pop('crs', None)

## Save data

In [44]:
# Write the preprocessed GeoJSON object to disk, pretty-printed with a
# 4-space indent.
with open('../data/preprocessed_buildings.geojson', mode='w') as out_file:
    json.dump(data, out_file, indent=4)

In [47]:
# Sanity check: read the saved file back and report how many features it holds.

with open('../data/preprocessed_buildings.geojson', 'r') as saved_file:
    preprocessed_data = json.loads(saved_file.read())

saved_features = preprocessed_data['features']
total_buildings = len(saved_features)
print(f'{total_buildings} building entries saved in the preprocessed dataset')

3605 building entries saved in the preprocessed dataset
