In [23]:
import pandas as pd
import geopandas as gpd
import os
import numpy as np
import functions as f

The data collection stage was performed per administrative region. Now we want to combine all green spaces in Denmark into one big dataframe.

In [24]:
# Combine the per-region green-space files into a single GeoDataFrame `df`.
# File names are parsed as "<place>_<type1>_<type2...>[_ALL].parquet".
GREEN_SPACES_DIR = "./dataset/raw_unprocessed/green_spaces"

region_frames = []

# Sort the listing so the row order of `df` is the same on every run, and
# skip anything that is not a parquet file (read_parquet would fail on it).
for file in sorted(os.listdir(GREEN_SPACES_DIR)):
    if not file.endswith(".parquet"):
        continue

    name_parts = file.split("_")
    place = name_parts[0]
    type1 = name_parts[1]
    # Everything after the first two tokens, minus the extension and the "_ALL" marker.
    type2 = "_".join(file.replace(".parquet", "").replace("_ALL", "").split("_")[2:])

    region = gpd.read_parquet(f"{GREEN_SPACES_DIR}/{file}")

    # The raw files are stored in WGS84, so they must be *reprojected* before
    # areas are computed. Do not assign `.crs` directly: that only relabels the
    # coordinates without transforming them. `to_crs` already tags the result
    # with the target CRS, so no further CRS assignment is needed.
    region = region.to_crs(f.DENMARK_CRS)

    # Move the original index into regular columns so the frames can be stacked.
    region = region.reset_index()

    region["place"] = place
    region["type1"] = type1
    region["type2"] = type2
    # Square kilometers instead of square meters.
    region["area_km"] = region["geometry"].area / 1_000_000

    region_frames.append(region)

# Concatenate once at the end: calling pd.concat inside the loop re-copies the
# accumulated frame on every iteration (quadratic in the number of files).
df = pd.concat(region_frames, ignore_index=True) if region_frames else pd.DataFrame()
    

Make a summary of the green spaces by type, including the number of features, the total area (in km²), the average area, and the standard deviation of the area.

In [29]:
# Render floats with five decimals so small areas (in km²) remain readable.
pd.set_option("display.float_format", lambda value: "%.5f" % value)

# One row per (type1, type2) pair: feature count plus total, mean and
# standard deviation of the area.
summary = (
    df[["type1", "type2", "area_km"]]
    .groupby(["type1", "type2"])
    .agg(
        Number=("type2", "count"),
        Total=("area_km", "sum"),
        AVG=("area_km", "mean"),
        STD=("area_km", "std"),
    )
    .reset_index()
)

In [30]:
# Show the summary ordered by average area, largest first.
summary.sort_values(by="AVG", ascending=False)

Unnamed: 0,type1,type2,Number,Total,AVG,STD
34,water,lagoon,47,582.41141,12.39173,57.13227
23,natural,heath,5538,825.83725,0.14912,1.17797
4,landuse,farmland,115907,15444.0742,0.13325,0.24781
18,natural,beach,934,75.95641,0.08132,0.89633
6,landuse,forest,120330,6532.49036,0.05429,0.49867
14,leisure,dog_park,311,16.32359,0.05249,0.15416
42,water,river,207,10.7513,0.05194,0.10142
20,natural,coastline,1579,78.68648,0.04983,0.44226
8,landuse,greenfield,1484,68.34794,0.04606,0.16277
10,landuse,orchard,929,35.40959,0.03812,0.05723


In [28]:
# Total number of green-space features across all types.
summary["Number"].sum()

617702

Let's make a reduced version of the dataset, where we keep the type of green space, its geometry, and its area. We simplify the polygons with a 5-meter tolerance to optimize storage and computation.

In [31]:
# Select the type labels, geometry and area, then simplify the geometries
# with a tolerance of 5 (in units of the frame's CRS).
# NOTE(review): GeoDataFrame.simplify returns a GeoSeries of geometries only,
# so type1/type2/area_km are presumably not carried into `reduced` — confirm
# this is intended.
kept_columns = ["type1", "type2", "geometry", "area_km"]
reduced = df[kept_columns].simplify(5)

Save the resulting geodataframe.

In [32]:
# Convert `reduced` with the project helper (presumably GeoSeries -> GeoDataFrame;
# verify in functions.py), then write the result to Parquet.
reduced_gdf = f.geoseries_to_geopandas(reduced, crs=f.DENMARK_CRS)
reduced_gdf.to_parquet("./dataset/raw_unprocessed/green_spaces.parquet")