In [2]:
import pandas as pd
import geopandas as gpd
import os
import numpy as np
import functions as f

The data collection stage was performed per administrative region. Now we combine all green spaces in Denmark into one big dataframe.

In [3]:
# Load every per-region green-space file, tag it with the labels encoded in its
# file name, compute polygon areas, and combine everything into one frame `df`.
#
# The frames are collected in a list and concatenated once at the end; calling
# pd.concat inside the loop re-copies all previously loaded rows on every pass.
frames = []

# NOTE: rows end up in os.listdir order, and later cells address rows of `df`
# by position, so the listing order is deliberately left untouched here.
for file in os.listdir("./dataset/raw_unprocessed/green_spaces"):
    # Skip directory entries that are not parquet exports (e.g. hidden OS files);
    # they would otherwise break the name parsing or the parquet reader.
    if not file.endswith(".parquet"):
        continue

    # File names are split on "_": first token -> place, second -> type1,
    # the remainder (minus the extension and an optional "_ALL") -> type2.
    parts = file.split("_")
    place = parts[0]
    type1 = parts[1]
    type2 = "_".join(file.replace(".parquet", "").replace("_ALL", "").split("_")[2:])

    x = gpd.read_parquet(f"./dataset/raw_unprocessed/green_spaces/{file}")

    # The raw files are stored in WGS84, so they must be *reprojected* before
    # measuring areas. Do not assign `x.crs = ...` afterwards: that only relabels
    # the coordinates without transforming them.
    # NOTE(review): assumes f.DENMARK_CRS is the metric CRS EPSG:25832 -- confirm.
    x = x.to_crs(f.DENMARK_CRS)

    x = x.reset_index()

    x["place"] = place
    x["type1"] = type1
    x["type2"] = type2
    # Square kilometres instead of the square metres returned by `.area`.
    x["area_km"] = x.geometry.area / 1_000_000
    frames.append(x)

# Same fallback as before for an empty directory: an empty DataFrame.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    

Summarize the green spaces by type: the number of polygons, the total area (in km²), the average area, and the standard deviation of the area.

In [4]:
# Show floats with five decimals in every frame rendered from here on.
pd.set_option('display.float_format', lambda value: '%.5f' % value)

# One row per (type1, type2) pair: polygon count plus sum / mean / std of area.
summary = (
    df[["type1", "type2", "area_km"]]
    .groupby(["type1", "type2"])
    .agg(
        Number=("type2", "count"),
        Total=("area_km", "sum"),
        AVG=("area_km", "mean"),
        STD=("area_km", "std"),
    )
    .reset_index()
)

In [5]:
# Rank the green-space types from largest to smallest mean polygon area.
summary.sort_values(by="AVG", ascending=False)

Unnamed: 0,type1,type2,Number,Total,AVG,STD
34,water,lagoon,47,582.41141,12.39173,57.13227
23,natural,heath,5538,825.83725,0.14912,1.17797
4,landuse,farmland,115907,15444.0742,0.13325,0.24781
18,natural,beach,934,75.95641,0.08132,0.89633
6,landuse,forest,120330,6532.49036,0.05429,0.49867
14,leisure,dog_park,311,16.32359,0.05249,0.15416
42,water,river,207,10.7513,0.05194,0.10142
20,natural,coastline,1579,78.68648,0.04983,0.44226
8,landuse,greenfield,1484,68.34794,0.04606,0.16277
10,landuse,orchard,929,35.40959,0.03812,0.05723


In [6]:
# Total number of polygons across all green-space types.
summary["Number"].sum()

617702

Let's make a reduced version of the dataset that keeps only the type of green space, its geometry, and its area. We simplify the polygons with a tolerance of 5 meters to reduce storage and computation costs.

## Random sample of a polygon to show the effect of simplification

In [16]:
# Interactive map of every polygon labelled leisure / garden.
df.loc[df["type1"].eq("leisure") & df["type2"].eq("garden")].geometry.explore()

Polygon before simplification

In [6]:
# Single row picked by position, shown with its original geometry.
df.iloc[527757:527758].geometry.explore()

Polygon after simplification

In [7]:

# Same row, simplified with a tolerance of 5 (CRS units) for comparison.
df.iloc[527757:527758].geometry.simplify(5).explore()

In [10]:
# Keep only the type labels, the geometry and the area; work on a copy so that
# `df` keeps its original geometries.
reduced = df.loc[:, ["type1", "type2", "geometry", "area_km"]].copy()
# Replace each geometry by its simplified version (tolerance 5, in CRS units).
# `area_km` is left as computed from the original geometries.
reduced.geometry = reduced.geometry.simplify(5)

In [11]:
# Display the reduced frame (type labels, simplified geometry, area in km²).
reduced

Unnamed: 0,type1,type2,geometry,area_km
0,amenity,grave_yard,"POLYGON ((868651.634 6136455.972, 868674.825 6...",0.00440
1,amenity,grave_yard,"POLYGON ((891462.668 6118760.110, 891468.951 6...",0.00368
2,amenity,grave_yard,"POLYGON ((871040.481 6118525.349, 871098.664 6...",0.00702
3,amenity,grave_yard,"POLYGON ((877690.109 6118708.526, 877792.150 6...",0.00760
4,amenity,grave_yard,"POLYGON ((863660.833 6119954.343, 863460.452 6...",0.04445
...,...,...,...,...
617697,water,river,"POLYGON ((665138.055 6183776.697, 665095.266 6...",0.12376
617698,water,river,"POLYGON ((671309.138 6143210.684, 671307.632 6...",0.00381
617699,water,river,"POLYGON ((640169.170 6140546.869, 640188.573 6...",0.02047
617700,water,river,"POLYGON ((640963.105 6140376.982, 640969.944 6...",0.04175


Save the resulting geodataframe.

In [14]:
# Persist the reduced frame as-is to green_spaces2.parquet.
reduced.to_parquet("./dataset/raw_unprocessed/green_spaces2.parquet")

In [12]:
# Write the reduced frame a second time, to green_spaces.parquet, after passing
# it through the project helper with the Denmark CRS.
# NOTE(review): the helper's behaviour is not visible here -- presumably it
# rebuilds a GeoDataFrame with the given CRS; confirm in functions.py.
f.geoseries_to_geopandas(reduced, crs=f.DENMARK_CRS).to_parquet("./dataset/raw_unprocessed/green_spaces.parquet")

In [16]:
# Load the processed file of green spaces crossed by a bike lane.
# NOTE(review): a later cell rebinds `green_areas` to a different file, so the
# cells that use this frame depend on execution order.
green_areas = gpd.read_parquet("dataset/processed/green_spaces_crossed_by_bikelane_wgs84.parquet")

In [40]:
# Per-type summary restricted to the rows of `reduced` referenced by
# `green_areas`: the first (unnamed) level of its index is used as row labels.
# NOTE(review): presumably those labels are row ids of the green-space table --
# confirm against the notebook that produced the processed file.
a = (
    reduced.loc[green_areas.reset_index(level=[0, 1])["level_0"]]
    .groupby(["type1", "type2"])
    .agg(
        Number=("type2", "count"),
        Total=("area_km", "sum"),
        AVG=("area_km", "mean"),
        STD=("area_km", "std"),
    )
    .reset_index()
    .sort_values("Number", ascending=False)
)
# Total number of selected rows across all types.
a["Number"].sum()

66179

In [37]:
# Dump the summary `a` to table.txt, one line per row: cells joined by " & "
# and terminated by a double backslash (LaTeX-style table rows).
with open("table.txt", "w") as outfile:
    outfile.writelines(
        " & ".join(str(cell) for cell in row.values) + "\\\\" + "\n"
        for _, row in a.iterrows()
    )
        

In [15]:
# Read back the reduced green-space file written earlier and display it.
# NOTE(review): this rebinds `green_areas`, which another cell also uses for
# the crossed-by-bikelane file -- the result depends on execution order.
green_areas = gpd.read_parquet("./dataset/raw_unprocessed/green_spaces2.parquet")
green_areas

Unnamed: 0,type1,type2,geometry,area_km
0,amenity,grave_yard,"POLYGON ((868651.634 6136455.972, 868674.825 6...",0.00440
1,amenity,grave_yard,"POLYGON ((891462.668 6118760.110, 891468.951 6...",0.00368
2,amenity,grave_yard,"POLYGON ((871040.481 6118525.349, 871098.664 6...",0.00702
3,amenity,grave_yard,"POLYGON ((877690.109 6118708.526, 877792.150 6...",0.00760
4,amenity,grave_yard,"POLYGON ((863660.833 6119954.343, 863460.452 6...",0.04445
...,...,...,...,...
617697,water,river,"POLYGON ((665138.055 6183776.697, 665095.266 6...",0.12376
617698,water,river,"POLYGON ((671309.138 6143210.684, 671307.632 6...",0.00381
617699,water,river,"POLYGON ((640169.170 6140546.869, 640188.573 6...",0.02047
617700,water,river,"POLYGON ((640963.105 6140376.982, 640969.944 6...",0.04175
