# Import library

In [146]:
import osmnx as ox
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd

# Import data

## Green space from landuse key

In [147]:
green_landuse_area = gpd.read_file('../data/raw/osm/green space/green_landuse_area.geojson')

In [148]:
green_landuse_area = green_landuse_area.to_crs(epsg=32633)

In [149]:
green_landuse_area.columns

Index(['element', 'id', 'landuse', 'name', 'area:highway', 'old_name',
       'source', 'addr:city', 'addr:country', 'addr:housenumber',
       'addr:postcode', 'addr:street', 'email', 'website', 'barrier',
       'operator', 'phone', 'sport', 'created_by', 'note', 'opening_hours',
       'wikidata', 'check_date', 'access', 'description', 'leisure',
       'contact:email', 'contact:phone', 'contact:website', 'start_date',
       'disused:leisure', 'leaf_type', 'toilets:wheelchair', 'tourism',
       'wheelchair', 'height', 'surface', 'leaf_cycle', 'comment:history',
       'alt_name', 'kerb', 'produce', 'trees', 'name:signed', 'description:de',
       'embankment', 'building:levels', 'landcover', 'image', 'noname',
       'amenity', 'lit', 'parking', 'smoothness', 'meadow', 'wikipedia',
       'genus:de', 'natural', 'species', 'boundary', 'protect_class',
       'protection_title', 'ref', 'short_protection_title', 'crop', 'fixme',
       'level', 'type', 'name:hsb', 'layer', 'geometry'

In [150]:
green_landuse_area = green_landuse_area[['element', 'id', 'landuse', 
                                         'name', 'barrier', 'note', 
                                         'opening_hours', 'access', 'description', 
                                         'leisure', 'tourism', 'surface', 
                                         'landcover', 'amenity', 'natural', 
                                         'type', 'geometry']]

In [151]:
green_landuse_area['area'] = green_landuse_area['geometry'].area

In [152]:
green_landuse_area['access'].unique()

array([None, 'yes', 'private'], dtype=object)

In [153]:
print('total rows:', green_landuse_area.shape[0])  # number of rows
for i in green_landuse_area['access'].unique():
    if i == None:
        print('access is None:', green_landuse_area[green_landuse_area['access'].isnull()].shape[0])
    else:
        # print number of rows that has access e.g. private,yes,none or others
        print('access is', i , ':', green_landuse_area[green_landuse_area['access']==i].shape[0])

total rows: 2352
access is None: 2340
access is yes : 1
access is private : 11


In [154]:
green_landuse_area['type'].unique()

array(['multipolygon', None], dtype=object)

In [155]:
green_landuse_area['geometry'].type.unique()

array(['MultiPolygon', 'Polygon'], dtype=object)

## Green space from leisure key

In [156]:
green_leisure_area = gpd.read_file('../data/raw/osm/green space/green_leisure_area.geojson')

In [157]:
green_leisure_area = green_leisure_area.to_crs(epsg=32633)

In [158]:
green_leisure_area.columns

Index(['element', 'id', 'access', 'leisure', 'name', 'opening_hours',
       'operator', 'wheelchair', 'sport', 'description', 'layer', 'check_date',
       'playground', 'source', 'lit', 'max_age', 'min_age', 'addr:city',
       'addr:housenumber', 'addr:postcode', 'addr:street', 'note', 'surface',
       'indoor', 'check_date:opening_hours', 'cn_tud:token', 'fee', 'website',
       'garden:type', 'operator:type', 'reservation', 'email', 'old_name',
       'old_name:1897', 'alt_name', 'loc_name', 'wikidata', 'wikipedia',
       'heritage', 'name:etymology:wikidata', 'created_by', 'addr:country',
       'barrier', 'dog', 'playground:theme', 'toilets:wheelchair',
       'contact:email', 'contact:fax', 'contact:phone', 'contact:website',
       'attraction', 'name:cs', 'species:wikidata', 'landuse', 'image', 'ref',
       'phone', 'natural', 'area', 'bicycle', 'service', 'vehicle', 'hoops',
       'height', 'place', 'operator:wikidata', 'start_date', 'species:de',
       'description:de'

In [159]:
green_leisure_area['charge'].unique()

array([None, '8-14€ pro Stunde und Feld'], dtype=object)

In [160]:
green_leisure_area = green_leisure_area[['element', 'id', 'access', 
                                         'leisure', 'name', 'opening_hours', 'indoor',
                                         'operator', 'description', 'playground',
                                         'note', 'surface', 'fee', 
                                         'barrier', 'landuse', 'natural',
                                         'charge', 'landcover', 'fence_type', 
                                         'wall', 'type', 'geometry']]

In [161]:
green_leisure_area['area'] = green_leisure_area['geometry'].area

In [162]:
green_leisure_area['access'].unique()

array(['yes', 'private', None, 'customers', 'permissive', 'permit'],
      dtype=object)

In [163]:
print('total rows:', green_leisure_area.shape[0])  # number of rows
for i in green_leisure_area['access'].unique():
    if i == None:
        print('access is None:', green_leisure_area[green_leisure_area['access'].isnull()].shape[0])
    else:
        # print number of rows that has access e.g. private,yes,none or others
        print('access is', i , ':', green_leisure_area[green_leisure_area['access']==i].shape[0])

total rows: 1136
access is yes : 253
access is private : 138
access is None: 696
access is customers : 27
access is permissive : 19
access is permit : 3


## Green space from nature key

In [164]:
green_natural_area = gpd.read_file('../data/raw/osm/green space/green_natural_area.geojson')

In [165]:
green_natural_area = green_natural_area.to_crs(epsg=32633)

In [166]:
green_natural_area.columns

Index(['element', 'id', 'natural', 'attraction', 'name', 'name:cs',
       'species:de', 'species:wikidata', 'wheelchair', 'leaf_type', 'fixme',
       'note', 'leisure', 'leaf_cycle', 'description', 'embankment', 'barrier',
       'landuse', 'source', 'height', 'old_name:cs', 'level', 'man_made',
       'description:de', 'image', 'landcover', 'comment', 'access', 'fee',
       'species:wikipedia', 'type', 'geometry'],
      dtype='object')

In [167]:
green_natural_area['comment'].unique()

array([None,
       'Mitte November 2018 startete eine heftige Baggeraktivität hier'],
      dtype=object)

In [168]:
green_natural_area = green_natural_area[['element', 'id', 'natural', 
                                         'name', 'note', 'leisure',
                                         'description', 'barrier', 'landuse',
                                         'landcover', 'access', 'fee', 'type', 'geometry']]

In [169]:
green_natural_area['area'] = green_natural_area['geometry'].area

In [170]:
green_natural_area['access'].unique()

array([None, 'yes'], dtype=object)

In [171]:
print('total rows:', green_natural_area.shape[0])  # number of rows
for i in green_natural_area['access'].unique():
    if i == None:
        print('access is None:', green_natural_area[green_natural_area['access'].isnull()].shape[0])
    else:
        # print number of rows that has access e.g. private,yes,none or others
        print('access is', i , ':', green_natural_area[green_natural_area['access']==i].shape[0])

total rows: 529
access is None: 526
access is yes : 3


## Green space from camp site

In [172]:
green_campsite_area = gpd.read_file('../data/raw/osm/green space/green_campsite_area.geojson')

In [173]:
green_campsite_area = green_campsite_area.to_crs(epsg=32633)

In [174]:
green_campsite_area.head()

Unnamed: 0,element,id,tourism,shelter,seats,covered,access,addr:city,addr:housenumber,addr:postcode,addr:street,geometry
0,node,3128780402,picnic_site,,,,,,,,,POINT (412264.551 5653901.83)
1,node,3163118403,picnic_site,no,,,,,,,,POINT (410834.202 5654098.847)
2,node,3806645587,picnic_site,no,,,,,,,,POINT (410891.184 5653950.093)
3,node,4352882229,picnic_site,,,,,,,,,POINT (409894.214 5653944.981)
4,node,4888415416,picnic_site,,20.0,,,,,,,POINT (410702.148 5653820.301)


In [175]:
green_campsite_area['area'] = green_campsite_area['geometry'].area

In [176]:
print('total rows:', green_campsite_area.shape[0])  # number of rows
for i in green_campsite_area['access'].unique():
    if i == None:
        print('access is None:', green_campsite_area[green_campsite_area['access'].isnull()].shape[0])
    else:
        # print number of rows that has access e.g. private,yes,none or others
        print('access is', i , ':', green_campsite_area[green_campsite_area['access']==i].shape[0])

total rows: 11
access is None: 9
access is private : 2


## Spatial join green space polygons (within)
If small polygon is in larger polygon, only keep the larger polygon

In [391]:
gdf_list = [green_landuse_area, green_leisure_area, green_natural_area, green_campsite_area]
count = 0
for i in range(len(gdf_list)):
    for j in range(len(gdf_list)):
        if i != j:
            # right contain left or not
            temp_gdf = gdf_list[i].sjoin(gdf_list[j], how='right', predicate='within')
            print('i:', i, 'j:', j, 'row:', temp_gdf.shape[0])
            if count == 0:
                joined_green_space = temp_gdf
            else:
                joined_green_space = pd.concat([joined_green_space, temp_gdf], ignore_index=True)
            count += 1

i: 0 j: 1 row: 1268
i: 0 j: 2 row: 529
i: 0 j: 3 row: 11
i: 1 j: 0 row: 2400
i: 1 j: 2 row: 529
i: 1 j: 3 row: 11
i: 2 j: 0 row: 2371
i: 2 j: 1 row: 1149
i: 2 j: 3 row: 11
i: 3 j: 0 row: 2352
i: 3 j: 1 row: 1136
i: 3 j: 2 row: 529


In [403]:
joined_green_space['id_left'] = joined_green_space['id_left'].fillna(-999)
joined_green_space['id_left'] = joined_green_space['id_left'].astype(int).astype(str)
joined_green_space['id_right'] = joined_green_space['id_right'].astype(str)

In [405]:
joined_green_space[joined_green_space['id_right'] == '365345'][['id_left', 'id_right', 'area_left', 'area_right']]

Unnamed: 0,id_left,id_right,area_left,area_right
270,157942,365345,94969.782463,1.517605e+06
271,157943,365345,100603.258852,1.517605e+06
272,157944,365345,285535.962086,1.517605e+06
273,396375,365345,1493.478101,1.517605e+06
274,421481,365345,181451.349952,1.517605e+06
...,...,...,...,...
7390,28889290,365345,150.398311,1.517605e+06
7391,28889293,365345,49.059637,1.517605e+06
7392,28889296,365345,381.918061,1.517605e+06
7393,28889298,365345,71.053700,1.517605e+06


In [406]:
clean_joined_green_space = joined_green_space[~joined_green_space['id_right'].isin(joined_green_space['id_left'])]

In [407]:
clean_joined_green_space

Unnamed: 0,index_left,element_left,id_left,landuse_left,name_left,barrier_left,note_left,opening_hours_left,access_left,description_left,...,tourism_right,shelter,seats,covered,addr:city,addr:housenumber,addr:postcode,addr:street,fee_left,fee_right
0,,,-999,,,,,,,,...,,,,,,,,,,
1,,,-999,,,,,,,,...,,,,,,,,,,
3,,,-999,,,,,,,,...,,,,,,,,,,
4,,,-999,,,,,,,,...,,,,,,,,,,
5,,,-999,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12291,,,-999,,,,,,,,...,,,,,,,,,,
12292,,,-999,,,,,,,,...,,,,,,,,,,
12293,,,-999,,,,,,,,...,,,,,,,,,,
12294,,,-999,,,,,,,,...,,,,,,,,,,


In [408]:
# 157942 is small polygon inside
clean_joined_green_space[clean_joined_green_space['id_right'] == '157942'][['id_left', 'id_right', 'area_left', 'area_right']]

Unnamed: 0,id_left,id_right,area_left,area_right


In [409]:
clean_joined_green_space[clean_joined_green_space['id_right'] == '365345']

Unnamed: 0,index_left,element_left,id_left,landuse_left,name_left,barrier_left,note_left,opening_hours_left,access_left,description_left,...,tourism_right,shelter,seats,covered,addr:city,addr:housenumber,addr:postcode,addr:street,fee_left,fee_right
270,1.0,relation,157942,forest,,,,,,,...,,,,,,,,,,
271,2.0,relation,157943,forest,,,,,,,...,,,,,,,,,,
272,3.0,relation,157944,forest,,,,,,,...,,,,,,,,,,
273,4.0,relation,396375,grass,,,,,,,...,,,,,,,,,,
274,5.0,relation,421481,forest,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7390,32.0,way,28889290,,,,,,,,...,,,,,,,,,,
7391,33.0,way,28889293,,,,,,,,...,,,,,,,,,,
7392,34.0,way,28889296,,,,,,,,...,,,,,,,,,,
7393,35.0,way,28889298,,,,,,,,...,,,,,,,,,,


In [410]:
clean_joined_green_space[clean_joined_green_space['id_right'] == '365345'].columns

Index(['index_left', 'element_left', 'id_left', 'landuse_left', 'name_left',
       'barrier_left', 'note_left', 'opening_hours_left', 'access_left',
       'description_left', 'leisure_left', 'tourism', 'surface_left',
       'landcover_left', 'amenity', 'natural_left', 'type_left', 'area_left',
       'element_right', 'id_right', 'access_right', 'leisure_right',
       'name_right', 'opening_hours_right', 'indoor', 'operator',
       'description_right', 'playground', 'note_right', 'surface_right', 'fee',
       'barrier_right', 'landuse_right', 'natural_right', 'charge',
       'landcover_right', 'fence_type', 'wall', 'type_right', 'geometry',
       'area_right', 'opening_hours', 'surface', 'landuse', 'name', 'barrier',
       'note', 'description', 'leisure', 'tourism_left', 'landcover',
       'natural', 'type', 'tourism_right', 'shelter', 'seats', 'covered',
       'addr:city', 'addr:housenumber', 'addr:postcode', 'addr:street',
       'fee_left', 'fee_right'],
      dtype='obje

In [411]:
clean_joined_green_space[(clean_joined_green_space['id_right'] == '365345')][['id_left', 'id_right', 'area_left', 'area_right', 'name_right', 'element_right']]

Unnamed: 0,id_left,id_right,area_left,area_right,name_right,element_right
270,157942,365345,94969.782463,1.517605e+06,Großer Garten,relation
271,157943,365345,100603.258852,1.517605e+06,Großer Garten,relation
272,157944,365345,285535.962086,1.517605e+06,Großer Garten,relation
273,396375,365345,1493.478101,1.517605e+06,Großer Garten,relation
274,421481,365345,181451.349952,1.517605e+06,Großer Garten,relation
...,...,...,...,...,...,...
7390,28889290,365345,150.398311,1.517605e+06,Großer Garten,relation
7391,28889293,365345,49.059637,1.517605e+06,Großer Garten,relation
7392,28889296,365345,381.918061,1.517605e+06,Großer Garten,relation
7393,28889298,365345,71.053700,1.517605e+06,Großer Garten,relation


In [412]:
clean_joined_green_space['id_right'].unique()

array(['266593887', '274970363', '289488035', ..., '1340028517',
       '1342392212', '1346869507'], dtype=object)

In [413]:
clean_joined_green_space[(clean_joined_green_space['id_right'] == '1340028517')][['id_left', 'id_right', 'area_left', 'area_right', 'name_right', 'element_right']]

Unnamed: 0,id_left,id_right,area_left,area_right,name_right,element_right
4204,3645255648,1340028517,0.0,8049.696317,,way
4205,359896413,1340028517,216.456693,8049.696317,,way
7116,-999,1340028517,,8049.696317,,way
10628,-999,1340028517,,8049.696317,,way


In [414]:
clean_joined_green_space[clean_joined_green_space['id_left'] == '359896413']

Unnamed: 0,index_left,element_left,id_left,landuse_left,name_left,barrier_left,note_left,opening_hours_left,access_left,description_left,...,tourism_right,shelter,seats,covered,addr:city,addr:housenumber,addr:postcode,addr:street,fee_left,fee_right
4205,714.0,way,359896413,,,,,,yes,,...,,,,,,,,,,


In [415]:
clean_joined_green_space[clean_joined_green_space['id_right'] == '1340028517'].explore()

In [416]:
clean_joined_green_space[clean_joined_green_space['id_left'] == '359896413'].explore()

---

In [277]:
final_joined_green_space = clean_joined_green_space.sort_values(by='name_right', ascending=True).drop_duplicates(subset=['id_right', 'element_right', 'area_right'])

In [278]:
final_joined_green_space

Unnamed: 0,index_left,element_left,id_left,landuse_left,name_left,barrier_left,note_left,opening_hours_left,access_left,description_left,...,tourism_right,shelter,seats,covered,addr:city,addr:housenumber,addr:postcode,addr:street,fee_left,fee_right
806,,,,,,,,,,,...,,,,,,,,,,
7949,,,,,,,,,,,...,,,,,,,,,,
8210,,,,,,,,,,,...,,,,,,,,,,yes
7947,,,,,,,,,,,...,,,,,,,,,,
7945,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10901,,,,,,,,,,,...,,,,,,,,,,
10933,,,,,,,,,,,...,,,,,,,,,,
11219,,,,,,,,,,,...,,,,,,,,,,
11312,,,,,,,,,,,...,,,,,,,,,,


In [282]:
final_joined_green_space[final_joined_green_space['id_right'] == 365345][['id_left', 'id_right', 'area_left', 'area_right']]

Unnamed: 0,id_left,id_right,area_left,area_right
10901,,365345,,1517605.0


-----

In [227]:
final_joined_green_space.columns

Index(['element_left', 'id_left', 'landuse_left', 'name_left', 'barrier_left',
       'note_left', 'opening_hours_left', 'access_left', 'description_left',
       'leisure_left', 'tourism', 'surface_left', 'landcover_left', 'amenity',
       'natural_left', 'type_left', 'geometry', 'area_left', 'index_right',
       'element_right', 'id_right', 'access_right', 'leisure_right',
       'name_right', 'opening_hours_right', 'indoor', 'operator',
       'description_right', 'playground', 'note_right', 'surface_right', 'fee',
       'barrier_right', 'landuse_right', 'natural_right', 'charge',
       'landcover_right', 'fence_type', 'wall', 'type_right', 'area_right',
       'opening_hours', 'surface', 'landuse', 'name', 'barrier', 'note',
       'description', 'leisure', 'tourism_left', 'landcover', 'natural',
       'type', 'tourism_right', 'shelter', 'seats', 'covered', 'addr:city',
       'addr:housenumber', 'addr:postcode', 'addr:street', 'fee_left',
       'fee_right'],
      dtype='obj

In [240]:
clean_joined_green_space[~clean_joined_green_space['area_right'].isna()][['fee', 'charge', 'access_left', 'access_right', 'area_left', 'area_right']]

Unnamed: 0,fee,charge,access_left,access_right,area_left,area_right


In [313]:
joined_green_space[(joined_green_space['id_left'] == 365345)].explore()

  joined_green_space[(joined_green_space['id_left'] == 365345)].explore()


In [249]:
joined_green_space[joined_green_space['id_left'] == 157942][['id_left', 'id_right', 'area_left', 'area_right']]

Unnamed: 0,id_left,id_right,area_left,area_right
1,157942,365345.0,94969.782463,1517605.0
2353,157942,,94969.782463,
4705,157942,,94969.782463,


In [270]:
joined_green_space[joined_green_space['id_left'] == 157942][['id_left', 'id_right', 'area_left', 'area_right']]

Unnamed: 0,id_left,id_right,area_left,area_right
270,157942.0,365345,94969.782463,1517605.0


In [248]:
clean_joined_green_space[clean_joined_green_space['id_left'] == 157942][['id_left', 'id_right', 'area_left', 'area_right']]

Unnamed: 0,id_left,id_right,area_left,area_right
2353,157942,,94969.782463,
4705,157942,,94969.782463,


In [257]:
clean_joined_green_space[clean_joined_green_space['id_left'] == 365345]

Unnamed: 0,element_left,id_left,landuse_left,name_left,barrier_left,note_left,opening_hours_left,access_left,description_left,leisure_left,...,tourism_right,shelter,seats,covered,addr:city,addr:housenumber,addr:postcode,addr:street,fee_left,fee_right
7326,relation,365345,,Großer Garten,,,,,,park,...,,,,,,,,,,
8462,relation,365345,,Großer Garten,,,,,,park,...,,,,,,,,,,
9598,relation,365345,,,,,,,,,...,,,,,,,,,,


In [237]:
temp = joined_green_space[~joined_green_space['area_right'].isna()][['id_left', 'id_right', 'area_left', 'area_right']]

In [238]:
temp['area_right'].unique()

array([1.51760534e+06, 4.51385572e+03, 6.33332445e+03, 7.74575604e+03,
       6.66928908e+03, 5.10576001e+04, 2.80771471e+03, 1.24377854e+03,
       2.94151108e+03, 1.49298877e+04, 8.47215962e+03, 1.04993594e+05,
       4.09598753e+04, 2.09745935e+03, 1.79670089e+03, 2.79345008e+03,
       7.44093676e+04, 3.63452737e+03, 2.50278384e+04, 5.38435476e+03,
       1.54150336e+04, 5.04885962e+03, 3.90068322e+03, 1.12144502e+04,
       2.15517879e+03, 2.83585603e+03, 2.62141221e+03, 6.42678666e+03,
       4.71043444e+03, 3.50036573e+03, 3.66513690e+03, 3.53441495e+04,
       8.04969632e+03, 1.41823028e+04, 1.64443582e+04, 2.01698529e+03,
       2.31061338e+03, 8.27314387e+03, 6.74488877e+03, 1.38475459e+04,
       4.41414085e+03, 5.48354143e+03, 2.80142266e+03, 3.60409000e+03,
       7.48573964e+02, 8.04115917e+03, 2.67120500e+04, 2.76202645e+03,
       4.54168210e+03, 3.20478956e+03, 3.23550919e+03, 2.94791253e+03,
       1.20105288e+02, 1.94353118e+03, 2.11186821e+02, 3.06434503e+04,
      

In [241]:
temp

Unnamed: 0,id_left,id_right,area_left,area_right
1,157942,3.653450e+05,94969.782463,1.517605e+06
2,157943,3.653450e+05,100603.258852,1.517605e+06
3,157944,3.653450e+05,285535.962086,1.517605e+06
4,396375,3.653450e+05,1493.478101,1.517605e+06
5,421481,3.653450e+05,181451.349952,1.517605e+06
...,...,...,...,...
11450,1079105453,3.006233e+08,24.056225,5.384355e+03
11487,1185503443,9.858176e+06,546.909172,2.502784e+04
12057,6591319651,1.024236e+09,0.000000,1.729487e+03
12058,12213145709,4.199469e+07,0.000000,2.572846e+02
