# Import library

In [98]:
import osmnx as ox
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Import data

## Green space from landuse key

In [99]:
# green_landuse_area = gpd.read_file('../data/raw/osm/green space/green_landuse_area.geojson')
green_landuse_area = gpd.read_file('../data/raw/osm/green space/green_landuse_area_dresden.geojson')

In [100]:
green_landuse_area = green_landuse_area.to_crs(epsg=32633)

In [101]:
green_landuse_area.columns

Index(['element', 'id', 'landuse', 'name', 'denomination', 'religion',
       'wikidata', 'check_date:opening_hours', 'opening_hours', 'website',
       ...
       'substance', 'utility', 'was:man_made', 'name:en', 'level', 'material',
       'type', 'name:hsb', 'source:outline', 'geometry'],
      dtype='object', length=127)

In [102]:
green_landuse_area = green_landuse_area[['element', 'id', 'landuse', 
                                         'name', 'barrier', 'note', 
                                         'opening_hours', 'access', 'description', 
                                         'leisure', 'tourism', 'surface', 
                                         'landcover', 'amenity', 'natural', 
                                         'type', 'geometry']]

In [103]:
green_landuse_area['area'] = green_landuse_area['geometry'].area

In [104]:
green_landuse_area['access'].unique()

array([None, 'private', 'yes', 'no'], dtype=object)

In [105]:
print('total rows:', green_landuse_area.shape[0])  # number of rows
for i in green_landuse_area['access'].unique():
    if i == None:
        print('access is None:', green_landuse_area[green_landuse_area['access'].isnull()].shape[0])
    else:
        # print number of rows that has access e.g. private,yes,none or others
        print('access is', i , ':', green_landuse_area[green_landuse_area['access']==i].shape[0])

total rows: 13757
access is None: 13718
access is private : 26
access is yes : 1
access is no : 12


In [106]:
green_landuse_area['type'].unique()

array([None, 'multipolygon'], dtype=object)

In [107]:
green_landuse_area['geometry'].type.unique()

array(['Point', 'MultiPolygon', 'Polygon'], dtype=object)

## Green space from leisure key

In [108]:
# green_leisure_area = gpd.read_file('../data/raw/osm/green space/green_leisure_area.geojson')
green_leisure_area = gpd.read_file('../data/raw/osm/green space/green_leisure_area_dresden.geojson')

In [109]:
green_leisure_area = green_leisure_area.to_crs(epsg=32633)

In [110]:
green_leisure_area.columns

Index(['element', 'id', 'created_by', 'leisure', 'wheelchair', 'access',
       'name', 'opening_hours', 'operator', 'sport',
       ...
       'noname', 'name:de', 'pitch:net', 'pitch:net:material',
       'pitch:net:overhang', 'construction', 'operator:short', 'type',
       'name:fr', 'geometry'],
      dtype='object', length=146)

In [111]:
green_leisure_area['charge'].unique()

array([None, '8-14€ pro Stunde und Feld'], dtype=object)

In [112]:
green_leisure_area = green_leisure_area[['element', 'id', 'access', 
                                         'leisure', 'name', 'opening_hours', 'indoor',
                                         'operator', 'description', 'playground',
                                         'note', 'surface', 'fee', 
                                         'barrier', 'landuse', 'natural',
                                         'charge', 'landcover', 'fence_type', 'garden:type',
                                         'wall', 'type', 'geometry']]

In [113]:
green_leisure_area['area'] = green_leisure_area['geometry'].area

In [114]:
green_leisure_area['access'].unique()

array([None, 'yes', 'private', 'customers', 'permissive', 'no', 'unknown',
       'permit', 'school'], dtype=object)

In [115]:
print('total rows:', green_leisure_area.shape[0])  # number of rows
for i in green_leisure_area['access'].unique():
    if i == None:
        print('access is None:', green_leisure_area[green_leisure_area['access'].isnull()].shape[0])
    else:
        # print number of rows that has access e.g. private,yes,none or others
        print('access is', i , ':', green_leisure_area[green_leisure_area['access']==i].shape[0])

total rows: 3959
access is None: 2662
access is yes : 679
access is private : 458
access is customers : 103
access is permissive : 37
access is no : 8
access is unknown : 1
access is permit : 10
access is school : 1


### Separate playground out as we will use it to map PUGS (Probably no need)

Playground also have access tag which might help classify green space

In [116]:
# # filter playgrounds that are indoor -> probably don't need this
# playground_gdf = green_leisure_area[(green_leisure_area['leisure']=='playground') & (green_leisure_area['indoor']!='yes') & (green_leisure_area['element']!='node')]

In [117]:
# green_leisure_area_new = green_leisure_area[~((green_leisure_area['leisure']=='playground')&(green_leisure_area['element']=='node')) | (green_leisure_area['indoor']!='yes')]

In [118]:
green_leisure_area_new = green_leisure_area[green_leisure_area['indoor']!='yes']

## Green space from nature key

In [119]:
# green_natural_area = gpd.read_file('../data/raw/osm/green space/green_natural_area.geojson')
green_natural_area = gpd.read_file('../data/raw/osm/green space/green_natural_area_dresden.geojson')

In [120]:
green_natural_area = green_natural_area.to_crs(epsg=32633)

In [121]:
green_natural_area.columns

Index(['element', 'id', 'natural', 'wetland', 'name', 'source:name',
       'old_name', 'source', 'boundary', 'denotation', 'protect_class',
       'protection_title', 'wikidata', 'wikipedia', 'leaf_type', 'landuse',
       'note', 'image', 'created_by', 'alt_name', 'intermittent', 'salt',
       'leisure', 'leaf_cycle', 'fenced', 'fruit', 'trees', 'description:de',
       'attraction', 'name:cs', 'species:de', 'species:wikidata', 'wheelchair',
       'fixme', 'tourism', 'barrier', 'fence_type', 'ref', 'start_date',
       'description', 'FIXME', 'historic', 'resource', 'place', 'tidal',
       'survey:date', 'addr:city', 'addr:country', 'addr:housenumber',
       'addr:postcode', 'addr:street', 'abandoned:landuse',
       'short_protection_title', 'basin', 'embankment', 'landcover',
       'operator', 'height', 'disused', 'disused:leisure', 'surface',
       'old_name:cs', 'fence', 'level', 'operator:wikidata', 'man_made',
       'layer', 'seasonal', 'meadow', 'access', 'comment', 'ar

In [122]:
green_natural_area['comment'].unique()

array([None,
       'Mitte November 2018 startete eine heftige Baggeraktivität hier'],
      dtype=object)

In [123]:
green_natural_area = green_natural_area[['element', 'id', 'natural', 
                                         'name', 'note', 'leisure',
                                         'description', 'barrier', 'landuse',
                                         'landcover', 'access', 'fee', 'type', 'geometry']]

In [124]:
green_natural_area['area'] = green_natural_area['geometry'].area

In [125]:
green_natural_area['access'].unique()

array([None, 'private', 'yes'], dtype=object)

In [126]:
print('total rows:', green_natural_area.shape[0])  # number of rows
for i in green_natural_area['access'].unique():
    if i == None:
        print('access is None:', green_natural_area[green_natural_area['access'].isnull()].shape[0])
    else:
        # print number of rows that has access e.g. private,yes,none or others
        print('access is', i , ':', green_natural_area[green_natural_area['access']==i].shape[0])

total rows: 4610
access is None: 4606
access is private : 1
access is yes : 3


## Green space from camp site

In [127]:
# green_campsite_area = gpd.read_file('../data/raw/osm/green space/green_campsite_area.geojson')
green_campsite_area = gpd.read_file('../data/raw/osm/green space/green_campsite_area_dresden.geojson')

In [128]:
green_campsite_area = green_campsite_area.to_crs(epsg=32633)

In [129]:
green_campsite_area.head()

Unnamed: 0,element,id,created_by,tourism,access,addr:city,addr:country,addr:housenumber,addr:postcode,addr:street,...,motor_vehicle,beds,group_only,guest_house,rooms,attraction,capacity:disabled,seasonal,socket:cee_blue,geometry
0,node,257564076,JOSM,picnic_site,,,,,,,...,,,,,,,,,,POINT (411597.918 5665030.941)
1,node,257564078,JOSM,picnic_site,,,,,,,...,,,,,,,,,,POINT (411626.748 5664796.911)
2,node,257922815,JOSM,picnic_site,,,,,,,...,,,,,,,,,,POINT (411564.728 5665084.269)
3,node,257923367,,picnic_site,,,,,,,...,,,,,,,,,,POINT (411586.657 5667217.952)
4,node,258326088,,camp_site,private,Dresden,DE,81.0,1277.0,Tolkewitzer Straße,...,,,,,,,,,,POINT (417186.97 5654969.5)


In [130]:
green_campsite_area['area'] = green_campsite_area['geometry'].area

In [131]:
print('total rows:', green_campsite_area.shape[0])  # number of rows
for i in green_campsite_area['access'].unique():
    if i == None:
        print('access is None:', green_campsite_area[green_campsite_area['access'].isnull()].shape[0])
    else:
        # print number of rows that has access e.g. private,yes,none or others
        print('access is', i , ':', green_campsite_area[green_campsite_area['access']==i].shape[0])

total rows: 199
access is None: 183
access is private : 7
access is yes : 3
access is customers : 5
access is permit : 1


In [132]:
# filter node out
green_campsite_area = green_campsite_area[green_campsite_area['element']!='node']

## POI/Amenity from amenity 

In [133]:
# poi_amenity = gpd.read_file('../data/raw/osm/amenity/poi_amenity.geojson')
poi_amenity = gpd.read_file('../data/raw/osm/amenity/poi_amenity_dresden.geojson')

In [134]:
poi_amenity = poi_amenity.to_crs(epsg=32633)

In [135]:
poi_amenity.head()

Unnamed: 0,element,id,amenity,backrest,description,direction,material,seats,wikimedia_commons,created_by,...,deposit_ring,support,seats:separated,outdoor_seating,leisure,barrier,height,area,man_made,geometry
0,node,251567329,bench,yes,Bank am Zickzack-Weg,100.0,wood,3.0,File:Bank am Zickzackweg.jpg,,...,,,,,,,,,,POINT (403728.926 5649371.357)
1,node,255298026,bench,yes,,,,,,,...,,,,,,,,,,POINT (415787.203 5655716.144)
2,node,255925960,waste_basket,,,,,,,,...,,,,,,,,,,POINT (409010.616 5654975)
3,node,257567101,bench,,,,,,,JOSM,...,,,,,,,,,,POINT (411676.54 5669898.692)
4,node,257934358,bench,yes,,,,,,JOSM,...,,,,,,,,,,POINT (410258.291 5668038.07)


## POI/Amenity from leisure

In [136]:
# poi_leisure = gpd.read_file('../data/raw/osm/amenity/poi_leisure.geojson')
poi_leisure = gpd.read_file('../data/raw/osm/amenity/poi_leisure_dresden.geojson')

In [137]:
poi_leisure = poi_leisure.to_crs(epsg=32633)

In [138]:
poi_leisure.head()

Unnamed: 0,element,id,created_by,leisure,wheelchair,access,name,opening_hours,operator,sport,...,playground:springy,fixme,start_date,contact:email,contact:mobile,contact:website,addr:suburb,construction,type,geometry
0,node,255545201,Potlatch 0.8a,playground,no,,,,,,...,,,NaT,,,,,,,POINT (409940.451 5652983.729)
1,node,262693384,JOSM,playground,limited,,,,,,...,,,NaT,,,,,,,POINT (414361.811 5663180.907)
2,node,266593887,,playground,,yes,Würzburger Straße - Park,Mo-Su 08:00-22:00,Landeshauptstadt Dresden,,...,,,NaT,,,,,,,POINT (409905.579 5653957.552)
3,node,269751929,,playground,,yes,,,,,...,,,NaT,,,,,,,POINT (415406.577 5664375.327)
4,node,269949803,,playground,,,,,,,...,,,NaT,,,,,,,POINT (400428.614 5648263.427)


In [139]:
poi_leisure[poi_leisure['leisure']=='playground']

Unnamed: 0,element,id,created_by,leisure,wheelchair,access,name,opening_hours,operator,sport,...,playground:springy,fixme,start_date,contact:email,contact:mobile,contact:website,addr:suburb,construction,type,geometry
0,node,255545201,Potlatch 0.8a,playground,no,,,,,,...,,,NaT,,,,,,,POINT (409940.451 5652983.729)
1,node,262693384,JOSM,playground,limited,,,,,,...,,,NaT,,,,,,,POINT (414361.811 5663180.907)
2,node,266593887,,playground,,yes,Würzburger Straße - Park,Mo-Su 08:00-22:00,Landeshauptstadt Dresden,,...,,,NaT,,,,,,,POINT (409905.579 5653957.552)
3,node,269751929,,playground,,yes,,,,,...,,,NaT,,,,,,,POINT (415406.577 5664375.327)
4,node,269949803,,playground,,,,,,,...,,,NaT,,,,,,,POINT (400428.614 5648263.427)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1606,way,1346758566,,playground,,,,,,,...,,,NaT,,,,,,,"POLYGON ((416934.711 5653744.111, 416940.135 5..."
1607,way,1347149587,,playground,no,yes,Spielplatz Selliner Straße 50,,Sächsische WOHNUNGSGENOSSENSCHAFT Dresden eG,,...,,,NaT,,,,,,,"POLYGON ((414764.098 5664124.394, 414710.627 5..."
1608,way,1350900467,,playground,,yes,,,,,...,,,NaT,,,,,,,"POLYGON ((411636.37 5653164.516, 411634.334 56..."
1609,way,1351207900,,playground,,,,,,,...,,,NaT,,,,,,,"POLYGON ((415350.467 5665466.769, 415347.462 5..."


## Barrier

In [140]:
# poi_barrier = gpd.read_file('../data/raw/osm/amenity/poi_barrier.geojson')
poi_barrier = gpd.read_file('../data/raw/osm/amenity/poi_barrier_dresden.geojson')

In [141]:
poi_barrier = poi_barrier.to_crs(epsg=32633)

In [142]:
poi_barrier.head()

Unnamed: 0,element,id,access,barrier,entrance,motor_vehicle,name,description,wheelchair,foot,...,tourism,note:de,note:access,layer,open,fence_type,supervised,disused,check_date,geometry
0,node,21637779,private,gate,,,,,,,...,,,,,,,,,NaT,POINT (406183.943 5656047.484)
1,node,25231936,no,gate,,,,,,,...,,,,,,,,,NaT,POINT (405134.039 5654579.647)
2,node,25693660,private,gate,yes,,,,,,...,,,,,,,,,NaT,POINT (406232.177 5652902.947)
3,node,26730567,permissive,gate,,no,,,,,...,,,,,,,,,NaT,POINT (408924.589 5660739.366)
4,node,26750550,private,gate,,,,,,,...,,,,,,,,,NaT,POINT (413021.647 5658399.619)


In [143]:
poi_barrier.columns

Index(['element', 'id', 'access', 'barrier', 'entrance', 'motor_vehicle',
       'name', 'description', 'wheelchair', 'foot',
       ...
       'tourism', 'note:de', 'note:access', 'layer', 'open', 'fence_type',
       'supervised', 'disused', 'check_date', 'geometry'],
      dtype='object', length=103)

In [144]:
poi_barrier['entrance'].unique()

array([None, 'yes', 'exit', 'main', 'service', 'emergency'], dtype=object)

In [145]:
# poi_barrier[~poi_barrier['name'].isna()].explore()

## Road network

In [146]:
# road_network_edges = gpd.read_file('../data/raw/osm/network/road network/road_network_edges.geojson')
# road_network_nodes = gpd.read_file('../data/raw/osm/network/road network/road_network_nodes.geojson')

road_network_edges = gpd.read_file('../data/raw/osm/network/road network/road_network_edges_dresden.geojson')
road_network_nodes = gpd.read_file('../data/raw/osm/network/road network/road_network_nodes_dresden.geojson')

Skipping field highway: unsupported OGR type: 5
Skipping field maxspeed: unsupported OGR type: 5
Skipping field name: unsupported OGR type: 5
Skipping field service: unsupported OGR type: 5
Skipping field lanes: unsupported OGR type: 5
Skipping field width: unsupported OGR type: 5
Skipping field ref: unsupported OGR type: 5
Skipping field access: unsupported OGR type: 5
Skipping field tunnel: unsupported OGR type: 5


## Cycle network

In [147]:
# cycle_network_edges = gpd.read_file('../data/raw/osm/network/cycle network/cycle_network_edges.geojson')
# cycle_network_nodes = gpd.read_file('../data/raw/osm/network/cycle network/cycle_network_nodes.geojson')

cycle_network_edges = gpd.read_file('../data/raw/osm/network/cycle network/cycle_network_edges_dresden.geojson')
cycle_network_nodes = gpd.read_file('../data/raw/osm/network/cycle network/cycle_network_nodes_dresden.geojson')

Skipping field highway: unsupported OGR type: 5
Skipping field maxspeed: unsupported OGR type: 5
Skipping field name: unsupported OGR type: 5
Skipping field service: unsupported OGR type: 5
Skipping field lanes: unsupported OGR type: 5
Skipping field width: unsupported OGR type: 5
Skipping field ref: unsupported OGR type: 5
Skipping field access: unsupported OGR type: 5
Skipping field tunnel: unsupported OGR type: 5


## Footpath network

In [148]:
# footpath_network_edges = gpd.read_file('../data/raw/osm/network/footpath network/footpath_network_edges.geojson')
# footpath_network_nodes = gpd.read_file('../data/raw/osm/network/footpath network/footpath_network_nodes.geojson')

footpath_network_edges = gpd.read_file('../data/raw/osm/network/footpath network/footpath_network_edges_dresden.geojson')
footpath_network_nodes = gpd.read_file('../data/raw/osm/network/footpath network/footpath_network_nodes_dresden.geojson')

Skipping field highway: unsupported OGR type: 5
Skipping field maxspeed: unsupported OGR type: 5
Skipping field name: unsupported OGR type: 5
Skipping field service: unsupported OGR type: 5
Skipping field lanes: unsupported OGR type: 5
Skipping field bridge: unsupported OGR type: 5
Skipping field access: unsupported OGR type: 5
Skipping field width: unsupported OGR type: 5
Skipping field ref: unsupported OGR type: 5
Skipping field tunnel: unsupported OGR type: 5


# Explore data

In [149]:
all_green_space_gdf = pd.concat([green_landuse_area, green_leisure_area_new, green_natural_area, green_campsite_area], ignore_index=True) 

In [155]:
clean_all_green_space_gdf = all_green_space_gdf[['element', 'id', 'landuse', 'name', 'barrier', 'note', 'opening_hours',
       'access', 'description', 'leisure', 'tourism', 'surface', 'landcover',
       'natural', 'type', 'geometry', 'area', 'operator',
       'playground', 'fee', 'charge', 'fence_type', 'garden:type']]

In [159]:
clean_all_green_space_gdf.shape[0]

22363

## Filter polygon that area < 100m2 out

In [160]:
clean_all_green_space_gdf = clean_all_green_space_gdf[clean_all_green_space_gdf['area'] > 100]

In [161]:
clean_all_green_space_gdf.shape[0]

18748

In [172]:
clean_all_green_space_gdf.columns

Index(['element', 'id', 'landuse', 'name', 'barrier', 'note', 'opening_hours',
       'access', 'description', 'leisure', 'tourism', 'surface', 'landcover',
       'natural', 'type', 'geometry', 'area', 'operator', 'playground', 'fee',
       'charge', 'fence_type', 'garden:type', 'is_public'],
      dtype='object')

In [189]:
for i in clean_all_green_space_gdf.columns:
    if(i=='barrier')|(i=='access')|(i=='surface')|(i=='landcover')|(i=='natural')|(i=='fee')|(i=='charge')|(i=='garden:type'):
        print(i, ":", clean_all_green_space_gdf[i].unique())

barrier : [None 'fence' 'wall' 'hedge']
access : [None 'private' 'yes' 'no' 'permissive' 'customers' 'permit' 'school']
surface : [None 'grass' 'gravel' 'tartan' 'artificial_turf' 'clay' 'sand' 'asphalt'
 'ground' 'fine_gravel' 'compacted' 'dirt' 'rubber' 'concrete'
 'paving_stones' 'sett' 'unpaved' 'pebblestone' 'wood' 'woodchips'
 'decoturf' 'textile' 'dirt/sand' 'acrylic' 'paved' 'textile_chips' nan]
landcover : [None 'trees' 'grass' 'bushes' 'scrub' nan]
natural : [None 'scrub' 'grassland' 'wetland' 'wood' 'water' 'plant' 'heath']
fee : [nan None 'no' 'yes']
charge : [nan None '8-14€ pro Stunde und Feld' '25 EUR' '25 EUR/24 hours'
 '15 EUR/24 hours' '12 EUR/24 hours' '9.80 EUR/24 hours']
garden:type : [nan None 'botanical' 'residential' 'community' 'show_garden' 'private']


In [191]:
print("natural tag is water:", clean_all_green_space_gdf[clean_all_green_space_gdf['natural']=='water'].shape[0])

natural tag is water: 1


## Try spatial join first

### Make all gdfs have same columns' name

In [199]:
gdf_list = [green_landuse_area, green_leisure_area_new, green_natural_area, green_campsite_area]
columns_list = []
for i in gdf_list:
    columns_list += i.columns.tolist()

# remove duplicate columns' name
columns_list = list(dict.fromkeys(columns_list))
print(columns_list)

['element', 'id', 'landuse', 'name', 'barrier', 'note', 'opening_hours', 'access', 'description', 'leisure', 'tourism', 'surface', 'landcover', 'amenity', 'natural', 'type', 'geometry', 'area', 'indoor', 'operator', 'playground', 'fee', 'charge', 'fence_type', 'garden:type', 'wall', 'created_by', 'addr:city', 'addr:country', 'addr:housenumber', 'addr:postcode', 'addr:street', 'note:de', 'phone', 'tents', 'website', 'capacity', 'covered', 'backrest', 'table', 'layer', 'contact:phone', 'check_date', 'heritage', 'historic', 'material', 'start_date', 'wikimedia_commons', 'addr:suburb', 'source', 'fireplace', 'image', 'seats', 'shelter_type', 'barbecue_grill', 'drinking_water', 'openfire', 'roof', 'shelter', 'wheelchair', 'ele', 'capacity:tents', 'caravans', 'reservation', 'payment:cash', 'sanitary_dump_station', 'benches', 'covered:sides', 'contact:website', 'power_supply', 'water_point', 'toilets', 'nudism', 'static_caravans', 'internet_access', 'contact:email', 'contact:fax', 'building',

In [200]:
for i in gdf_list:
    temp_list = i.columns.tolist()
    temp_df = pd.DataFrame()
    for column in columns_list:
        if column not in temp_list:
            temp_df[column] = None
    i = pd.concat([i, temp_df], axis=1)

### Spatial join

In [218]:
# gdf_list = [green_landuse_area, green_leisure_area_new, green_natural_area, green_campsite_area]
joined_green_space_list = []
for i in range(len(gdf_list)):
    for j in range(len(gdf_list)):
        # right contain left or not
        temp_gdf = gdf_list[i].sjoin(gdf_list[j], how='right', predicate='within')
        print('i:', i, 'j:', j, 'row:', temp_gdf.shape[0])
        joined_green_space_list.append(temp_gdf)
joined_green_space = pd.concat(joined_green_space_list, ignore_index=True)

  df_reset.reset_index(inplace=True)
  df_reset.reset_index(inplace=True)


i: 0 j: 0 row: 13941
i: 0 j: 1 row: 4279
i: 0 j: 2 row: 4616


  df_reset.reset_index(inplace=True)
  df_reset.reset_index(inplace=True)


i: 0 j: 3 row: 42
i: 1 j: 0 row: 13917


  df_reset.reset_index(inplace=True)
  df_reset.reset_index(inplace=True)
  df_reset.reset_index(inplace=True)


i: 1 j: 1 row: 4392
i: 1 j: 2 row: 4615
i: 1 j: 3 row: 54
i: 2 j: 0 row: 13952
i: 2 j: 1 row: 4016


  df_reset.reset_index(inplace=True)
  df_reset.reset_index(inplace=True)
  df_reset.reset_index(inplace=True)
  df_reset.reset_index(inplace=True)


i: 2 j: 2 row: 4639
i: 2 j: 3 row: 78
i: 3 j: 0 row: 13759
i: 3 j: 1 row: 3957


  df_reset.reset_index(inplace=True)


i: 3 j: 2 row: 4610
i: 3 j: 3 row: 40


  joined_green_space = pd.concat(joined_green_space_list, ignore_index=True)
  joined_green_space = pd.concat(joined_green_space_list, ignore_index=True)
  joined_green_space = pd.concat(joined_green_space_list, ignore_index=True)
  joined_green_space = pd.concat(joined_green_space_list, ignore_index=True)


In [219]:
joined_green_space['id_left'] = joined_green_space['id_left'].fillna(-999)
joined_green_space['id_left'] = joined_green_space['id_left'].astype(int).astype(str)
joined_green_space['id_right'] = joined_green_space['id_right'].astype(str)

In [220]:
joined_green_space.shape[0]

90907

## Clean data

In [224]:
# remove itself within itself row
joined_green_space = joined_green_space[joined_green_space['id_left'] != joined_green_space['id_right']]

In [225]:
joined_green_space.shape[0]

68412

In [233]:
# comment this line out since now we aim to keep the small polygon and see whether meringing them is making sense or not
#
# clean_joined_green_space = joined_green_space[~joined_green_space['id_right'].isin(joined_green_space['id_left'])]

In [234]:
clean_joined_green_space = joined_green_space.drop_duplicates(subset=['id_right', 'id_left'])

In [235]:
clean_joined_green_space.shape[0]

24242

In [236]:
joined_green_space[joined_green_space['id_right'] == '365345'][['id_left', 'id_right', 'area_left', 'area_right']]

Unnamed: 0,id_left,id_right,area_left,area_right
14734,157942,365345,94969.782463,1.517605e+06
14735,157943,365345,100603.258852,1.517605e+06
14736,157944,365345,285535.962086,1.517605e+06
14737,396375,365345,1493.478101,1.517605e+06
14738,421481,365345,181451.349952,1.517605e+06
...,...,...,...,...
60602,28889290,365345,150.398311,1.517605e+06
60603,28889293,365345,49.059637,1.517605e+06
60604,28889296,365345,381.918061,1.517605e+06
60605,28889298,365345,71.053700,1.517605e+06


In [237]:
clean_joined_green_space[(clean_joined_green_space['id_right'] == '1340028517')][['id_left', 'id_right', 'area_left', 'area_right', 'name_right', 'element_right']]

Unnamed: 0,id_left,id_right,area_left,area_right,name_right,element_right
36638,3645255648,1340028517,0.0,8049.696317,,way
36639,359896413,1340028517,216.456693,8049.696317,,way
59652,-999,1340028517,,8049.696317,,way


In [238]:
clean_joined_green_space[(clean_joined_green_space['id_right'] == '3645255648')][['id_left', 'id_right', 'area_left', 'area_right', 'name_right', 'element_right']]

Unnamed: 0,id_left,id_right,area_left,area_right,name_right,element_right
14199,-999,3645255648,,0.0,,node


In [245]:
print("before filter surface:", clean_joined_green_space.shape[0])
new_clean_joined_green_space = clean_joined_green_space[~((clean_joined_green_space['surface_right']!='grass')&(~clean_joined_green_space['surface_right'].isna()))]
print("after filter surface:", new_clean_joined_green_space.shape[0])

before filter surface: 24242
after filter surface: 23677


## Explore after merge

In [248]:
print("check conflict of the access tag between small and large polygon")
print("small polygon and large polygon that have access tag:", clean_joined_green_space[(~clean_joined_green_space['access_left'].isna())&(~clean_joined_green_space['access_right'].isna())][['access_left', 'access_right', 'element_left', 'element_right', 'geometry']].shape[0])

check conflict of the access tag between small and large polygon
small polygon and large polygon that have access tag: 32


In [246]:
clean_joined_green_space[(~clean_joined_green_space['access_left'].isna())&(~clean_joined_green_space['access_right'].isna())][['access_left', 'access_right', 'element_left', 'element_right', 'geometry']]

Unnamed: 0,access_left,access_right,element_left,element_right,geometry
24029,yes,yes,way,way,"POLYGON ((408770.511 5656024.606, 408758.787 5..."
29131,private,private,node,way,"POLYGON ((404943.7 5651777.935, 404942.329 565..."
30446,private,private,node,way,"POLYGON ((407994.853 5654986.764, 408005.88 56..."
30447,private,private,node,way,"POLYGON ((407976.826 5655068.798, 407983.383 5..."
37609,private,private,way,relation,"POLYGON ((411168.668 5659687.749, 411156.693 5..."
37788,yes,yes,node,way,"POLYGON ((415732.56 5655709.506, 415741.359 56..."
37789,yes,yes,node,way,"POLYGON ((415732.56 5655709.506, 415741.359 56..."
37820,yes,yes,node,way,"POLYGON ((408908.377 5654964.766, 408915.736 5..."
37894,private,private,way,way,"POLYGON ((413884.249 5656446.789, 413887.46 56..."
38564,yes,yes,node,way,"POLYGON ((409256.384 5655642.846, 409257.143 5..."


In [243]:
clean_joined_green_space[(~clean_joined_green_space['access_left'].isna())&(~clean_joined_green_space['access_right'].isna())][['access_left', 'access_right', 'element_left', 'element_right', 'geometry']].explore()