# Import library

In [228]:
import osmnx as ox
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Import data

## Green space from landuse key

In [229]:
green_landuse_area = gpd.read_file('../data/raw/osm/green space/green_landuse_area.geojson')

In [230]:
green_landuse_area = green_landuse_area.to_crs(epsg=32633)

In [231]:
green_landuse_area.columns

Index(['element', 'id', 'landuse', 'name', 'area:highway', 'old_name',
       'source', 'addr:city', 'addr:country', 'addr:housenumber',
       'addr:postcode', 'addr:street', 'email', 'website', 'barrier',
       'operator', 'phone', 'sport', 'created_by', 'note', 'opening_hours',
       'wikidata', 'check_date', 'access', 'description', 'leisure',
       'contact:email', 'contact:phone', 'contact:website', 'start_date',
       'disused:leisure', 'leaf_type', 'toilets:wheelchair', 'tourism',
       'wheelchair', 'height', 'surface', 'leaf_cycle', 'comment:history',
       'alt_name', 'kerb', 'produce', 'trees', 'name:signed', 'description:de',
       'embankment', 'building:levels', 'landcover', 'image', 'noname',
       'amenity', 'lit', 'parking', 'smoothness', 'meadow', 'wikipedia',
       'genus:de', 'natural', 'species', 'boundary', 'protect_class',
       'protection_title', 'ref', 'short_protection_title', 'crop', 'fixme',
       'level', 'type', 'name:hsb', 'layer', 'geometry'

In [232]:
green_landuse_area = green_landuse_area[['element', 'id', 'landuse', 
                                         'name', 'barrier', 'note', 
                                         'opening_hours', 'access', 'description', 
                                         'leisure', 'tourism', 'surface', 
                                         'landcover', 'amenity', 'natural', 
                                         'type', 'geometry']]

In [233]:
green_landuse_area['area'] = green_landuse_area['geometry'].area

In [234]:
green_landuse_area['access'].unique()

array([None, 'yes', 'private'], dtype=object)

In [235]:
print('total rows:', green_landuse_area.shape[0])  # number of rows
for i in green_landuse_area['access'].unique():
    if i == None:
        print('access is None:', green_landuse_area[green_landuse_area['access'].isnull()].shape[0])
    else:
        # print number of rows that has access e.g. private,yes,none or others
        print('access is', i , ':', green_landuse_area[green_landuse_area['access']==i].shape[0])

total rows: 2352
access is None: 2340
access is yes : 1
access is private : 11


In [236]:
green_landuse_area['type'].unique()

array(['multipolygon', None], dtype=object)

In [237]:
green_landuse_area['geometry'].type.unique()

array(['MultiPolygon', 'Polygon'], dtype=object)

## Green space from leisure key

In [238]:
green_leisure_area = gpd.read_file('../data/raw/osm/green space/green_leisure_area.geojson')

In [239]:
green_leisure_area = green_leisure_area.to_crs(epsg=32633)

In [240]:
green_leisure_area.columns

Index(['element', 'id', 'access', 'leisure', 'name', 'opening_hours',
       'operator', 'wheelchair', 'sport', 'description', 'layer', 'check_date',
       'playground', 'source', 'lit', 'max_age', 'min_age', 'addr:city',
       'addr:housenumber', 'addr:postcode', 'addr:street', 'note', 'surface',
       'indoor', 'check_date:opening_hours', 'cn_tud:token', 'fee', 'website',
       'garden:type', 'operator:type', 'reservation', 'email', 'old_name',
       'old_name:1897', 'alt_name', 'loc_name', 'wikidata', 'wikipedia',
       'heritage', 'name:etymology:wikidata', 'created_by', 'addr:country',
       'barrier', 'dog', 'playground:theme', 'toilets:wheelchair',
       'contact:email', 'contact:fax', 'contact:phone', 'contact:website',
       'attraction', 'name:cs', 'species:wikidata', 'landuse', 'image', 'ref',
       'phone', 'natural', 'area', 'bicycle', 'service', 'vehicle', 'hoops',
       'height', 'place', 'operator:wikidata', 'start_date', 'species:de',
       'description:de'

In [241]:
green_leisure_area['charge'].unique()

array([None, '8-14€ pro Stunde und Feld'], dtype=object)

In [242]:
green_leisure_area = green_leisure_area[['element', 'id', 'access', 
                                         'leisure', 'name', 'opening_hours', 'indoor',
                                         'operator', 'description', 'playground',
                                         'note', 'surface', 'fee', 
                                         'barrier', 'landuse', 'natural',
                                         'charge', 'landcover', 'fence_type', 
                                         'wall', 'type', 'geometry']]

In [243]:
green_leisure_area['area'] = green_leisure_area['geometry'].area

In [244]:
green_leisure_area['access'].unique()

array(['yes', 'private', None, 'customers', 'permissive', 'permit'],
      dtype=object)

In [245]:
print('total rows:', green_leisure_area.shape[0])  # number of rows
for i in green_leisure_area['access'].unique():
    if i == None:
        print('access is None:', green_leisure_area[green_leisure_area['access'].isnull()].shape[0])
    else:
        # print number of rows that has access e.g. private,yes,none or others
        print('access is', i , ':', green_leisure_area[green_leisure_area['access']==i].shape[0])

total rows: 1136
access is yes : 253
access is private : 138
access is None: 696
access is customers : 27
access is permissive : 19
access is permit : 3


### Separate playground out as we will use it to map PUGS

In [246]:
# filter playgrounds that are indoor
playground_gdf = green_leisure_area[(green_leisure_area['leisure']=='playground') & ~(green_leisure_area['indoor']=='yes')]

In [247]:
green_leisure_area_new = green_leisure_area[(green_leisure_area['leisure']!='playground') | (green_leisure_area['indoor']=='yes')]

## Green space from nature key

In [248]:
green_natural_area = gpd.read_file('../data/raw/osm/green space/green_natural_area.geojson')

In [249]:
green_natural_area = green_natural_area.to_crs(epsg=32633)

In [250]:
green_natural_area.columns

Index(['element', 'id', 'natural', 'attraction', 'name', 'name:cs',
       'species:de', 'species:wikidata', 'wheelchair', 'leaf_type', 'fixme',
       'note', 'leisure', 'leaf_cycle', 'description', 'embankment', 'barrier',
       'landuse', 'source', 'height', 'old_name:cs', 'level', 'man_made',
       'description:de', 'image', 'landcover', 'comment', 'access', 'fee',
       'species:wikipedia', 'type', 'geometry'],
      dtype='object')

In [251]:
green_natural_area['comment'].unique()

array([None,
       'Mitte November 2018 startete eine heftige Baggeraktivität hier'],
      dtype=object)

In [252]:
green_natural_area = green_natural_area[['element', 'id', 'natural', 
                                         'name', 'note', 'leisure',
                                         'description', 'barrier', 'landuse',
                                         'landcover', 'access', 'fee', 'type', 'geometry']]

In [253]:
green_natural_area['area'] = green_natural_area['geometry'].area

In [254]:
green_natural_area['access'].unique()

array([None, 'yes'], dtype=object)

In [255]:
print('total rows:', green_natural_area.shape[0])  # number of rows
for i in green_natural_area['access'].unique():
    if i == None:
        print('access is None:', green_natural_area[green_natural_area['access'].isnull()].shape[0])
    else:
        # print number of rows that has access e.g. private,yes,none or others
        print('access is', i , ':', green_natural_area[green_natural_area['access']==i].shape[0])

total rows: 529
access is None: 526
access is yes : 3


## Green space from camp site

In [256]:
green_campsite_area = gpd.read_file('../data/raw/osm/green space/green_campsite_area.geojson')

In [257]:
green_campsite_area = green_campsite_area.to_crs(epsg=32633)

In [258]:
green_campsite_area.head()

Unnamed: 0,element,id,tourism,shelter,seats,covered,access,addr:city,addr:housenumber,addr:postcode,addr:street,geometry
0,node,3128780402,picnic_site,,,,,,,,,POINT (412264.551 5653901.83)
1,node,3163118403,picnic_site,no,,,,,,,,POINT (410834.202 5654098.847)
2,node,3806645587,picnic_site,no,,,,,,,,POINT (410891.184 5653950.093)
3,node,4352882229,picnic_site,,,,,,,,,POINT (409894.214 5653944.981)
4,node,4888415416,picnic_site,,20.0,,,,,,,POINT (410702.148 5653820.301)


In [259]:
green_campsite_area['area'] = green_campsite_area['geometry'].area

In [260]:
print('total rows:', green_campsite_area.shape[0])  # number of rows
for i in green_campsite_area['access'].unique():
    if i == None:
        print('access is None:', green_campsite_area[green_campsite_area['access'].isnull()].shape[0])
    else:
        # print number of rows that has access e.g. private,yes,none or others
        print('access is', i , ':', green_campsite_area[green_campsite_area['access']==i].shape[0])

total rows: 11
access is None: 9
access is private : 2


## POI/Amenity from amenity 

In [261]:
poi_amenity = gpd.read_file('../data/raw/osm/amenity/poi_amenity.geojson')

In [262]:
poi_amenity = poi_amenity.to_crs(epsg=32633)

In [263]:
poi_amenity.head()

Unnamed: 0,element,id,amenity,backrest,check_date,material,covered,colour,seats,armrest,...,lit,bench:type,vending,access,operator,deposit_ring,leisure,barrier,man_made,geometry
0,node,255925960,waste_basket,,NaT,,,,,,...,,,,,,,,,,POINT (409010.616 5654975)
1,node,266687099,bench,yes,NaT,,,,,,...,,,,,,,,,,POINT (408942.691 5654898.334)
2,node,304869266,bench,yes,2021-08-13,,,,,,...,,,,,,,,,,POINT (412089.991 5656475.039)
3,node,304869267,bench,yes,2021-08-13,,,,,,...,,,,,,,,,,POINT (412091.465 5656481.777)
4,node,304869268,bench,yes,2021-08-13,,,,,,...,,,,,,,,,,POINT (412090.847 5656487.85)


## POI/Amenity from leisure

In [264]:
poi_leisure = gpd.read_file('../data/raw/osm/amenity/poi_leisure.geojson')

In [265]:
poi_leisure = poi_leisure.to_crs(epsg=32633)

In [266]:
poi_leisure.head()

Unnamed: 0,element,id,access,leisure,name,opening_hours,operator,wheelchair,sport,layer,...,capacity,fence_type,fixme,addr:country,contact:email,contact:mobile,contact:website,level,type,geometry
0,node,266593887,yes,playground,Würzburger Straße - Park,Mo-Su 08:00-22:00,Landeshauptstadt Dresden,,,,...,,,,,,,,,,POINT (409905.579 5653957.552)
1,node,274970363,private,playground,,,Vonovia,yes,,,...,,,,,,,,,,POINT (410248.323 5654411.132)
2,node,280462694,yes,playground,,,,,,,...,,,,,,,,,,POINT (413613.876 5655700.3)
3,node,289488035,yes,playground,Spielplatz Schanzenstraße,,,yes,table_tennis,,...,,,,,,,,,,POINT (412445.204 5659013.112)
4,node,372545516,customers,playground,,,,,,,...,,,,,,,,,,POINT (414579.736 5654048.389)


## Barrier

In [447]:
poi_barrier = gpd.read_file('../data/raw/osm/amenity/poi_barrier.geojson')

In [448]:
poi_barrier = poi_barrier.to_crs(epsg=32633)

In [449]:
poi_barrier.head()

Unnamed: 0,element,id,access,barrier,locked,bicycle,foot,material,wheelchair,created_by,...,start_date,amenity,maxlength,maxheight,colour,note:access,door,operator,fixme,geometry
0,node,26750550,private,gate,,,,,,,...,,,,,,,,,,POINT (413021.647 5658399.619)
1,node,26848700,,gate,,,,,,,...,,,,,,,,,,POINT (408388.208 5657674.043)
2,node,91515286,,gate,,,,,,,...,,,,,,,,,,POINT (410429.15 5656568.648)
3,node,104550850,,gate,,,,,,,...,,,,,,,,,,POINT (409447.582 5658644.222)
4,node,104605769,,gate,,,,,,,...,,,,,,,,,,POINT (410618.188 5657957.624)


## Road network

In [267]:
road_network_edges = gpd.read_file('../data/raw/osm/network/road network/road_network_edges.geojson')
road_network_nodes = gpd.read_file('../data/raw/osm/network/road network/road_network_nodes.geojson')

Skipping field highway: unsupported OGR type: 5
Skipping field lanes: unsupported OGR type: 5
Skipping field maxspeed: unsupported OGR type: 5
Skipping field name: unsupported OGR type: 5
Skipping field width: unsupported OGR type: 5


## Cycle network

In [268]:
cycle_network_edges = gpd.read_file('../data/raw/osm/network/cycle network/cycle_network_edges.geojson')
cycle_network_nodes = gpd.read_file('../data/raw/osm/network/cycle network/cycle_network_nodes.geojson')

Skipping field highway: unsupported OGR type: 5
Skipping field lanes: unsupported OGR type: 5
Skipping field maxspeed: unsupported OGR type: 5
Skipping field name: unsupported OGR type: 5
Skipping field ref: unsupported OGR type: 5
Skipping field access: unsupported OGR type: 5
Skipping field width: unsupported OGR type: 5
Skipping field service: unsupported OGR type: 5
Skipping field tunnel: unsupported OGR type: 5


## Footpath network

In [269]:
footpath_network_edges = gpd.read_file('../data/raw/osm/network/footpath network/footpath_network_edges.geojson')
footpath_network_nodes = gpd.read_file('../data/raw/osm/network/footpath network/footpath_network_nodes.geojson')

Skipping field highway: unsupported OGR type: 5
Skipping field lanes: unsupported OGR type: 5
Skipping field maxspeed: unsupported OGR type: 5
Skipping field name: unsupported OGR type: 5
Skipping field ref: unsupported OGR type: 5
Skipping field access: unsupported OGR type: 5
Skipping field bridge: unsupported OGR type: 5
Skipping field width: unsupported OGR type: 5
Skipping field service: unsupported OGR type: 5
Skipping field tunnel: unsupported OGR type: 5


# Spatial join green space polygons (within)
If small polygon is in larger polygon, only keep the larger polygon

## Make all gdfs to have same columns' name first

In [270]:
gdf_list = [green_landuse_area, green_leisure_area_new, green_natural_area, green_campsite_area]
columns_list = []
for i in gdf_list:
    columns_list += i.columns.tolist()

# remove duplicate columns' name
columns_list = list(dict.fromkeys(columns_list))
print(columns_list)

['element', 'id', 'landuse', 'name', 'barrier', 'note', 'opening_hours', 'access', 'description', 'leisure', 'tourism', 'surface', 'landcover', 'amenity', 'natural', 'type', 'geometry', 'area', 'indoor', 'operator', 'playground', 'fee', 'charge', 'fence_type', 'wall', 'shelter', 'seats', 'covered', 'addr:city', 'addr:housenumber', 'addr:postcode', 'addr:street']


In [271]:
for i in gdf_list:
    temp_list = i.columns.tolist()
    for column in columns_list:
        if column not in temp_list:
            i[column] = None

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

## Spatial join all gdfs (green space gdf)

In [272]:
# gdf_list = [green_landuse_area, green_leisure_area_new, green_natural_area, green_campsite_area]
count = 0
for i in range(len(gdf_list)):
    for j in range(len(gdf_list)):
        # right contain left or not
        temp_gdf = gdf_list[i].sjoin(gdf_list[j], how='right', predicate='within')
        print('i:', i, 'j:', j, 'row:', temp_gdf.shape[0])
        if count == 0:
            joined_green_space = temp_gdf
            print(temp_gdf.columns)
        else:
            joined_green_space = pd.concat([joined_green_space, temp_gdf], ignore_index=True)
            print(joined_green_space.columns)
        count += 1

i: 0 j: 0 row: 2358
Index(['index_left', 'element_left', 'id_left', 'landuse_left', 'name_left',
       'barrier_left', 'note_left', 'opening_hours_left', 'access_left',
       'description_left', 'leisure_left', 'tourism_left', 'surface_left',
       'landcover_left', 'amenity_left', 'natural_left', 'type_left',
       'area_left', 'indoor_left', 'operator_left', 'playground_left',
       'fee_left', 'charge_left', 'fence_type_left', 'wall_left',
       'shelter_left', 'seats_left', 'covered_left', 'addr:city_left',
       'addr:housenumber_left', 'addr:postcode_left', 'addr:street_left',
       'element_right', 'id_right', 'landuse_right', 'name_right',
       'barrier_right', 'note_right', 'opening_hours_right', 'access_right',
       'description_right', 'leisure_right', 'tourism_right', 'surface_right',
       'landcover_right', 'amenity_right', 'natural_right', 'type_right',
       'geometry', 'area_right', 'indoor_right', 'operator_right',
       'playground_right', 'fee_right',

In [273]:
joined_green_space['id_left'] = joined_green_space['id_left'].fillna(-999)
joined_green_space['id_left'] = joined_green_space['id_left'].astype(int).astype(str)
joined_green_space['id_right'] = joined_green_space['id_right'].astype(str)

In [274]:
joined_green_space[joined_green_space['id_right'] == '365345'][['id_left', 'id_right', 'area_left', 'area_right']]

Unnamed: 0,id_left,id_right,area_left,area_right
2468,157942,365345,94969.782463,1.517605e+06
2469,157943,365345,100603.258852,1.517605e+06
2470,157944,365345,285535.962086,1.517605e+06
2471,396375,365345,1493.478101,1.517605e+06
2472,421481,365345,181451.349952,1.517605e+06
...,...,...,...,...
9942,28889290,365345,150.398311,1.517605e+06
9943,28889293,365345,49.059637,1.517605e+06
9944,28889296,365345,381.918061,1.517605e+06
9945,28889298,365345,71.053700,1.517605e+06


In [275]:
joined_green_space.shape[0]

14720

## Remove itself within itself row

In [276]:
joined_green_space = joined_green_space[joined_green_space['id_left'] != joined_green_space['id_right']]

In [277]:
joined_green_space.shape[0]

11095

In [278]:
joined_green_space[joined_green_space['id_right'] == '365345'][['id_left', 'id_right', 'area_left', 'area_right']]['id_left'].unique()

array(['157942', '157943', '157944', '396375', '421481', '36006691',
       '36006694', '36006695', '36006696', '36006697', '36006698',
       '36006699', '36006700', '36006701', '36006702', '36006703',
       '36006704', '36006705', '36006706', '36006707', '36006708',
       '36006709', '36006710', '36006711', '36006712', '36006713',
       '36006714', '36006715', '36006716', '36006720', '36006721',
       '36006722', '36006723', '36006724', '36006725', '36006726',
       '36006728', '36006729', '36006732', '38867041', '48991210',
       '48991211', '48991217', '48991221', '48991222', '48991223',
       '48992703', '48992704', '48992705', '48992706', '48993727',
       '48994214', '48994220', '48994225', '48994228', '48994231',
       '48994239', '48994242', '389227741', '424056390', '424202562',
       '424202563', '424202564', '424202566', '424202568', '424202569',
       '424202570', '424202571', '426633229', '426633231', '776328519',
       '776328522', '1312793920', '1312793921',

In [279]:
clean_joined_green_space = joined_green_space[~joined_green_space['id_right'].isin(joined_green_space['id_left'])]

In [280]:
clean_joined_green_space

Unnamed: 0,index_left,element_left,id_left,landuse_left,name_left,barrier_left,note_left,opening_hours_left,access_left,description_left,...,charge_right,fence_type_right,wall_right,shelter_right,seats_right,covered_right,addr:city_right,addr:housenumber_right,addr:postcode_right,addr:street_right
18,1455.0,way,709615762,meadow,,,,,,,...,,,,,,,,,,
685,1058.0,way,345110108,grass,,,,,,,...,,,,,,,,,,
709,1080.0,way,352739233,forest,,,,,,,...,,,,,,,,,,
1215,1942.0,way,928898643,meadow,Glatthaferwiese am Elbufer Johannstadt,,,,,Artenreiche Glatthaferwiese mit hohem floristi...,...,,,,,,,,,,
1408,1404.0,way,578835840,forest,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14704,,,-999,,,,,,,,...,,,,,,,,,,
14705,,,-999,,,,,,,,...,,,,,,,,,,
14706,,,-999,,,,,,,,...,,,,,,,,,,
14707,,,-999,,,,,,,,...,,,,,,,,,,


In [281]:
# 157942 is small polygon inside
clean_joined_green_space[clean_joined_green_space['id_right'] == '157942'][['id_left', 'id_right', 'area_left', 'area_right']]

Unnamed: 0,id_left,id_right,area_left,area_right


In [282]:
clean_joined_green_space[clean_joined_green_space['id_right'] == '365345']

Unnamed: 0,index_left,element_left,id_left,landuse_left,name_left,barrier_left,note_left,opening_hours_left,access_left,description_left,...,charge_right,fence_type_right,wall_right,shelter_right,seats_right,covered_right,addr:city_right,addr:housenumber_right,addr:postcode_right,addr:street_right
2468,1.0,relation,157942,forest,,,,,,,...,,,,,,,,,,
2469,2.0,relation,157943,forest,,,,,,,...,,,,,,,,,,
2470,3.0,relation,157944,forest,,,,,,,...,,,,,,,,,,
2471,4.0,relation,396375,grass,,,,,,,...,,,,,,,,,,
2472,5.0,relation,421481,forest,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9942,32.0,way,28889290,,,,,,,,...,,,,,,,,,,
9943,33.0,way,28889293,,,,,,,,...,,,,,,,,,,
9944,34.0,way,28889296,,,,,,,,...,,,,,,,,,,
9945,35.0,way,28889298,,,,,,,,...,,,,,,,,,,


In [283]:
clean_joined_green_space[clean_joined_green_space['id_right'] == '365345'].columns

Index(['index_left', 'element_left', 'id_left', 'landuse_left', 'name_left',
       'barrier_left', 'note_left', 'opening_hours_left', 'access_left',
       'description_left', 'leisure_left', 'tourism_left', 'surface_left',
       'landcover_left', 'amenity_left', 'natural_left', 'type_left',
       'area_left', 'indoor_left', 'operator_left', 'playground_left',
       'fee_left', 'charge_left', 'fence_type_left', 'wall_left',
       'shelter_left', 'seats_left', 'covered_left', 'addr:city_left',
       'addr:housenumber_left', 'addr:postcode_left', 'addr:street_left',
       'element_right', 'id_right', 'landuse_right', 'name_right',
       'barrier_right', 'note_right', 'opening_hours_right', 'access_right',
       'description_right', 'leisure_right', 'tourism_right', 'surface_right',
       'landcover_right', 'amenity_right', 'natural_right', 'type_right',
       'geometry', 'area_right', 'indoor_right', 'operator_right',
       'playground_right', 'fee_right', 'charge_right', 'fe

In [284]:
clean_joined_green_space[(clean_joined_green_space['id_right'] == '365345')][['id_left', 'id_right', 'area_left', 'area_right', 'name_right', 'element_right']]

Unnamed: 0,id_left,id_right,area_left,area_right,name_right,element_right
2468,157942,365345,94969.782463,1.517605e+06,Großer Garten,relation
2469,157943,365345,100603.258852,1.517605e+06,Großer Garten,relation
2470,157944,365345,285535.962086,1.517605e+06,Großer Garten,relation
2471,396375,365345,1493.478101,1.517605e+06,Großer Garten,relation
2472,421481,365345,181451.349952,1.517605e+06,Großer Garten,relation
...,...,...,...,...,...,...
9942,28889290,365345,150.398311,1.517605e+06,Großer Garten,relation
9943,28889293,365345,49.059637,1.517605e+06,Großer Garten,relation
9944,28889296,365345,381.918061,1.517605e+06,Großer Garten,relation
9945,28889298,365345,71.053700,1.517605e+06,Großer Garten,relation


In [285]:
clean_joined_green_space['id_right'].unique()

array(['3807857', '238260545', '239351758', ..., '30642601', '431068944',
       '431069414'], dtype=object)

In [286]:
clean_joined_green_space[(clean_joined_green_space['id_right'] == '1340028517')][['id_left', 'id_right', 'area_left', 'area_right', 'name_right', 'element_right']]

Unnamed: 0,id_left,id_right,area_left,area_right,name_right,element_right
6122,3645255648,1340028517,0.0,8049.696317,,way
9828,-999,1340028517,,8049.696317,,way
13454,-999,1340028517,,8049.696317,,way


In [287]:
clean_joined_green_space[clean_joined_green_space['id_left'] == '359896413']

Unnamed: 0,index_left,element_left,id_left,landuse_left,name_left,barrier_left,note_left,opening_hours_left,access_left,description_left,...,charge_right,fence_type_right,wall_right,shelter_right,seats_right,covered_right,addr:city_right,addr:housenumber_right,addr:postcode_right,addr:street_right


In [288]:
# green_leisure_area[green_leisure_area['id'] == 359896413].explore()

In [289]:
# clean_joined_green_space[clean_joined_green_space['id_right'] == '1340028517'].explore()

In [290]:
clean_joined_green_space = clean_joined_green_space.drop_duplicates(subset=['id_right', 'id_left'])

In [291]:
clean_joined_green_space[(clean_joined_green_space['id_right'] == '1340028517')][['id_left', 'id_right', 'area_left', 'area_right', 'name_right', 'element_right']]

Unnamed: 0,id_left,id_right,area_left,area_right,name_right,element_right
6122,3645255648,1340028517,0.0,8049.696317,,way
9828,-999,1340028517,,8049.696317,,way


In [292]:
clean_joined_green_space

Unnamed: 0,index_left,element_left,id_left,landuse_left,name_left,barrier_left,note_left,opening_hours_left,access_left,description_left,...,charge_right,fence_type_right,wall_right,shelter_right,seats_right,covered_right,addr:city_right,addr:housenumber_right,addr:postcode_right,addr:street_right
18,1455.0,way,709615762,meadow,,,,,,,...,,,,,,,,,,
685,1058.0,way,345110108,grass,,,,,,,...,,,,,,,,,,
709,1080.0,way,352739233,forest,,,,,,,...,,,,,,,,,,
1215,1942.0,way,928898643,meadow,Glatthaferwiese am Elbufer Johannstadt,,,,,Artenreiche Glatthaferwiese mit hohem floristi...,...,,,,,,,,,,
1408,1404.0,way,578835840,forest,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13567,,,-999,,,,,,,,...,,,,,,,,,,
13598,,,-999,,,,,,,,...,,,,,,,,,,
13615,3.0,node,4352882229,,,,,,,,...,,,,,,,,,,
13879,,,-999,,,,,,,,...,,,,,,,,,,


## Filter indoor space out

In [293]:
clean_joined_green_space = clean_joined_green_space[(clean_joined_green_space['indoor_left'] != 'yes')|
                                                    (clean_joined_green_space['indoor_right'] != 'yes')]

## Clean columns

In [294]:
clean_joined_green_space.columns

Index(['index_left', 'element_left', 'id_left', 'landuse_left', 'name_left',
       'barrier_left', 'note_left', 'opening_hours_left', 'access_left',
       'description_left', 'leisure_left', 'tourism_left', 'surface_left',
       'landcover_left', 'amenity_left', 'natural_left', 'type_left',
       'area_left', 'indoor_left', 'operator_left', 'playground_left',
       'fee_left', 'charge_left', 'fence_type_left', 'wall_left',
       'shelter_left', 'seats_left', 'covered_left', 'addr:city_left',
       'addr:housenumber_left', 'addr:postcode_left', 'addr:street_left',
       'element_right', 'id_right', 'landuse_right', 'name_right',
       'barrier_right', 'note_right', 'opening_hours_right', 'access_right',
       'description_right', 'leisure_right', 'tourism_right', 'surface_right',
       'landcover_right', 'amenity_right', 'natural_right', 'type_right',
       'geometry', 'area_right', 'indoor_right', 'operator_right',
       'playground_right', 'fee_right', 'charge_right', 'fe

In [295]:
clean_joined_green_space = clean_joined_green_space.drop(columns=['index_left','shelter_left', 'shelter_right',
                                                                  'seats_left', 'seats_right', 'indoor_left', 'indoor_right',
                                                                  'covered_left', 'covered_right','addr:city_left', 'addr:city_right',
                                                                  'addr:housenumber_left', 'addr:housenumber_right', 
                                                                  'addr:postcode_left', 'addr:postcode_right', 
                                                                  'addr:street_left', 'addr:street_right'])

# Classify public and private green spaces

## Filter out the polygon that area < 100 m2 (10m x 10m)

In [296]:
clean_joined_green_space.shape[0]

3617

In [297]:
clean_joined_green_space = clean_joined_green_space[clean_joined_green_space['area_right'] > 100]

In [298]:
clean_joined_green_space.shape[0]

2754

## Filter out the polygon that has surface isn't grass (if the surface is indicated)

In [378]:
clean_joined_green_space['surface_right'].unique()

array([None, 'grass', 'artificial_turf', 'tartan', 'asphalt', 'sand',
       'fine_gravel', 'clay', 'gravel', 'sett', 'concrete', 'dirt',
       'compacted'], dtype=object)

In [382]:
clean_joined_green_space = clean_joined_green_space[~((clean_joined_green_space['surface_right']!='grass')&(~clean_joined_green_space['surface_right'].isna()))]

In [383]:
clean_joined_green_space.shape[0]

2660

## 1. Use fee and charge tag to classify private area

In [384]:
clean_joined_green_space.loc[(clean_joined_green_space['fee_left'] == 'yes')|
                             (clean_joined_green_space['fee_right'] == 'yes')|
                             (clean_joined_green_space['charge_left'].isna() == False)|
                             (clean_joined_green_space['charge_right'].isna() == False), 'is_public'] = 'no'

In [385]:
clean_joined_green_space[clean_joined_green_space['is_public'] == 'no']

Unnamed: 0,element_left,id_left,landuse_left,name_left,barrier_left,note_left,opening_hours_left,access_left,description_left,leisure_left,...,type_right,geometry,area_right,operator_right,playground_right,fee_right,charge_right,fence_type_right,wall_right,is_public
18,way,709615762,meadow,,,,,,,,...,multipolygon,"POLYGON ((411886.704 5658882.714, 411867.463 5...",64803.254592,,,,,,,no
2616,way,120291177,farmland,,,,,,,,...,,"POLYGON ((412971.239 5655508.351, 412999.009 5...",51057.600112,TU Dresden,,,,,,no
2648,,-999,,,,,,,,,...,,"POLYGON ((410965.582 5654592.653, 411000.094 5...",4872.391375,Studentenwerk Dresden,,,,,,no
2748,,-999,,,,,,,,,...,,"POLYGON ((408533.062 5654930.365, 408534.084 5...",998.696967,,,,,,,no
2851,,-999,,,,,,,,,...,,"POLYGON ((411835.982 5658830.901, 411842.71 56...",312.996988,,,,,,,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6054,,-999,,,,,,,,,...,,"POLYGON ((410476.155 5654097.709, 410447.197 5...",6781.882997,,,,,,,no
6346,node,11486623472,,,,,,,,pitch,...,,"POLYGON ((410965.582 5654592.653, 411000.094 5...",4872.391375,Studentenwerk Dresden,,,,,,no
6347,node,11486623473,,,,,,,,pitch,...,,"POLYGON ((410965.582 5654592.653, 411000.094 5...",4872.391375,Studentenwerk Dresden,,,,,,no
7748,,-999,,,,,,,,,...,,"POLYGON ((410382.619 5658309.991, 410353.891 5...",17069.166334,,,,,,,no


## 2. Use access tag to classify
- Access = yes/permissive -> public
- Access = no/private/pcustomers/restricted -> private

Note: Consider both *_right* and *_left* column (Assumption is if the access of small polygon is yes, the access of larger polygon is yes as well)

In [386]:
clean_joined_green_space[((clean_joined_green_space['access_right'] == 'yes')|(clean_joined_green_space['access_left'] == 'yes')) & (clean_joined_green_space['is_public'] != 'no')].explore()

In [387]:
clean_joined_green_space.loc[((clean_joined_green_space['access_right'] == 'yes')|
                              (clean_joined_green_space['access_left'] == 'yes')|
                              (clean_joined_green_space['access_right'] == 'permissive')|
                              (clean_joined_green_space['access_left'] == 'permissive')) & 
                              (clean_joined_green_space['is_public'] != 'no'), 'is_public'] = 'yes'

In [388]:
clean_joined_green_space[clean_joined_green_space['is_public'].isna()]

Unnamed: 0,element_left,id_left,landuse_left,name_left,barrier_left,note_left,opening_hours_left,access_left,description_left,leisure_left,...,type_right,geometry,area_right,operator_right,playground_right,fee_right,charge_right,fence_type_right,wall_right,is_public
685,way,345110108,grass,,,,,,,,...,,"POLYGON ((410265.983 5656621.792, 410307.939 5...",4710.434439,,,,,,,
709,way,352739233,forest,,,,,,,,...,,"POLYGON ((407999.45 5658340.923, 407999.256 56...",15264.595836,,,,,,,
1215,way,928898643,meadow,Glatthaferwiese am Elbufer Johannstadt,,,,,Artenreiche Glatthaferwiese mit hohem floristi...,,...,,"POLYGON ((414018.513 5657529.482, 414019.011 5...",862049.377371,,,,,,,
1408,way,578835840,forest,,,,,,,,...,,"POLYGON ((412720.216 5655401.923, 412766.163 5...",10209.178293,,,,,,,
1409,way,776252349,forest,,,,,,,,...,,"POLYGON ((412720.216 5655401.923, 412766.163 5...",10209.178293,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10500,,-999,,,,,,,,,...,,"POLYGON ((411430.771 5656391.192, 411421.07 56...",2807.714709,,,,,,,
10571,way,359275692,,,,,,,,,...,,"POLYGON ((411602.606 5654709.696, 411468.754 5...",11211.585017,,,,,,,
11252,node,12213145709,,,,,,,,,...,,"POLYGON ((408974.199 5658841.538, 408974.971 5...",257.284645,,,,,,,
12218,,-999,,,,,,,,,...,,"POLYGON ((412445.718 5653910.036, 412416.79 56...",13966.357916,,,,,,,


In [389]:
access_tag = clean_joined_green_space['access_right'].unique().tolist()
for i in clean_joined_green_space['access_left'].unique().tolist():
    if i not in access_tag:
        access_tag.append(i)

In [390]:
access_tag = [i for i in access_tag if (str(i) != 'nan')&(i is not None)]

In [391]:
access_tag

['private', 'yes', 'permissive', 'customers', 'permit']

In [392]:
private_tag = list(access_tag)
private_tag.remove('yes')
private_tag.remove('permissive')
private_tag

['private', 'customers', 'permit']

In [393]:
clean_joined_green_space[(clean_joined_green_space['access_right'].isin(private_tag)|clean_joined_green_space['access_left'].isin(private_tag))& clean_joined_green_space['is_public'].isna()].explore()

  clean_joined_green_space[(clean_joined_green_space['access_right'].isin(private_tag)|clean_joined_green_space['access_left'].isin(private_tag))& clean_joined_green_space['is_public'].isna()].explore()


In [394]:
clean_joined_green_space[(clean_joined_green_space['access_right'].isin(private_tag)|clean_joined_green_space['access_left'].isin(private_tag))& clean_joined_green_space['is_public'].isna()].shape[0]

0

In [395]:
clean_joined_green_space.loc[(clean_joined_green_space['access_right'].isin(private_tag)|
                              clean_joined_green_space['access_left'].isin(private_tag))& 
                              clean_joined_green_space['is_public'].isna(), 'is_public'] = 'no'

In [396]:
clean_joined_green_space[clean_joined_green_space['is_public'].isna()]

Unnamed: 0,element_left,id_left,landuse_left,name_left,barrier_left,note_left,opening_hours_left,access_left,description_left,leisure_left,...,type_right,geometry,area_right,operator_right,playground_right,fee_right,charge_right,fence_type_right,wall_right,is_public
685,way,345110108,grass,,,,,,,,...,,"POLYGON ((410265.983 5656621.792, 410307.939 5...",4710.434439,,,,,,,
709,way,352739233,forest,,,,,,,,...,,"POLYGON ((407999.45 5658340.923, 407999.256 56...",15264.595836,,,,,,,
1215,way,928898643,meadow,Glatthaferwiese am Elbufer Johannstadt,,,,,Artenreiche Glatthaferwiese mit hohem floristi...,,...,,"POLYGON ((414018.513 5657529.482, 414019.011 5...",862049.377371,,,,,,,
1408,way,578835840,forest,,,,,,,,...,,"POLYGON ((412720.216 5655401.923, 412766.163 5...",10209.178293,,,,,,,
1409,way,776252349,forest,,,,,,,,...,,"POLYGON ((412720.216 5655401.923, 412766.163 5...",10209.178293,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10500,,-999,,,,,,,,,...,,"POLYGON ((411430.771 5656391.192, 411421.07 56...",2807.714709,,,,,,,
10571,way,359275692,,,,,,,,,...,,"POLYGON ((411602.606 5654709.696, 411468.754 5...",11211.585017,,,,,,,
11252,node,12213145709,,,,,,,,,...,,"POLYGON ((408974.199 5658841.538, 408974.971 5...",257.284645,,,,,,,
12218,,-999,,,,,,,,,...,,"POLYGON ((412445.718 5653910.036, 412416.79 56...",13966.357916,,,,,,,


## 3. Use leisure=park tag to classify public area

In [397]:
clean_joined_green_space[(clean_joined_green_space['leisure_right'] == 'park')&(clean_joined_green_space['is_public'].isna())].explore()

  clean_joined_green_space[(clean_joined_green_space['leisure_right'] == 'park')&(clean_joined_green_space['is_public'].isna())].explore()


In [398]:
clean_joined_green_space.loc[(clean_joined_green_space['leisure_right'] == 'park')&(clean_joined_green_space['is_public'].isna()), 'is_public'] = 'yes'

In [399]:
clean_joined_green_space[clean_joined_green_space['is_public'].isna()].shape[0]

2099

## 4. Use barrier tag to classify private area

Need to confirm that **allotments** is public or private? (Maybe in case of Germany/Dresden first)

In [400]:
clean_joined_green_space.columns

Index(['element_left', 'id_left', 'landuse_left', 'name_left', 'barrier_left',
       'note_left', 'opening_hours_left', 'access_left', 'description_left',
       'leisure_left', 'tourism_left', 'surface_left', 'landcover_left',
       'amenity_left', 'natural_left', 'type_left', 'area_left',
       'operator_left', 'playground_left', 'fee_left', 'charge_left',
       'fence_type_left', 'wall_left', 'element_right', 'id_right',
       'landuse_right', 'name_right', 'barrier_right', 'note_right',
       'opening_hours_right', 'access_right', 'description_right',
       'leisure_right', 'tourism_right', 'surface_right', 'landcover_right',
       'amenity_right', 'natural_right', 'type_right', 'geometry',
       'area_right', 'operator_right', 'playground_right', 'fee_right',
       'charge_right', 'fence_type_right', 'wall_right', 'is_public'],
      dtype='object')

In [401]:
print(clean_joined_green_space['barrier_right'].unique())
print(clean_joined_green_space['barrier_left'].unique())
print('-'*20)
print(clean_joined_green_space['fence_type_right'].unique())
print(clean_joined_green_space['fence_type_left'].unique())

['fence' None 'wall']
[None 'hedge' nan 'fence']
--------------------
[None]
[None]


In [402]:
# clean_joined_green_space[((clean_joined_green_space['barrier_right'] == 'fence')|(clean_joined_green_space['barrier_right'] == 'wall'))&(clean_joined_green_space['is_public'].isna())].explore()

In [403]:
clean_joined_green_space[(~clean_joined_green_space['barrier_right'].isna())&(clean_joined_green_space['is_public'].isna())].shape[0]

0

In [404]:
clean_joined_green_space.loc[(~clean_joined_green_space['barrier_right'].isna())&(clean_joined_green_space['is_public'].isna()), 'is_public'] = 'no'

In [405]:
clean_joined_green_space[clean_joined_green_space['is_public'].isna()].shape[0]

2099

## 5. Use another leisure and landuse tag

leisure=nature_reserve and landuse=forest --> public

In [406]:
clean_joined_green_space.loc[((clean_joined_green_space['leisure_right']=='nature_reserve')|
                             (clean_joined_green_space['landuse_right']=='forest'))&
                             (clean_joined_green_space['is_public'].isna()), 'is_public'] = 'yes'

## Should I take a look on note/description?

but it will be too specific and not generic for other study areas since it is written in local language

In [407]:
clean_joined_green_space[(clean_joined_green_space['tourism_left'] == 'picnic_site')|
                         (clean_joined_green_space['tourism_left'] == 'camp_site')|
                         (clean_joined_green_space['tourism_right'] == 'picnic_site')|
                         (clean_joined_green_space['tourism_right'] == 'camp_site')].explore()

## Calculate additional information to classify public/private area

1. footpath length
2. presence/number of bench and playground
3. proximity to road?

In [408]:
unlabel_green_space = clean_joined_green_space[clean_joined_green_space['is_public'].isna()]

In [409]:
unlabel_green_space.shape[0]

2099

### Calculate the number of bench, playground, picnic table, and waste basket for each polygon

In [410]:
nodup_green_space = unlabel_green_space.drop_duplicates(subset=['id_right'])

In [411]:
nodup_green_space = nodup_green_space[['id_right', 'geometry', 'area_right', 'description_right', 'landcover_right', 'is_public']]

In [412]:
poi_amenity_greenspace = poi_amenity.sjoin(nodup_green_space, how='inner', predicate='within')
poi_leisure_greenspace = poi_leisure.sjoin(nodup_green_space, how='inner', predicate='within')

In [413]:
print(poi_amenity_greenspace.shape[0], poi_leisure_greenspace.shape[0])

334 64


In [414]:
group_poi_amenity = poi_amenity_greenspace.groupby(by=['id_right', 'amenity']).size().reset_index(name='count')
group_poi_leisure = poi_leisure_greenspace.groupby(by=['id_right', 'leisure']).size().reset_index(name='count')

In [415]:
group_poi_amenity = pd.pivot_table(group_poi_amenity, values='count', index='id_right', columns='amenity', fill_value=0)
group_poi_leisure = pd.pivot_table(group_poi_leisure, values='count', index='id_right', columns='leisure', fill_value=0)

In [416]:
nodup_green_space = nodup_green_space.merge(group_poi_amenity, how='left', left_on='id_right', right_on='id_right')

In [417]:
nodup_green_space = nodup_green_space.merge(group_poi_leisure, how='left', left_on='id_right', right_on='id_right')

In [418]:
nodup_green_space.columns

Index(['id_right', 'geometry', 'area_right', 'description_right',
       'landcover_right', 'is_public', 'bench', 'waste_basket', 'picnic_table',
       'playground'],
      dtype='object')

In [419]:
nodup_green_space[['bench', 'waste_basket', 'picnic_table', 'playground']] = nodup_green_space[['bench', 'waste_basket', 'picnic_table', 'playground']].fillna(0)

In [420]:
nodup_green_space

Unnamed: 0,id_right,geometry,area_right,description_right,landcover_right,is_public,bench,waste_basket,picnic_table,playground
0,238260545,"POLYGON ((410265.983 5656621.792, 410307.939 5...",4710.434439,,,,14.0,7.0,0.0,0.0
1,239351758,"POLYGON ((407999.45 5658340.923, 407999.256 56...",15264.595836,,,,0.0,0.0,0.0,0.0
2,386752288,"POLYGON ((414018.513 5657529.482, 414019.011 5...",862049.377371,,,,19.0,2.0,0.0,1.0
3,578835835,"POLYGON ((412720.216 5655401.923, 412766.163 5...",10209.178293,,,,0.0,0.0,0.0,0.0
4,2238744,"POLYGON ((410559.225 5657411.381, 410556.399 5...",4735.862972,,,,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
2011,790807841,"POLYGON ((411854.949 5658349.665, 411855.349 5...",211.186821,,,,3.0,1.0,0.0,0.0
2012,939736482,"POLYGON ((412741.088 5657358.37, 412845.575 56...",3846.025652,,,,0.0,0.0,0.0,0.0
2013,1106153650,"POLYGON ((412199.125 5655367.91, 412247.524 56...",3355.935764,,,,0.0,0.0,0.0,0.0
2014,1240429634,"POLYGON ((414780.498 5655461.727, 414779.512 5...",4414.140850,,,,0.0,0.0,0.0,1.0


In [421]:
# unlabel_green_space[unlabel_green_space['id_right'] == '238260545']

### Calculate footpath length in each polygon

In [422]:
footpath_network_edges

Unnamed: 0,u,v,key,osmid,oneway,reversed,length,junction,est_width,area,geometry
0,534608,30436919,0,762891172,False,False,12.554889,,,,"LINESTRING (412051.57 5653732.987, 412056.48 5..."
1,534608,371857221,0,496200351,False,False,22.612003,,,,"LINESTRING (412051.57 5653732.987, 412053.287 ..."
2,534608,367835353,0,762891166,False,True,15.503632,,,,"LINESTRING (412051.57 5653732.987, 412052.281 ..."
3,534608,365507697,0,762891171,False,False,16.324191,,,,"LINESTRING (412051.57 5653732.987, 412049.069 ..."
4,534608,371857211,0,762904347,False,True,23.000308,,,,"LINESTRING (412051.57 5653732.987, 412050.061 ..."
...,...,...,...,...,...,...,...,...,...,...,...
71131,12473900290,310293186,0,"[ 761313003, 1348520222 ]",False,"[ false, true ]",27.011475,,,,"LINESTRING (412313.604 5657723.769, 412314.944..."
71132,12479963198,3645255472,0,359896435,False,True,14.155417,,,,"LINESTRING (411113.901 5655919.431, 411126.568..."
71133,12479963198,3645255478,0,1349159900,False,False,19.144944,,,,"LINESTRING (411113.901 5655919.431, 411107.586..."
71134,12479963198,4795788254,0,359896435,False,False,92.410376,,,,"LINESTRING (411113.901 5655919.431, 411050.372..."


In [423]:
footpath_network_edges = footpath_network_edges.to_crs(epsg=32633)

In [424]:
footpath_green_space = nodup_green_space.sjoin(footpath_network_edges, how='left', predicate='intersects')

In [425]:
nodup_green_space.shape[0]

2016

In [426]:
footpath_length_sum = footpath_green_space.groupby(by='id_right')['length'].sum().reset_index()

In [427]:
nodup_green_space = nodup_green_space.merge(footpath_length_sum, how='inner', left_on='id_right', right_on='id_right')

In [428]:
nodup_green_space.head()

Unnamed: 0,id_right,geometry,area_right,description_right,landcover_right,is_public,bench,waste_basket,picnic_table,playground,length
0,238260545,"POLYGON ((410265.983 5656621.792, 410307.939 5...",4710.434439,,,,14.0,7.0,0.0,0.0,653.714744
1,239351758,"POLYGON ((407999.45 5658340.923, 407999.256 56...",15264.595836,,,,0.0,0.0,0.0,0.0,0.0
2,386752288,"POLYGON ((414018.513 5657529.482, 414019.011 5...",862049.377371,,,,19.0,2.0,0.0,1.0,20159.17661
3,578835835,"POLYGON ((412720.216 5655401.923, 412766.163 5...",10209.178293,,,,0.0,0.0,0.0,0.0,0.0
4,2238744,"POLYGON ((410559.225 5657411.381, 410556.399 5...",4735.862972,,,,0.0,0.0,0.0,0.0,0.0


In [429]:
nodup_green_space.columns

Index(['id_right', 'geometry', 'area_right', 'description_right',
       'landcover_right', 'is_public', 'bench', 'waste_basket', 'picnic_table',
       'playground', 'length'],
      dtype='object')

In [430]:
test = nodup_green_space[['id_right', 'area_right', 'bench', 'waste_basket', 'picnic_table','playground', 'length']]

In [431]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [432]:
#create scaled DataFrame where each variable has mean of 0 and standard dev of 1
scaled_df = StandardScaler().fit_transform(test[['area_right', 'bench', 'waste_basket', 'picnic_table','playground', 'length']])

In [433]:
scaled_df

array([[ 3.42518740e-02,  1.28305405e+01,  1.34509323e+01,
        -3.15126746e-02, -1.67337926e-01,  7.44660349e-01],
       [ 4.40348399e-01, -1.15127646e-01, -7.95799688e-02,
        -3.15126746e-02, -1.67337926e-01, -1.54229675e-01],
       [ 3.30224119e+01,  1.74539933e+01,  3.78628069e+00,
        -3.15126746e-02,  5.27384366e+00,  2.75656327e+01],
       ...,
       [-1.78656921e-02, -1.15127646e-01, -7.95799688e-02,
        -3.15126746e-02, -1.67337926e-01, -1.54229675e-01],
       [ 2.28512717e-02, -1.15127646e-01, -7.95799688e-02,
        -3.15126746e-02,  5.27384366e+00,  6.76222482e-01],
       [-7.07584683e-02,  4.50832525e+00, -7.95799688e-02,
        -3.15126746e-02, -1.67337926e-01,  6.23626628e-01]])

In [434]:
#instantiate the k-means class, using optimal number of clusters
kmeans = KMeans(init="random", n_clusters=2, n_init=10, random_state=1)

#fit k-means algorithm to data
kmeans.fit(scaled_df)

#view cluster assignments for each observation
kmeans.labels_

array([0, 1, 0, ..., 1, 1, 1], dtype=int32)

In [435]:
test['cluster'] = kmeans.labels_

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['cluster'] = kmeans.labels_


In [436]:
test[test['cluster'] == 0]

Unnamed: 0,id_right,area_right,bench,waste_basket,picnic_table,playground,length,cluster
0,238260545,4710.434439,14.0,7.0,0.0,0.0,653.714744,0
2,386752288,862049.377371,19.0,2.0,0.0,1.0,20159.17661,0
575,24355262,26712.04996,25.0,15.0,0.0,1.0,2455.090927,0
585,28245413,299109.652945,26.0,14.0,0.0,0.0,9635.978899,0


In [437]:
test[test['cluster'] == 1]

Unnamed: 0,id_right,area_right,bench,waste_basket,picnic_table,playground,length,cluster
1,239351758,15264.595836,0.0,0.0,0.0,0.0,0.000000,1
3,578835835,10209.178293,0.0,0.0,0.0,0.0,0.000000,1
4,2238744,4735.862972,0.0,0.0,0.0,0.0,0.000000,1
5,2238759,1875.199942,0.0,0.0,0.0,0.0,64.358431,1
6,3257183,524.415515,0.0,0.0,0.0,0.0,176.753749,1
...,...,...,...,...,...,...,...,...
2011,790807841,211.186821,3.0,1.0,0.0,0.0,0.000000,1
2012,939736482,3846.025652,0.0,0.0,0.0,0.0,0.000000,1
2013,1106153650,3355.935764,0.0,0.0,0.0,0.0,0.000000,1
2014,1240429634,4414.140850,0.0,0.0,0.0,1.0,603.943536,1


In [438]:
test[(test['bench'] == 0)&(test['waste_basket'] == 0)&(test['picnic_table'] == 0)&(test['playground'] == 0)&(test['length'] == 0)]

Unnamed: 0,id_right,area_right,bench,waste_basket,picnic_table,playground,length,cluster
1,239351758,15264.595836,0.0,0.0,0.0,0.0,0.0,1
3,578835835,10209.178293,0.0,0.0,0.0,0.0,0.0,1
4,2238744,4735.862972,0.0,0.0,0.0,0.0,0.0,1
7,13859501,6664.450529,0.0,0.0,0.0,0.0,0.0,1
8,20492088,6730.926696,0.0,0.0,0.0,0.0,0.0,1
...,...,...,...,...,...,...,...,...
2005,1339254067,748.095852,0.0,0.0,0.0,0.0,0.0,1
2007,1342392212,1964.603199,0.0,0.0,0.0,0.0,0.0,1
2008,28912699,1807.557948,0.0,0.0,0.0,0.0,0.0,1
2012,939736482,3846.025652,0.0,0.0,0.0,0.0,0.0,1


In [439]:
nodup_green_space_new = nodup_green_space.merge(test[['id_right', 'cluster']], how='left', left_on='id_right', right_on='id_right')

In [440]:
nodup_green_space_new[(nodup_green_space_new['bench'] == 0)&(nodup_green_space_new['waste_basket'] == 0)&(nodup_green_space_new['picnic_table'] == 0)&(nodup_green_space_new['playground'] == 0)&(nodup_green_space_new['length'] == 0)].explore()

In [441]:
testnew = clean_joined_green_space[clean_joined_green_space['is_public'].isna()].drop_duplicates(subset=['id_right'])

In [442]:
testnew['landcover_right'].unique()

array([None, 'grass'], dtype=object)

In [443]:
testnew['surface_right'].unique()

array([None, 'grass'], dtype=object)

In [446]:
clean_joined_green_space[clean_joined_green_space['is_public'] == 'no'].explore()