In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
from glob2 import glob
import osmnx as ox

## Load data using OSMnx 
https://osmnx.readthedocs.io/en/stable/osmnx.html#module-osmnx.geometries

In [2]:
green=ox.geometries.geometries_from_bbox(51.691874,51.28676,0.334016,-0.510375,
                                         {'leisure':'park', 'landuse':'forest'})

In [3]:
#keep only polygons

#exclude Point
green=green[green['geometry'].geom_type.ne('Point')]
#exclude LineString
green=green[green['geometry'].geom_type.ne('LineString')]

In [4]:
green.head()

Unnamed: 0,unique_id,osmid,element_type,highway,geometry,created_by,source,barrier,name,access,...,multipolygon,contact:email,start_date:note,disused,previous_id,access_land,leisure_1,architect,previously,source:geometry
11,way/3184264,3184264,way,,"POLYGON ((-0.33801 51.29715, -0.33716 51.29791...",,,,,,...,,,,,,,,,,
12,way/3385169,3385169,way,,"POLYGON ((-0.41102 51.44907, -0.41079 51.44955...",,,,Glebelands Playing Fields Park,,...,,,,,,,,,,
13,way/3401837,3401837,way,,"POLYGON ((-0.38753 51.42046, -0.38732 51.42062...",,,,Hatherop Park,,...,,,,,,,,,,
14,way/3430579,3430579,way,,"POLYGON ((-0.36641 51.42020, -0.36736 51.41948...",,,,Carlisle Park,,...,,,,,,,,,,
15,way/3541791,3541791,way,,"POLYGON ((-0.34399 51.40805, -0.34262 51.40746...",,,,Hampton Court Green,,...,,,,,,,,,,


In [5]:
green.shape

(5446, 171)

In [6]:
green.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

## Exclude the overlapping geometries 

In [7]:
geom = green.geometry.unary_union

In [8]:
green_unique = gpd.GeoDataFrame(geometry=[geom],crs="epsg:4326")

In [9]:
green_unique.head()

Unnamed: 0,geometry
0,"MULTIPOLYGON (((-0.24209 51.45606, -0.24205 51..."


In [10]:
green_unique.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [11]:
green_unique = green_unique.explode().reset_index(drop=True)

In [12]:
green_unique.head()

Unnamed: 0,geometry
0,"POLYGON ((-0.24209 51.45606, -0.24205 51.45651..."
1,"POLYGON ((-0.03133 51.43362, -0.03034 51.43356..."
2,"POLYGON ((0.06302 51.43869, 0.06307 51.43878, ..."
3,"POLYGON ((-0.09444 51.28794, -0.09155 51.28767..."
4,"POLYGON ((0.20140 51.28944, 0.20134 51.28899, ..."


In [13]:
green_unique.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [14]:
green_unique.shape

(4838, 1)

## Load observations

In [15]:
data_folder = '../data/data_muki/data'
london_df = pd.concat([pd.read_csv(f).assign(challenge=f.replace('.csv','')) for f in glob(data_folder+'/CNC London*.csv')])

In [16]:
london_df.shape

(13730, 35)

In [17]:
london_df.head()

Unnamed: 0,id,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,...,taxon_geoprivacy,coordinates_obscured,positioning_method,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id,challenge
0,11479007,2018-04-27 7:52:34 am BST,2018-04-27,2018-04-27 07:52:34 UTC,UTC,159021,muki,2018-04-27 07:06:17 UTC,2018-04-27 09:26:56 UTC,research,...,,False,gps,gps,lesser celandine,Ficaria verna,lesser celandine,Plantae,204321.0,../data/data_muki/data\CNC London 2018
1,11480212,2018-04-27 8:39:41 am BST,2018-04-27,2018-04-27 07:39:41 UTC,London,664459,lucyrobinsonnhm,2018-04-27 08:02:52 UTC,2019-01-29 21:16:34 UTC,research,...,open,False,gps,gps,Pigeon biset domestique,Columba livia domestica,Feral Pigeon,Aves,122767.0,../data/data_muki/data\CNC London 2018
2,11480902,Fri Apr 27 2018 08:34:31 GMT+0100 (GMT+1),2018-04-27,2018-04-27 06:34:31 UTC,Amsterdam,908315,tess26,2018-04-27 08:42:01 UTC,2018-04-27 11:01:49 UTC,casual,...,,False,,,,Hyacinthoides hispanica,Spanish Bluebell,Plantae,57635.0,../data/data_muki/data\CNC London 2018
3,11481097,Fri Apr 27 2018 09:54:35 GMT+0100 (GMT+1),2018-04-27,2018-04-27 07:54:35 UTC,Amsterdam,796473,bryonycross,2018-04-27 08:54:45 UTC,2018-04-27 20:23:41 UTC,research,...,,False,,,Green Alkanet,Pentaglottis sempervirens,Green Alkanet,Plantae,166466.0,../data/data_muki/data\CNC London 2018
4,11482247,2018-04-27 8:42:21 am BST,2018-04-27,2018-04-27 07:42:21 UTC,London,664459,lucyrobinsonnhm,2018-04-27 10:09:55 UTC,2018-04-27 15:37:31 UTC,research,...,,False,gps,gps,London Plane,Platanus × hispanica,London Plane,Plantae,552449.0,../data/data_muki/data\CNC London 2018


In [18]:
london_df['latitude'], london_df['longitude']

(0       51.550636
 1       51.495277
 2       51.472981
 3       51.508076
 4       51.496113
           ...    
 5756    51.374607
 5757    51.540178
 5758    51.564122
 5759    51.503484
 5760    51.411946
 Name: latitude, Length: 13730, dtype: float64, 0      -0.228487
 1      -0.173611
 2      -0.169102
 3      -0.161516
 4      -0.173889
           ...   
 5756   -0.171098
 5757   -0.329124
 5758   -0.334986
 5759   -0.346972
 5760   -0.288115
 Name: longitude, Length: 13730, dtype: float64)

In [19]:
#create the points inverting the coordinates
geometry_invert = [Point(xy) for xy in zip(london_df.longitude, london_df.latitude)]

In [20]:
#crete a geodataframe with the observations
observations_gdf = gpd.GeoDataFrame(london_df, crs="epsg:4326", geometry=geometry_invert)

In [21]:
observations_gdf.shape

(13730, 36)

In [22]:
observations_gdf.head()

Unnamed: 0,id,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,...,coordinates_obscured,positioning_method,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id,challenge,geometry
0,11479007,2018-04-27 7:52:34 am BST,2018-04-27,2018-04-27 07:52:34 UTC,UTC,159021,muki,2018-04-27 07:06:17 UTC,2018-04-27 09:26:56 UTC,research,...,False,gps,gps,lesser celandine,Ficaria verna,lesser celandine,Plantae,204321.0,../data/data_muki/data\CNC London 2018,POINT (-0.22849 51.55064)
1,11480212,2018-04-27 8:39:41 am BST,2018-04-27,2018-04-27 07:39:41 UTC,London,664459,lucyrobinsonnhm,2018-04-27 08:02:52 UTC,2019-01-29 21:16:34 UTC,research,...,False,gps,gps,Pigeon biset domestique,Columba livia domestica,Feral Pigeon,Aves,122767.0,../data/data_muki/data\CNC London 2018,POINT (-0.17361 51.49528)
2,11480902,Fri Apr 27 2018 08:34:31 GMT+0100 (GMT+1),2018-04-27,2018-04-27 06:34:31 UTC,Amsterdam,908315,tess26,2018-04-27 08:42:01 UTC,2018-04-27 11:01:49 UTC,casual,...,False,,,,Hyacinthoides hispanica,Spanish Bluebell,Plantae,57635.0,../data/data_muki/data\CNC London 2018,POINT (-0.16910 51.47298)
3,11481097,Fri Apr 27 2018 09:54:35 GMT+0100 (GMT+1),2018-04-27,2018-04-27 07:54:35 UTC,Amsterdam,796473,bryonycross,2018-04-27 08:54:45 UTC,2018-04-27 20:23:41 UTC,research,...,False,,,Green Alkanet,Pentaglottis sempervirens,Green Alkanet,Plantae,166466.0,../data/data_muki/data\CNC London 2018,POINT (-0.16152 51.50808)
4,11482247,2018-04-27 8:42:21 am BST,2018-04-27,2018-04-27 07:42:21 UTC,London,664459,lucyrobinsonnhm,2018-04-27 10:09:55 UTC,2018-04-27 15:37:31 UTC,research,...,False,gps,gps,London Plane,Platanus × hispanica,London Plane,Plantae,552449.0,../data/data_muki/data\CNC London 2018,POINT (-0.17389 51.49611)


In [23]:
observations_gdf['longitude'].isna().sum()

0

## Spatial join

In [24]:
# spatial join observations and observations.
points_polys = gpd.sjoin(observations_gdf, green_unique, how='inner')

In [25]:
points_polys.head()

Unnamed: 0,id,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,...,positioning_method,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id,challenge,geometry,index_right
3,11481097,Fri Apr 27 2018 09:54:35 GMT+0100 (GMT+1),2018-04-27,2018-04-27 07:54:35 UTC,Amsterdam,796473,bryonycross,2018-04-27 08:54:45 UTC,2018-04-27 20:23:41 UTC,research,...,,,Green Alkanet,Pentaglottis sempervirens,Green Alkanet,Plantae,166466.0,../data/data_muki/data\CNC London 2018,POINT (-0.16152 51.50808),2551
5,11482706,Fri Apr 27 2018 11:40:06 GMT+0100 (GMT+1),2018-04-27,2018-04-27 09:40:06 UTC,Amsterdam,796473,bryonycross,2018-04-27 10:40:31 UTC,2018-04-27 17:18:08 UTC,casual,...,,,,Quercus stellata,post oak,Plantae,119269.0,../data/data_muki/data\CNC London 2018,POINT (-0.16758 51.51051),2551
6,11483083,Fri Apr 27 2018 12:01:15 GMT+0100 (GMT+1),2018-04-27,2018-04-27 10:01:15 UTC,Amsterdam,796473,bryonycross,2018-04-27 11:01:48 UTC,2018-04-27 17:18:09 UTC,needs_id,...,,,,Oligochaeta,Earthworms and allies,Animalia,333586.0,../data/data_muki/data\CNC London 2018,POINT (-0.16793 51.51077),2551
10,11484564,Fri Apr 27 2018 12:58:17 GMT+0100 (GMT+1),2018-04-27,2018-04-27 10:58:17 UTC,Amsterdam,796473,bryonycross,2018-04-27 11:58:34 UTC,2018-04-27 17:18:10 UTC,research,...,,,Garden Snail,Cornu aspersum,Garden Snail,Mollusca,480298.0,../data/data_muki/data\CNC London 2018,POINT (-0.16176 51.50792),2551
11,11484587,Fri Apr 27 2018 12:58:55 GMT+0100 (GMT+1),2018-04-27,2018-04-27 10:58:55 UTC,Amsterdam,796473,bryonycross,2018-04-27 11:59:13 UTC,2018-04-27 17:18:11 UTC,needs_id,...,,,,Arion rufus,Chocolate Arion,Mollusca,84755.0,../data/data_muki/data\CNC London 2018,POINT (-0.16168 51.50783),2551


In [26]:
points_polys.shape

(5847, 37)

In [27]:
points_polys.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

## Set flag for greenspaces

In [28]:
ids_with_greenspace = points_polys.id.to_list()

In [29]:
new_london = london_df.set_index('id')
new_london.head()

Unnamed: 0_level_0,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,license,...,coordinates_obscured,positioning_method,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id,challenge,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11479007,2018-04-27 7:52:34 am BST,2018-04-27,2018-04-27 07:52:34 UTC,UTC,159021,muki,2018-04-27 07:06:17 UTC,2018-04-27 09:26:56 UTC,research,CC-BY,...,False,gps,gps,lesser celandine,Ficaria verna,lesser celandine,Plantae,204321.0,../data/data_muki/data\CNC London 2018,POINT (-0.22849 51.55064)
11480212,2018-04-27 8:39:41 am BST,2018-04-27,2018-04-27 07:39:41 UTC,London,664459,lucyrobinsonnhm,2018-04-27 08:02:52 UTC,2019-01-29 21:16:34 UTC,research,CC0,...,False,gps,gps,Pigeon biset domestique,Columba livia domestica,Feral Pigeon,Aves,122767.0,../data/data_muki/data\CNC London 2018,POINT (-0.17361 51.49528)
11480902,Fri Apr 27 2018 08:34:31 GMT+0100 (GMT+1),2018-04-27,2018-04-27 06:34:31 UTC,Amsterdam,908315,tess26,2018-04-27 08:42:01 UTC,2018-04-27 11:01:49 UTC,casual,,...,False,,,,Hyacinthoides hispanica,Spanish Bluebell,Plantae,57635.0,../data/data_muki/data\CNC London 2018,POINT (-0.16910 51.47298)
11481097,Fri Apr 27 2018 09:54:35 GMT+0100 (GMT+1),2018-04-27,2018-04-27 07:54:35 UTC,Amsterdam,796473,bryonycross,2018-04-27 08:54:45 UTC,2018-04-27 20:23:41 UTC,research,CC-BY-NC,...,False,,,Green Alkanet,Pentaglottis sempervirens,Green Alkanet,Plantae,166466.0,../data/data_muki/data\CNC London 2018,POINT (-0.16152 51.50808)
11482247,2018-04-27 8:42:21 am BST,2018-04-27,2018-04-27 07:42:21 UTC,London,664459,lucyrobinsonnhm,2018-04-27 10:09:55 UTC,2018-04-27 15:37:31 UTC,research,CC0,...,False,gps,gps,London Plane,Platanus × hispanica,London Plane,Plantae,552449.0,../data/data_muki/data\CNC London 2018,POINT (-0.17389 51.49611)


In [30]:
new_london['greenspace_flag'] = '0'
new_london.loc[ids_with_greenspace,'greenspace_flag'] = '1'

In [31]:
new_london.greenspace_flag.value_counts()

0    7883
1    5847
Name: greenspace_flag, dtype: int64

In [32]:
new_london.head()

Unnamed: 0_level_0,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,license,...,positioning_method,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id,challenge,geometry,greenspace_flag
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11479007,2018-04-27 7:52:34 am BST,2018-04-27,2018-04-27 07:52:34 UTC,UTC,159021,muki,2018-04-27 07:06:17 UTC,2018-04-27 09:26:56 UTC,research,CC-BY,...,gps,gps,lesser celandine,Ficaria verna,lesser celandine,Plantae,204321.0,../data/data_muki/data\CNC London 2018,POINT (-0.22849 51.55064),0
11480212,2018-04-27 8:39:41 am BST,2018-04-27,2018-04-27 07:39:41 UTC,London,664459,lucyrobinsonnhm,2018-04-27 08:02:52 UTC,2019-01-29 21:16:34 UTC,research,CC0,...,gps,gps,Pigeon biset domestique,Columba livia domestica,Feral Pigeon,Aves,122767.0,../data/data_muki/data\CNC London 2018,POINT (-0.17361 51.49528),0
11480902,Fri Apr 27 2018 08:34:31 GMT+0100 (GMT+1),2018-04-27,2018-04-27 06:34:31 UTC,Amsterdam,908315,tess26,2018-04-27 08:42:01 UTC,2018-04-27 11:01:49 UTC,casual,,...,,,,Hyacinthoides hispanica,Spanish Bluebell,Plantae,57635.0,../data/data_muki/data\CNC London 2018,POINT (-0.16910 51.47298),0
11481097,Fri Apr 27 2018 09:54:35 GMT+0100 (GMT+1),2018-04-27,2018-04-27 07:54:35 UTC,Amsterdam,796473,bryonycross,2018-04-27 08:54:45 UTC,2018-04-27 20:23:41 UTC,research,CC-BY-NC,...,,,Green Alkanet,Pentaglottis sempervirens,Green Alkanet,Plantae,166466.0,../data/data_muki/data\CNC London 2018,POINT (-0.16152 51.50808),1
11482247,2018-04-27 8:42:21 am BST,2018-04-27,2018-04-27 07:42:21 UTC,London,664459,lucyrobinsonnhm,2018-04-27 10:09:55 UTC,2018-04-27 15:37:31 UTC,research,CC0,...,gps,gps,London Plane,Platanus × hispanica,London Plane,Plantae,552449.0,../data/data_muki/data\CNC London 2018,POINT (-0.17389 51.49611),0


In [33]:
#export as csv
new_london.to_csv('../data/outputs/london_data_with_greenspace_flag_v2.csv')

In [34]:
#compare with Florence's csv export
new_london_v0=pd.read_csv('../data/outputs/london_data_with_greenspace_flag_v1.csv')

In [35]:
new_london_v0.shape, new_london.shape

((13730, 38), (13730, 36))

In [36]:
#no columns E and N in the new df
new_london_v0.columns, new_london.columns

(Index(['id', 'observed_on_string', 'observed_on', 'time_observed_at',
        'time_zone', 'user_id', 'user_login', 'created_at', 'updated_at',
        'quality_grade', 'license', 'url', 'image_url', 'sound_url', 'tag_list',
        'description', 'num_identification_agreements',
        'num_identification_disagreements', 'captive_cultivated',
        'oauth_application_id', 'place_guess', 'latitude', 'longitude',
        'positional_accuracy', 'geoprivacy', 'taxon_geoprivacy',
        'coordinates_obscured', 'positioning_method', 'positioning_device',
        'species_guess', 'scientific_name', 'common_name', 'iconic_taxon_name',
        'taxon_id', 'challenge', 'E', 'N', 'greenspace_flag'],
       dtype='object'),
 Index(['observed_on_string', 'observed_on', 'time_observed_at', 'time_zone',
        'user_id', 'user_login', 'created_at', 'updated_at', 'quality_grade',
        'license', 'url', 'image_url', 'sound_url', 'tag_list', 'description',
        'num_identification_agreement

In [37]:
new_london_v0.greenspace_flag.value_counts()

0    8235
1    5495
Name: greenspace_flag, dtype: int64

In [38]:
new_london.greenspace_flag.value_counts()

0    7883
1    5847
Name: greenspace_flag, dtype: int64

In [39]:
5847-5495

352

### 352 more observations (n=13730) with greenspace_flag = 1  