# Get the greenspaces from London and set the flags

In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
from glob2 import glob
import osmnx as ox

In [2]:
green=gpd.read_file('../data/outputs/greenspaces_ldn_final.shp')

In [3]:
green.shape

(32758, 2)

In [4]:
green.head()

Unnamed: 0,FID,geometry
0,0,"POLYGON ((-0.16633 51.21638, -0.16650 51.21593..."
1,1,"POLYGON ((-0.34201 51.21249, -0.34125 51.21270..."
2,2,"POLYGON ((0.00256 51.23937, 0.00299 51.23896, ..."
3,3,"POLYGON ((-0.23174 51.24215, -0.23167 51.24220..."
4,4,"POLYGON ((-0.32467 51.24743, -0.32462 51.24744..."


In [5]:
#get California as gdf to check anomalies later. Points outside England will be excluded
engl=ox.geocode_to_gdf('England')

In [6]:
engl

Unnamed: 0,geometry,bbox_north,bbox_south,bbox_east,bbox_west,place_id,osm_type,osm_id,lat,lon,display_name,class,type,importance
0,"POLYGON ((-6.70475 49.94700, -6.70473 49.94500...",55.917,49.674,2.091912,-6.704749,258567595,relation,58447,52.531021,-1.264906,"England, United Kingdom",boundary,administrative,0.948749


## Load observations (API)

In [7]:
data_folder = '../data/observations_final'
df = pd.concat([pd.read_csv(f).assign(challenge=f.replace('.csv','')) for f in glob(data_folder+'/CNC_London_*.csv')])

In [8]:
df.shape

(13750, 39)

In [9]:
#check for incorrect points (lat)
df['latitude'].min(), df['latitude'].max()

(51.2037968565, 51.798367500299996)

In [10]:
#check for incorrect points (lon). Clearly something wrong!
df['longitude'].min(), df['longitude'].max()

(-0.5176091555, 0.2358256956)

## Get only the points inside CA

In [11]:
df=df[df['latitude'].le(engl['bbox_north'].iloc[0]) \
                  & df['latitude'].ge(engl['bbox_south'].iloc[0]) \
                  & df['longitude'].le(engl['bbox_east'].iloc[0]) \
                  & df['longitude'].ge(engl['bbox_west'].iloc[0])]

In [12]:
df.shape

(13750, 39)

In [13]:
df.head()

Unnamed: 0,id,observed_on_string,time_observed_at,created_time_zone,created_at,updated_at,description,user_id,user_login,quality_grade,...,iconic_taxon_name,taxon_rank,taxon_parent_id,taxon_native,taxon_endemic,taxon_threatened,taxon_search_rank,taxon_observations,identifications,challenge
0,11479007,2018-04-27 7:52:34 am BST,2018-04-27T07:52:34+00:00,Etc/UTC,2018-04-27T07:06:17+00:00,2018-04-27T09:26:56+00:00,,159021,muki,research,...,Plantae,species,204212,True,False,False,27624,27624,"[{'user_id': 353381, 'category': 'improving', ...",../data/observations_final\CNC_London_2018
1,11480212,2018-04-27 8:39:41 am BST,2018-04-27T08:39:41+01:00,Europe/London,2018-04-27T09:02:52+01:00,2019-01-29T21:16:34+00:00,,664459,lucyrobinsonnhm,research,...,Aves,variety,3017,False,False,False,43329,43329,"[{'user_id': 29729, 'category': 'supporting', ...",../data/observations_final\CNC_London_2018
2,11480902,Fri Apr 27 2018 08:34:31 GMT+0100 (GMT+1),2018-04-27T08:34:31+02:00,Europe/Amsterdam,2018-04-27T10:42:01+02:00,2018-04-27T13:01:49+02:00,,908315,tess26,casual,...,Plantae,species,56095,False,False,False,2813,2813,"[{'user_id': 908315, 'category': 'leading', 'd...",../data/observations_final\CNC_London_2018
3,11481097,Fri Apr 27 2018 09:54:35 GMT+0100 (GMT+1),2018-04-27T09:54:35+02:00,Europe/Amsterdam,2018-04-27T10:54:45+02:00,2018-04-27T22:23:41+02:00,,796473,bryonycross,research,...,Plantae,species,156638,False,False,False,4639,4639,"[{'user_id': 160, 'category': 'improving', 'di...",../data/observations_final\CNC_London_2018
4,11482247,2018-04-27 8:42:21 am BST,2018-04-27T08:42:21+01:00,Europe/London,2018-04-27T11:09:55+01:00,2018-04-27T16:37:31+01:00,,664459,lucyrobinsonnhm,research,...,Plantae,hybrid,49664,False,False,False,5866,5866,"[{'user_id': 160, 'category': 'supporting', 'd...",../data/observations_final\CNC_London_2018


## Create gdf using the observations

In [14]:
df['longitude'].isna().sum(), df['latitude'].isna().sum()

(0, 0)

In [15]:
#create the points inverting the coordinates
geometry_invert = [Point(xy) for xy in zip(df.longitude, df.latitude)]

In [16]:
#crete a geodataframe with the observations
observations_gdf = gpd.GeoDataFrame(df, crs='epsg:4326', geometry=geometry_invert)

In [17]:
observations_gdf.shape

(13750, 40)

In [18]:
observations_gdf.head()

Unnamed: 0,id,observed_on_string,time_observed_at,created_time_zone,created_at,updated_at,description,user_id,user_login,quality_grade,...,taxon_rank,taxon_parent_id,taxon_native,taxon_endemic,taxon_threatened,taxon_search_rank,taxon_observations,identifications,challenge,geometry
0,11479007,2018-04-27 7:52:34 am BST,2018-04-27T07:52:34+00:00,Etc/UTC,2018-04-27T07:06:17+00:00,2018-04-27T09:26:56+00:00,,159021,muki,research,...,species,204212,True,False,False,27624,27624,"[{'user_id': 353381, 'category': 'improving', ...",../data/observations_final\CNC_London_2018,POINT (-0.22849 51.55064)
1,11480212,2018-04-27 8:39:41 am BST,2018-04-27T08:39:41+01:00,Europe/London,2018-04-27T09:02:52+01:00,2019-01-29T21:16:34+00:00,,664459,lucyrobinsonnhm,research,...,variety,3017,False,False,False,43329,43329,"[{'user_id': 29729, 'category': 'supporting', ...",../data/observations_final\CNC_London_2018,POINT (-0.17361 51.49528)
2,11480902,Fri Apr 27 2018 08:34:31 GMT+0100 (GMT+1),2018-04-27T08:34:31+02:00,Europe/Amsterdam,2018-04-27T10:42:01+02:00,2018-04-27T13:01:49+02:00,,908315,tess26,casual,...,species,56095,False,False,False,2813,2813,"[{'user_id': 908315, 'category': 'leading', 'd...",../data/observations_final\CNC_London_2018,POINT (-0.16910 51.47298)
3,11481097,Fri Apr 27 2018 09:54:35 GMT+0100 (GMT+1),2018-04-27T09:54:35+02:00,Europe/Amsterdam,2018-04-27T10:54:45+02:00,2018-04-27T22:23:41+02:00,,796473,bryonycross,research,...,species,156638,False,False,False,4639,4639,"[{'user_id': 160, 'category': 'improving', 'di...",../data/observations_final\CNC_London_2018,POINT (-0.16152 51.50808)
4,11482247,2018-04-27 8:42:21 am BST,2018-04-27T08:42:21+01:00,Europe/London,2018-04-27T11:09:55+01:00,2018-04-27T16:37:31+01:00,,664459,lucyrobinsonnhm,research,...,hybrid,49664,False,False,False,5866,5866,"[{'user_id': 160, 'category': 'supporting', 'd...",../data/observations_final\CNC_London_2018,POINT (-0.17389 51.49611)


## Spatial join

In [19]:
# spatial join observations and observations.
points_polys = gpd.sjoin(observations_gdf, green, how='inner')

In [20]:
points_polys.head()

Unnamed: 0,id,observed_on_string,time_observed_at,created_time_zone,created_at,updated_at,description,user_id,user_login,quality_grade,...,taxon_native,taxon_endemic,taxon_threatened,taxon_search_rank,taxon_observations,identifications,challenge,geometry,index_right,FID
3,11481097,Fri Apr 27 2018 09:54:35 GMT+0100 (GMT+1),2018-04-27T09:54:35+02:00,Europe/Amsterdam,2018-04-27T10:54:45+02:00,2018-04-27T22:23:41+02:00,,796473,bryonycross,research,...,False,False,False,4639,4639,"[{'user_id': 160, 'category': 'improving', 'di...",../data/observations_final\CNC_London_2018,POINT (-0.16152 51.50808),32756,32756
5,11482706,Fri Apr 27 2018 11:40:06 GMT+0100 (GMT+1),2018-04-27T11:40:06+02:00,Europe/Amsterdam,2018-04-27T12:40:31+02:00,2018-04-27T19:18:08+02:00,,796473,bryonycross,casual,...,False,False,False,6135,6135,"[{'user_id': 796473, 'category': 'leading', 'd...",../data/observations_final\CNC_London_2018,POINT (-0.16758 51.51051),32756,32756
6,11483083,Fri Apr 27 2018 12:01:15 GMT+0100 (GMT+1),2018-04-27T12:01:15+02:00,Europe/Amsterdam,2018-04-27T13:01:48+02:00,2018-04-27T19:18:09+02:00,,796473,bryonycross,needs_id,...,False,False,False,42225,42225,"[{'user_id': 796473, 'category': 'leading', 'd...",../data/observations_final\CNC_London_2018,POINT (-0.16793 51.51077),32756,32756
10,11484564,Fri Apr 27 2018 12:58:17 GMT+0100 (GMT+1),2018-04-27T12:58:17+02:00,Europe/Amsterdam,2018-04-27T13:58:34+02:00,2018-04-27T19:18:10+02:00,"Large tree, medium sized leaves that are mostl...",796473,bryonycross,research,...,False,False,False,28819,28819,"[{'user_id': 796473, 'category': 'improving', ...",../data/observations_final\CNC_London_2018,POINT (-0.16176 51.50792),32756,32756
11,11484587,Fri Apr 27 2018 12:58:55 GMT+0100 (GMT+1),2018-04-27T12:58:55+02:00,Europe/Amsterdam,2018-04-27T13:59:13+02:00,2018-04-27T19:18:11+02:00,"Large tree, medium sized leaves that are mostl...",796473,bryonycross,needs_id,...,False,False,False,4080,4080,"[{'user_id': 796473, 'category': 'leading', 'd...",../data/observations_final\CNC_London_2018,POINT (-0.16168 51.50783),32756,32756


In [21]:
points_polys.shape

(8356, 42)

In [22]:
points_polys.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

## Set flag for greenspaces

In [23]:
ids_with_greenspace = points_polys.id.to_list()

In [24]:
new_df = df.set_index('id')
new_df.head()

Unnamed: 0_level_0,observed_on_string,time_observed_at,created_time_zone,created_at,updated_at,description,user_id,user_login,quality_grade,reviewed_by,...,taxon_rank,taxon_parent_id,taxon_native,taxon_endemic,taxon_threatened,taxon_search_rank,taxon_observations,identifications,challenge,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11479007,2018-04-27 7:52:34 am BST,2018-04-27T07:52:34+00:00,Etc/UTC,2018-04-27T07:06:17+00:00,2018-04-27T09:26:56+00:00,,159021,muki,research,"[160, 353381, 599476, 664459, 743476]",...,species,204212,True,False,False,27624,27624,"[{'user_id': 353381, 'category': 'improving', ...",../data/observations_final\CNC_London_2018,POINT (-0.22849 51.55064)
11480212,2018-04-27 8:39:41 am BST,2018-04-27T08:39:41+01:00,Europe/London,2018-04-27T09:02:52+01:00,2019-01-29T21:16:34+00:00,,664459,lucyrobinsonnhm,research,"[29729, 42615, 599476, 664459, 790453, 1082970]",...,variety,3017,False,False,False,43329,43329,"[{'user_id': 29729, 'category': 'supporting', ...",../data/observations_final\CNC_London_2018,POINT (-0.17361 51.49528)
11480902,Fri Apr 27 2018 08:34:31 GMT+0100 (GMT+1),2018-04-27T08:34:31+02:00,Europe/Amsterdam,2018-04-27T10:42:01+02:00,2018-04-27T13:01:49+02:00,,908315,tess26,casual,[908315],...,species,56095,False,False,False,2813,2813,"[{'user_id': 908315, 'category': 'leading', 'd...",../data/observations_final\CNC_London_2018,POINT (-0.16910 51.47298)
11481097,Fri Apr 27 2018 09:54:35 GMT+0100 (GMT+1),2018-04-27T09:54:35+02:00,Europe/Amsterdam,2018-04-27T10:54:45+02:00,2018-04-27T22:23:41+02:00,,796473,bryonycross,research,"[160, 353381, 599476, 796473]",...,species,156638,False,False,False,4639,4639,"[{'user_id': 160, 'category': 'improving', 'di...",../data/observations_final\CNC_London_2018,POINT (-0.16152 51.50808)
11482247,2018-04-27 8:42:21 am BST,2018-04-27T08:42:21+01:00,Europe/London,2018-04-27T11:09:55+01:00,2018-04-27T16:37:31+01:00,,664459,lucyrobinsonnhm,research,"[160, 353381, 599476, 664459]",...,hybrid,49664,False,False,False,5866,5866,"[{'user_id': 160, 'category': 'supporting', 'd...",../data/observations_final\CNC_London_2018,POINT (-0.17389 51.49611)


In [25]:
new_df['greenspace_flag'] = '0'
new_df.loc[ids_with_greenspace,'greenspace_flag'] = '1'

In [26]:
new_df.greenspace_flag.value_counts()

1    8356
0    5394
Name: greenspace_flag, dtype: int64

In [27]:
#export as csv
new_df.to_csv('../data/outputs/ldn_data_with_greenspace_flag_v2.csv')