In [17]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
from shapely.affinity import translate
from glob2 import glob

## Load data

In [3]:
#load geojson
park_forest_overpass=gpd.read_file('../data/geojson/london_park_forest.json', drive='GeoJSON')

In [4]:
#the geojson can be called using the code below 
#https://overpass-turbo.eu/
'''

/*
This is an example Overpass query.
Try it out by pressing the Run button above!
You can find more examples with the Load tool.
*/
(
nwr[leisure=park](51.251601468176545,-0.720977783203125,51.72702815704774,0.545196533203125);
nwr[landuse=forest](51.251601468176545,-0.720977783203125,51.72702815704774,0.545196533203125);
);
out geom;

'''
''''''

''

In [5]:
#keep only polygons

#exclude Point
park_forest_overpass=park_forest_overpass[park_forest_overpass['geometry'].geom_type.ne('Point')]
#exclude LineString
park_forest_overpass=park_forest_overpass[park_forest_overpass['geometry'].geom_type.ne('LineString')]

In [6]:
park_forest_overpass.head()

Unnamed: 0,id,@id,landuse,name,type,leisure,wikidata,wikipedia,source,source:name,...,multipolygon,parking,note:history,construction,not:designation,disused:leisure,garden:type,bench,species:en,geometry
0,relation/12098,relation/12098,forest,Newmeadow Plantation,multipolygon,,,,,,...,,,,,,,,,,"POLYGON ((-0.60303 51.45948, -0.60368 51.45910..."
1,relation/18721,relation/18721,,Horndon,multipolygon,park,,,,,...,,,,,,,,,,"POLYGON ((0.39837 51.52229, 0.39910 51.52251, ..."
2,relation/28717,relation/28717,forest,,multipolygon,,,,,,...,,,,,,,,,,"POLYGON ((-0.19293 51.59804, -0.19296 51.59791..."
3,relation/35150,relation/35150,grass,Raphael Park,multipolygon,park,Q7294001,en:Raphael Park,,,...,,,,,,,,,,"POLYGON ((0.18899 51.59232, 0.18928 51.59262, ..."
4,relation/70348,relation/70348,,Tanner Street Park,multipolygon,park,,,,,...,,,,,,,,,,"POLYGON ((-0.07945 51.49964, -0.07934 51.50012..."


In [7]:
park_forest_overpass.shape

(6740, 180)

In [8]:
park_forest_overpass.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

## Exclude the overlapping geometries 

In [9]:
geom = park_forest_overpass.geometry.unary_union

In [10]:
park_forest_unique = gpd.GeoDataFrame(geometry=[geom],crs="epsg:4326")

In [11]:
park_forest_unique.head()

Unnamed: 0,geometry
0,"MULTIPOLYGON (((0.06496 51.43832, 0.06488 51.4..."


In [12]:
park_forest_unique.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [13]:
park_forest_unique = park_forest_unique.explode().reset_index(drop=True)

In [14]:
park_forest_unique.head()

Unnamed: 0,geometry
0,"POLYGON ((0.06496 51.43832, 0.06488 51.43848, ..."
1,"POLYGON ((-0.66613 51.42098, -0.66546 51.42124..."
2,"POLYGON ((-0.03131 51.43360, -0.03027 51.43355..."
3,"POLYGON ((-0.24209 51.45606, -0.24205 51.45651..."
4,"POLYGON ((-0.51873 51.48176, -0.51901 51.48187..."


In [15]:
park_forest_unique.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [16]:
park_forest_unique.shape

(5966, 1)

## Load observations

In [18]:
data_folder = '../data/data_muki/data'
london_df = pd.concat([pd.read_csv(f).assign(challenge=f.replace('.csv','')) for f in glob(data_folder+'/CNC London*.csv')])

In [19]:
london_df.shape

(13730, 35)

In [20]:
london_df.head()

Unnamed: 0,id,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,...,taxon_geoprivacy,coordinates_obscured,positioning_method,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id,challenge
0,11479007,2018-04-27 7:52:34 am BST,2018-04-27,2018-04-27 07:52:34 UTC,UTC,159021,muki,2018-04-27 07:06:17 UTC,2018-04-27 09:26:56 UTC,research,...,,False,gps,gps,lesser celandine,Ficaria verna,lesser celandine,Plantae,204321.0,../data/data_muki/data\CNC London 2018
1,11480212,2018-04-27 8:39:41 am BST,2018-04-27,2018-04-27 07:39:41 UTC,London,664459,lucyrobinsonnhm,2018-04-27 08:02:52 UTC,2019-01-29 21:16:34 UTC,research,...,open,False,gps,gps,Pigeon biset domestique,Columba livia domestica,Feral Pigeon,Aves,122767.0,../data/data_muki/data\CNC London 2018
2,11480902,Fri Apr 27 2018 08:34:31 GMT+0100 (GMT+1),2018-04-27,2018-04-27 06:34:31 UTC,Amsterdam,908315,tess26,2018-04-27 08:42:01 UTC,2018-04-27 11:01:49 UTC,casual,...,,False,,,,Hyacinthoides hispanica,Spanish Bluebell,Plantae,57635.0,../data/data_muki/data\CNC London 2018
3,11481097,Fri Apr 27 2018 09:54:35 GMT+0100 (GMT+1),2018-04-27,2018-04-27 07:54:35 UTC,Amsterdam,796473,bryonycross,2018-04-27 08:54:45 UTC,2018-04-27 20:23:41 UTC,research,...,,False,,,Green Alkanet,Pentaglottis sempervirens,Green Alkanet,Plantae,166466.0,../data/data_muki/data\CNC London 2018
4,11482247,2018-04-27 8:42:21 am BST,2018-04-27,2018-04-27 07:42:21 UTC,London,664459,lucyrobinsonnhm,2018-04-27 10:09:55 UTC,2018-04-27 15:37:31 UTC,research,...,,False,gps,gps,London Plane,Platanus × hispanica,London Plane,Plantae,552449.0,../data/data_muki/data\CNC London 2018


In [21]:
london_df['latitude'], london_df['longitude']

(0       51.550636
 1       51.495277
 2       51.472981
 3       51.508076
 4       51.496113
           ...    
 5756    51.374607
 5757    51.540178
 5758    51.564122
 5759    51.503484
 5760    51.411946
 Name: latitude, Length: 13730, dtype: float64, 0      -0.228487
 1      -0.173611
 2      -0.169102
 3      -0.161516
 4      -0.173889
           ...   
 5756   -0.171098
 5757   -0.329124
 5758   -0.334986
 5759   -0.346972
 5760   -0.288115
 Name: longitude, Length: 13730, dtype: float64)

In [22]:
#create the points inverting the coordinates
geometry_invert = [Point(xy) for xy in zip(london_df.longitude, london_df.latitude)]

In [24]:
#crete a geodataframe with the observations
observations_gdf = gpd.GeoDataFrame(london_df, crs="epsg:4326", geometry=geometry_invert)

In [30]:
observations_gdf.shape

(13730, 36)

In [25]:
observations_gdf.head()

Unnamed: 0,id,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,...,coordinates_obscured,positioning_method,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id,challenge,geometry
0,11479007,2018-04-27 7:52:34 am BST,2018-04-27,2018-04-27 07:52:34 UTC,UTC,159021,muki,2018-04-27 07:06:17 UTC,2018-04-27 09:26:56 UTC,research,...,False,gps,gps,lesser celandine,Ficaria verna,lesser celandine,Plantae,204321.0,../data/data_muki/data\CNC London 2018,POINT (-0.22849 51.55064)
1,11480212,2018-04-27 8:39:41 am BST,2018-04-27,2018-04-27 07:39:41 UTC,London,664459,lucyrobinsonnhm,2018-04-27 08:02:52 UTC,2019-01-29 21:16:34 UTC,research,...,False,gps,gps,Pigeon biset domestique,Columba livia domestica,Feral Pigeon,Aves,122767.0,../data/data_muki/data\CNC London 2018,POINT (-0.17361 51.49528)
2,11480902,Fri Apr 27 2018 08:34:31 GMT+0100 (GMT+1),2018-04-27,2018-04-27 06:34:31 UTC,Amsterdam,908315,tess26,2018-04-27 08:42:01 UTC,2018-04-27 11:01:49 UTC,casual,...,False,,,,Hyacinthoides hispanica,Spanish Bluebell,Plantae,57635.0,../data/data_muki/data\CNC London 2018,POINT (-0.16910 51.47298)
3,11481097,Fri Apr 27 2018 09:54:35 GMT+0100 (GMT+1),2018-04-27,2018-04-27 07:54:35 UTC,Amsterdam,796473,bryonycross,2018-04-27 08:54:45 UTC,2018-04-27 20:23:41 UTC,research,...,False,,,Green Alkanet,Pentaglottis sempervirens,Green Alkanet,Plantae,166466.0,../data/data_muki/data\CNC London 2018,POINT (-0.16152 51.50808)
4,11482247,2018-04-27 8:42:21 am BST,2018-04-27,2018-04-27 07:42:21 UTC,London,664459,lucyrobinsonnhm,2018-04-27 10:09:55 UTC,2018-04-27 15:37:31 UTC,research,...,False,gps,gps,London Plane,Platanus × hispanica,London Plane,Plantae,552449.0,../data/data_muki/data\CNC London 2018,POINT (-0.17389 51.49611)


## Spatial join

In [49]:
# spatial join observations and observations.
points_polys = gpd.sjoin(observations_gdf, park_forest_unique, how='inner')

In [50]:
points_polys.head()

Unnamed: 0,id,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,...,positioning_method,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id,challenge,geometry,index_right
3,11481097,Fri Apr 27 2018 09:54:35 GMT+0100 (GMT+1),2018-04-27,2018-04-27 07:54:35 UTC,Amsterdam,796473,bryonycross,2018-04-27 08:54:45 UTC,2018-04-27 20:23:41 UTC,research,...,,,Green Alkanet,Pentaglottis sempervirens,Green Alkanet,Plantae,166466.0,../data/data_muki/data\CNC London 2018,POINT (-0.16152 51.50808),2427
5,11482706,Fri Apr 27 2018 11:40:06 GMT+0100 (GMT+1),2018-04-27,2018-04-27 09:40:06 UTC,Amsterdam,796473,bryonycross,2018-04-27 10:40:31 UTC,2018-04-27 17:18:08 UTC,casual,...,,,,Quercus stellata,post oak,Plantae,119269.0,../data/data_muki/data\CNC London 2018,POINT (-0.16758 51.51051),2427
6,11483083,Fri Apr 27 2018 12:01:15 GMT+0100 (GMT+1),2018-04-27,2018-04-27 10:01:15 UTC,Amsterdam,796473,bryonycross,2018-04-27 11:01:48 UTC,2018-04-27 17:18:09 UTC,needs_id,...,,,,Oligochaeta,Earthworms and allies,Animalia,333586.0,../data/data_muki/data\CNC London 2018,POINT (-0.16793 51.51077),2427
10,11484564,Fri Apr 27 2018 12:58:17 GMT+0100 (GMT+1),2018-04-27,2018-04-27 10:58:17 UTC,Amsterdam,796473,bryonycross,2018-04-27 11:58:34 UTC,2018-04-27 17:18:10 UTC,research,...,,,Garden Snail,Cornu aspersum,Garden Snail,Mollusca,480298.0,../data/data_muki/data\CNC London 2018,POINT (-0.16176 51.50792),2427
11,11484587,Fri Apr 27 2018 12:58:55 GMT+0100 (GMT+1),2018-04-27,2018-04-27 10:58:55 UTC,Amsterdam,796473,bryonycross,2018-04-27 11:59:13 UTC,2018-04-27 17:18:11 UTC,needs_id,...,,,,Arion rufus,Chocolate Arion,Mollusca,84755.0,../data/data_muki/data\CNC London 2018,POINT (-0.16168 51.50783),2427


In [51]:
points_polys.shape

(5847, 37)

In [52]:
points_polys.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

## Set flag for greenspaces

In [53]:
ids_with_greenspace = points_polys.id.to_list()

In [54]:
new_london = london_df.set_index('id')
new_london.head()

Unnamed: 0_level_0,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,license,...,coordinates_obscured,positioning_method,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id,challenge,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11479007,2018-04-27 7:52:34 am BST,2018-04-27,2018-04-27 07:52:34 UTC,UTC,159021,muki,2018-04-27 07:06:17 UTC,2018-04-27 09:26:56 UTC,research,CC-BY,...,False,gps,gps,lesser celandine,Ficaria verna,lesser celandine,Plantae,204321.0,../data/data_muki/data\CNC London 2018,POINT (-0.22849 51.55064)
11480212,2018-04-27 8:39:41 am BST,2018-04-27,2018-04-27 07:39:41 UTC,London,664459,lucyrobinsonnhm,2018-04-27 08:02:52 UTC,2019-01-29 21:16:34 UTC,research,CC0,...,False,gps,gps,Pigeon biset domestique,Columba livia domestica,Feral Pigeon,Aves,122767.0,../data/data_muki/data\CNC London 2018,POINT (-0.17361 51.49528)
11480902,Fri Apr 27 2018 08:34:31 GMT+0100 (GMT+1),2018-04-27,2018-04-27 06:34:31 UTC,Amsterdam,908315,tess26,2018-04-27 08:42:01 UTC,2018-04-27 11:01:49 UTC,casual,,...,False,,,,Hyacinthoides hispanica,Spanish Bluebell,Plantae,57635.0,../data/data_muki/data\CNC London 2018,POINT (-0.16910 51.47298)
11481097,Fri Apr 27 2018 09:54:35 GMT+0100 (GMT+1),2018-04-27,2018-04-27 07:54:35 UTC,Amsterdam,796473,bryonycross,2018-04-27 08:54:45 UTC,2018-04-27 20:23:41 UTC,research,CC-BY-NC,...,False,,,Green Alkanet,Pentaglottis sempervirens,Green Alkanet,Plantae,166466.0,../data/data_muki/data\CNC London 2018,POINT (-0.16152 51.50808)
11482247,2018-04-27 8:42:21 am BST,2018-04-27,2018-04-27 07:42:21 UTC,London,664459,lucyrobinsonnhm,2018-04-27 10:09:55 UTC,2018-04-27 15:37:31 UTC,research,CC0,...,False,gps,gps,London Plane,Platanus × hispanica,London Plane,Plantae,552449.0,../data/data_muki/data\CNC London 2018,POINT (-0.17389 51.49611)


In [55]:
new_london['greenspace_flag'] = '0'
new_london.loc[ids_with_greenspace,'greenspace_flag'] = '1'

In [56]:
new_london.greenspace_flag.value_counts()

0    7883
1    5847
Name: greenspace_flag, dtype: int64

In [57]:
new_london.head()

Unnamed: 0_level_0,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,license,...,positioning_method,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id,challenge,geometry,greenspace_flag
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11479007,2018-04-27 7:52:34 am BST,2018-04-27,2018-04-27 07:52:34 UTC,UTC,159021,muki,2018-04-27 07:06:17 UTC,2018-04-27 09:26:56 UTC,research,CC-BY,...,gps,gps,lesser celandine,Ficaria verna,lesser celandine,Plantae,204321.0,../data/data_muki/data\CNC London 2018,POINT (-0.22849 51.55064),0
11480212,2018-04-27 8:39:41 am BST,2018-04-27,2018-04-27 07:39:41 UTC,London,664459,lucyrobinsonnhm,2018-04-27 08:02:52 UTC,2019-01-29 21:16:34 UTC,research,CC0,...,gps,gps,Pigeon biset domestique,Columba livia domestica,Feral Pigeon,Aves,122767.0,../data/data_muki/data\CNC London 2018,POINT (-0.17361 51.49528),0
11480902,Fri Apr 27 2018 08:34:31 GMT+0100 (GMT+1),2018-04-27,2018-04-27 06:34:31 UTC,Amsterdam,908315,tess26,2018-04-27 08:42:01 UTC,2018-04-27 11:01:49 UTC,casual,,...,,,,Hyacinthoides hispanica,Spanish Bluebell,Plantae,57635.0,../data/data_muki/data\CNC London 2018,POINT (-0.16910 51.47298),0
11481097,Fri Apr 27 2018 09:54:35 GMT+0100 (GMT+1),2018-04-27,2018-04-27 07:54:35 UTC,Amsterdam,796473,bryonycross,2018-04-27 08:54:45 UTC,2018-04-27 20:23:41 UTC,research,CC-BY-NC,...,,,Green Alkanet,Pentaglottis sempervirens,Green Alkanet,Plantae,166466.0,../data/data_muki/data\CNC London 2018,POINT (-0.16152 51.50808),1
11482247,2018-04-27 8:42:21 am BST,2018-04-27,2018-04-27 07:42:21 UTC,London,664459,lucyrobinsonnhm,2018-04-27 10:09:55 UTC,2018-04-27 15:37:31 UTC,research,CC0,...,gps,gps,London Plane,Platanus × hispanica,London Plane,Plantae,552449.0,../data/data_muki/data\CNC London 2018,POINT (-0.17389 51.49611),0


In [58]:
#export as csv
new_london.to_csv('../data/outputs/london_data_with_greenspace_flag.csv')

In [59]:
#compare with Florence's csv export
new_london_v0=pd.read_csv('../data/outputs/london_data_with_greenspace_flag_v1.csv')

In [60]:
new_london_v0.shape, new_london.shape

((13730, 38), (13730, 36))

In [61]:
#no columns E and N in the new df
new_london_v0.columns, new_london.columns

(Index(['id', 'observed_on_string', 'observed_on', 'time_observed_at',
        'time_zone', 'user_id', 'user_login', 'created_at', 'updated_at',
        'quality_grade', 'license', 'url', 'image_url', 'sound_url', 'tag_list',
        'description', 'num_identification_agreements',
        'num_identification_disagreements', 'captive_cultivated',
        'oauth_application_id', 'place_guess', 'latitude', 'longitude',
        'positional_accuracy', 'geoprivacy', 'taxon_geoprivacy',
        'coordinates_obscured', 'positioning_method', 'positioning_device',
        'species_guess', 'scientific_name', 'common_name', 'iconic_taxon_name',
        'taxon_id', 'challenge', 'E', 'N', 'greenspace_flag'],
       dtype='object'),
 Index(['observed_on_string', 'observed_on', 'time_observed_at', 'time_zone',
        'user_id', 'user_login', 'created_at', 'updated_at', 'quality_grade',
        'license', 'url', 'image_url', 'sound_url', 'tag_list', 'description',
        'num_identification_agreement

In [64]:
new_london_v0.greenspace_flag.value_counts()

0    8235
1    5495
Name: greenspace_flag, dtype: int64

In [65]:
new_london.greenspace_flag.value_counts()

0    7883
1    5847
Name: greenspace_flag, dtype: int64

In [66]:
5847-5495

352

### 352 more observations (n=13730) with greenspace_flag = 1  