## Steps

#### Load observations, greenspaces, ocean and continental waters
#### Exclude the greenspaces within ocean and continental waters (key: nature_reserve)
#### Exclude overlapping geometries 
#### Export files as .shp

In [1]:
#import libraries
import pandas as pd 
import numpy as np
from glob2 import glob
import geopandas as gpd 
import osmnx as ox

In [2]:
#get England as gdf to check anomalies. Points outside England's bounding box will be excluded
engl=ox.geocode_to_gdf('England')

In [3]:
#load observations: every CNC_London_*.csv in the folder, concatenated into one frame
data_folder = '../data/observations_final'
# sort the matches: glob order depends on the filesystem, so an unsorted list
# can give df a different row order from one machine/run to the next
observation_files = sorted(glob(data_folder+'/CNC_London_*.csv'))
# NOTE(review): `challenge` stores the whole relative path minus '.csv', not just
# the file stem -- left unchanged in case later steps rely on it; confirm intent
df = pd.concat([pd.read_csv(f).assign(challenge=f.replace('.csv','')) for f in observation_files])

In [4]:
df.shape

(13750, 39)

In [5]:
#exclude anomalies: keep only observations inside England's bounding box (bounds inclusive)
inside_england = (
    df['latitude'].le(engl['bbox_north'].iloc[0])
    & df['latitude'].ge(engl['bbox_south'].iloc[0])
    & df['longitude'].le(engl['bbox_east'].iloc[0])
    & df['longitude'].ge(engl['bbox_west'].iloc[0])
)
# .copy() makes df an independent frame, so adding columns to it afterwards
# does not raise SettingWithCopyWarning when rows were actually filtered out
df = df[inside_england].copy()

In [6]:
# derive the observation year from the first four characters of the timestamp string
df['year'] = df['time_observed_at'].str[:4].astype(np.int64)

In [7]:
df.shape

(13750, 40)

In [8]:
#load greenspaces: the OSM land-use polygon layer of the England extract
#source: http://download.geofabrik.de/europe/great-britain/england.html
landuse=gpd.read_file('../data/england-latest-free/gis_osm_landuse_a_free_1.shp')

In [9]:
landuse.shape

(1320022, 5)

In [10]:
landuse.head()

Unnamed: 0,osm_id,code,fclass,name,geometry
0,2838058,7202,park,,"POLYGON ((-0.09186 51.66414, -0.09146 51.66418..."
1,2903046,7202,park,,"POLYGON ((-0.08025 51.66596, -0.08005 51.66690..."
2,2903368,7202,park,,"POLYGON ((-0.08663 51.66215, -0.08653 51.66293..."
3,2903369,7202,park,,"POLYGON ((-0.08266 51.66106, -0.08227 51.66177..."
4,2903398,7202,park,Bush Hill Park,"POLYGON ((-0.07064 51.64713, -0.07058 51.64751..."


In [11]:
#helper used to turn centroid points into plain coordinates
def getXY(pt):
    """Return the coordinates of a point-like object as an ``(x, y)`` tuple.

    pt: any object exposing ``x`` and ``y`` attributes.
    """
    x_coord = pt.x
    y_coord = pt.y
    return x_coord, y_coord

In [12]:
#get coordinates of each polygon's centroid
# Compute the centroids once and read lon/lat through the vectorised .x/.y
# accessors. The previous form iterated `landuse.centroid`, which is the
# GeoDataFrame property (not the new column), so every centroid was computed
# a second time and then unpacked point by point in Python.
# NOTE(review): centroids are taken in the layer's own CRS; if that is a
# geographic CRS geopandas warns they are approximate -- presumably acceptable
# for the coarse bounding-box filter that follows, confirm.
landuse_centroids = landuse['geometry'].centroid
landuse['centroid'] = landuse_centroids
landuse['lon'] = landuse_centroids.x
landuse['lat'] = landuse_centroids.y


  

  This is separate from the ipykernel package so we can avoid doing imports until


In [13]:
landuse.head()

Unnamed: 0,osm_id,code,fclass,name,geometry,centroid,lon,lat
0,2838058,7202,park,,"POLYGON ((-0.09186 51.66414, -0.09146 51.66418...",POINT (-0.09078 51.66464),-0.090778,51.664637
1,2903046,7202,park,,"POLYGON ((-0.08025 51.66596, -0.08005 51.66690...",POINT (-0.07854 51.66695),-0.078543,51.666949
2,2903368,7202,park,,"POLYGON ((-0.08663 51.66215, -0.08653 51.66293...",POINT (-0.08612 51.66253),-0.086119,51.662532
3,2903369,7202,park,,"POLYGON ((-0.08266 51.66106, -0.08227 51.66177...",POINT (-0.08162 51.66126),-0.081621,51.661264
4,2903398,7202,park,Bush Hill Park,"POLYGON ((-0.07064 51.64713, -0.07058 51.64751...",POINT (-0.06792 51.64850),-0.067916,51.648504


In [14]:
# keep land-use polygons whose centroid latitude lies within the observations' latitude range (inclusive)
l1 = landuse.loc[(landuse['lat'] >= df.latitude.min()) & (landuse['lat'] <= df.latitude.max())]

In [15]:
# then keep those whose centroid longitude lies within the observations' longitude range (inclusive)
l2 = l1.loc[(l1['lon'] >= df.longitude.min()) & (l1['lon'] <= df.longitude.max())]

In [16]:
#land-use polygons restricted to the lat/lon range of the observations
landuse_clean=l2

In [17]:
landuse.shape, landuse_clean.shape

((1320022, 8), (84281, 8))

In [18]:
#keep only the land-use classes treated as greenspace
GREEN_FCLASSES = [
    'grass', 'park', 'forest', 'scrub', 'vineyard', 'meadow',
    'orchard', 'heath', 'recreation_ground', 'allotments', 'nature_reserve',
]
# isin() selects the same rows as OR-ing one .eq() test per class
green = landuse_clean[landuse_clean['fclass'].isin(GREEN_FCLASSES)]

In [19]:
green.shape

(53856, 8)

In [20]:
green.head()

Unnamed: 0,osm_id,code,fclass,name,geometry,centroid,lon,lat
0,2838058,7202,park,,"POLYGON ((-0.09186 51.66414, -0.09146 51.66418...",POINT (-0.09078 51.66464),-0.090778,51.664637
1,2903046,7202,park,,"POLYGON ((-0.08025 51.66596, -0.08005 51.66690...",POINT (-0.07854 51.66695),-0.078543,51.666949
2,2903368,7202,park,,"POLYGON ((-0.08663 51.66215, -0.08653 51.66293...",POINT (-0.08612 51.66253),-0.086119,51.662532
3,2903369,7202,park,,"POLYGON ((-0.08266 51.66106, -0.08227 51.66177...",POINT (-0.08162 51.66126),-0.081621,51.661264
4,2903398,7202,park,Bush Hill Park,"POLYGON ((-0.07064 51.64713, -0.07058 51.64751...",POINT (-0.06792 51.64850),-0.067916,51.648504


In [21]:
# remove the helper centroid column, leaving 'geometry' as the only geometry column
green_clean = green.drop(columns=['centroid'])

In [22]:
green_clean.head()

Unnamed: 0,osm_id,code,fclass,name,geometry,lon,lat
0,2838058,7202,park,,"POLYGON ((-0.09186 51.66414, -0.09146 51.66418...",-0.090778,51.664637
1,2903046,7202,park,,"POLYGON ((-0.08025 51.66596, -0.08005 51.66690...",-0.078543,51.666949
2,2903368,7202,park,,"POLYGON ((-0.08663 51.66215, -0.08653 51.66293...",-0.086119,51.662532
3,2903369,7202,park,,"POLYGON ((-0.08266 51.66106, -0.08227 51.66177...",-0.081621,51.661264
4,2903398,7202,park,Bush Hill Park,"POLYGON ((-0.07064 51.64713, -0.07058 51.64751...",-0.067916,51.648504


In [23]:
#load continental waters: the OSM water polygon layer of the Greater London extract
#NOTE(review): the link previously given here was the SoCal (California) extract, but the
#file read below lives in 'greater-london-latest-free'; presumably the source is
#http://download.geofabrik.de/europe/great-britain/england/greater-london.html -- confirm
water_cont=gpd.read_file('../data/greater-london-latest-free/gis_osm_water_a_free_1.shp')

In [24]:
water_cont.shape

(2532, 5)

In [25]:
#get coordinates of each water polygon's centroid
# Compute the centroids once and read lon/lat through the vectorised .x/.y
# accessors. The previous form iterated `water_cont.centroid`, which is the
# GeoDataFrame property (not the new column), so every centroid was computed
# a second time and then unpacked point by point in Python.
# NOTE(review): centroids are taken in the layer's own CRS; if that is a
# geographic CRS geopandas warns they are approximate -- presumably acceptable
# for the coarse bounding-box filter that follows, confirm.
water_centroids = water_cont['geometry'].centroid
water_cont['centroid'] = water_centroids
water_cont['lon'] = water_centroids.x
water_cont['lat'] = water_centroids.y


  """Entry point for launching an IPython kernel.

  


In [26]:
# keep water polygons whose centroid latitude lies within the observations' latitude range (inclusive)
wc1 = water_cont.loc[(water_cont['lat'] >= df.latitude.min()) & (water_cont['lat'] <= df.latitude.max())]

In [27]:
# then keep those whose centroid longitude lies within the observations' longitude range (inclusive)
wc2 = wc1.loc[(wc1['lon'] >= df.longitude.min()) & (wc1['lon'] <= df.longitude.max())]

In [28]:
#water polygons restricted to the lat/lon range of the observations
water_cont_clean=wc2

In [29]:
water_cont.shape, water_cont_clean.shape

((2532, 8), (2463, 8))

In [30]:
water_cont_clean.head()

Unnamed: 0,osm_id,code,fclass,name,geometry,centroid,lon,lat
0,2954289,8201,reservoir,Brent Reservoir,"POLYGON ((-0.25741 51.56811, -0.25677 51.56838...",POINT (-0.24725 51.57059),-0.247249,51.570593
1,3142984,8200,water,Angel Pond,"POLYGON ((-0.22699 51.61751, -0.22698 51.61754...",POINT (-0.22685 51.61750),-0.226851,51.617501
2,3171810,8201,reservoir,Lockwood Reservoir,"POLYGON ((-0.05281 51.59067, -0.05273 51.59096...",POINT (-0.04814 51.59479),-0.048145,51.594794
3,3171879,8201,reservoir,Low Maynard Reservoir,"POLYGON ((-0.05173 51.58667, -0.05173 51.58676...",POINT (-0.04942 51.58891),-0.049421,51.588905
4,3172035,8201,reservoir,,"POLYGON ((-0.04755 51.57490, -0.04642 51.57543...",POINT (-0.04669 51.57493),-0.046694,51.574925


In [31]:
# remove the helper centroid column before the overlay, leaving 'geometry' as the only geometry column
water_cont_clean_2 = water_cont_clean.drop(columns=['centroid'])

In [32]:
#cut the water polygons out of the greenspaces: keep only the parts of green_clean not covered by water
green_clean_difference=gpd.overlay(green_clean, water_cont_clean_2, how='difference')

In [33]:
#NOTE: rebinds `green` -- from here on it is the water-free overlay result, no longer the fclass-filtered frame
green=green_clean_difference

In [34]:
#merge all greenspace geometries into a single geometry so that overlapping polygons are dissolved
geom = green.geometry.unary_union

In [35]:
#wrap the merged geometry in a one-row GeoDataFrame, declared as EPSG:4326
green_unique = gpd.GeoDataFrame(geometry=[geom],crs='epsg:4326')

In [36]:
green_unique.head()

Unnamed: 0,geometry
0,"MULTIPOLYGON (((-0.16633 51.21638, -0.16650 51..."


In [37]:
green_unique.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [38]:
#split the merged geometry back into one row per polygon and renumber the rows from 0
green_unique = green_unique.explode().reset_index(drop=True)

In [39]:
green_unique.head()

Unnamed: 0,geometry
0,"POLYGON ((-0.16633 51.21638, -0.16650 51.21593..."
1,"POLYGON ((-0.34201 51.21249, -0.34125 51.21270..."
2,"POLYGON ((0.00256 51.23937, 0.00299 51.23896, ..."
3,"POLYGON ((-0.23174 51.24215, -0.23167 51.24220..."
4,"POLYGON ((-0.32467 51.24743, -0.32462 51.24744..."


In [40]:
green_unique.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [41]:
green_unique.shape

(32758, 1)

In [42]:
water_cont_clean.shape

(2463, 8)

In [43]:
#merge all water geometries into a single geometry so that overlapping polygons are dissolved (rebinds `geom`)
geom = water_cont_clean.geometry.unary_union

In [44]:
#wrap the merged water geometry in a one-row GeoDataFrame, declared as EPSG:4326
water_cont_clean_unique = gpd.GeoDataFrame(geometry=[geom],crs='epsg:4326')

In [45]:
water_cont_clean_unique.head()

Unnamed: 0,geometry
0,"MULTIPOLYGON (((0.19779 51.45779, 0.19788 51.4..."


In [46]:
water_cont_clean_unique.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [47]:
#split the merged geometry back into one row per polygon and renumber the rows from 0
water_cont_clean_unique = water_cont_clean_unique.explode().reset_index(drop=True)

In [48]:
water_cont_clean_unique.head()

Unnamed: 0,geometry
0,"POLYGON ((0.19779 51.45779, 0.19788 51.45788, ..."
1,"POLYGON ((0.02079 51.29146, 0.02079 51.29148, ..."
2,"POLYGON ((-0.12939 51.29254, -0.12937 51.29256..."
3,"POLYGON ((0.04901 51.29253, 0.04904 51.29256, ..."
4,"POLYGON ((0.04998 51.29271, 0.04998 51.29275, ..."


In [49]:
water_cont_clean_unique.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [50]:
water_cont_clean_unique.shape

(1884, 1)

In [51]:
#export the de-overlapped greenspace and water polygons as .shp files
#NOTE(review): assumes ../data/outputs already exists -- confirm before a fresh run
green_unique.to_file('../data/outputs/greenspaces_ldn_final.shp')
water_cont_clean_unique.to_file('../data/outputs/bluespaces_ldn_final.shp')