## Steps

#### Load observations, greenspaces, ocean and continental waters
#### Exclude the greenspaces within ocean and continental waters (key: nature_reserve)
#### Exclude overlapping geometries 
#### Export files as .shp

In [1]:
#import libraries
import pandas as pd 
import numpy as np
from glob2 import glob
import geopandas as gpd 
import osmnx as ox

In [2]:
# Geocode California into a GeoDataFrame. Its bounding box is used further down
# to discard observations whose coordinates fall outside the state (anomalies).
ca = ox.geocode_to_gdf('California')

In [3]:
#load observations
data_folder = '../data/observations_final'
# sorted() makes the concatenation order deterministic; the order returned by
# glob depends on the filesystem.
observation_files = sorted(glob(data_folder+'/CNC_Los_Angeles_*.csv'))
# `challenge` holds the path of the source file with every '.csv' occurrence removed.
# ignore_index=True gives the combined frame a unique 0..n-1 index instead of
# repeating each file's own row labels.
df = pd.concat(
    [pd.read_csv(f).assign(challenge=f.replace('.csv','')) for f in observation_files],
    ignore_index=True,
)

In [4]:
df.shape

(91694, 39)

In [5]:
#exclude anomalies
# Keep only observations whose coordinates fall inside the bounding box stored in
# the first row of `ca` (bounds inclusive, same as the previous le/ge comparisons).
in_lat_range = df['latitude'].between(ca['bbox_south'].iloc[0], ca['bbox_north'].iloc[0])
in_lon_range = df['longitude'].between(ca['bbox_west'].iloc[0], ca['bbox_east'].iloc[0])
# .copy() makes the filtered result an independent frame, so columns assigned to
# `df` afterwards (e.g. df['year']) are not written to a slice of the unfiltered
# data (avoids pandas' SettingWithCopyWarning).
df = df[in_lat_range & in_lon_range].copy()

In [6]:
#create year
# The first four characters of the `time_observed_at` string are parsed as the year.
year_text = df['time_observed_at'].str.slice(0, 4)
df['year'] = year_text.astype('int64')

In [7]:
df.shape

(91693, 40)

In [8]:
#load greenspaces (land-use polygons)
#source: http://download.geofabrik.de/north-america/us/california/socal.html
landuse = gpd.read_file(
    '../data/socal-latest-free/gis_osm_landuse_a_free_1.shp'
)

In [9]:
landuse.shape

(95166, 5)

In [10]:
landuse.head()

Unnamed: 0,osm_id,code,fclass,name,geometry
0,4263495,7202,park,Richard Alatorre Park,"POLYGON ((-118.18487 34.14372, -118.18482 34.1..."
1,4354434,7202,park,San Pascual Park,"POLYGON ((-118.17015 34.11971, -118.16885 34.1..."
2,4384742,7202,park,Elysian Park,"POLYGON ((-118.25161 34.08026, -118.25022 34.0..."
3,4584647,7202,park,Crystal Street Bike Park,"POLYGON ((-118.25603 34.10741, -118.25574 34.1..."
4,5988648,7204,industrial,,"POLYGON ((-117.09100 33.11698, -117.09081 33.1..."


In [11]:
#define function to get coordinates using the centroids
def getXY(pt):
    """Return the ``(x, y)`` tuple read from the ``x`` and ``y`` attributes of ``pt``."""
    x_coord = pt.x
    y_coord = pt.y
    return x_coord, y_coord

In [12]:
#get coordinates
# Centroid of every land-use polygon, plus its coordinates as plain columns.
# The vectorized GeoSeries.x / .y accessors replace the per-point Python loop.
# NOTE(review): centroids are computed in the layer's own CRS; if that CRS is
# geographic (degrees), geopandas warns they are approximate. Downstream cells
# use lon/lat for a bounding-box filter - confirm that precision is sufficient.
landuse_centroids = landuse['geometry'].centroid
landuse['centroid'] = landuse_centroids
landuse['lon'] = landuse_centroids.x
landuse['lat'] = landuse_centroids.y


  

  This is separate from the ipykernel package so we can avoid doing imports until


In [13]:
landuse.head()

Unnamed: 0,osm_id,code,fclass,name,geometry,centroid,lon,lat
0,4263495,7202,park,Richard Alatorre Park,"POLYGON ((-118.18487 34.14372, -118.18482 34.1...",POINT (-118.18442 34.14370),-118.184418,34.1437
1,4354434,7202,park,San Pascual Park,"POLYGON ((-118.17015 34.11971, -118.16885 34.1...",POINT (-118.16865 34.11942),-118.168649,34.119417
2,4384742,7202,park,Elysian Park,"POLYGON ((-118.25161 34.08026, -118.25022 34.0...",POINT (-118.23881 34.08247),-118.238808,34.08247
3,4584647,7202,park,Crystal Street Bike Park,"POLYGON ((-118.25603 34.10741, -118.25574 34.1...",POINT (-118.25575 34.10746),-118.25575,34.107462
4,5988648,7204,industrial,,"POLYGON ((-117.09100 33.11698, -117.09081 33.1...",POINT (-117.08739 33.11560),-117.087392,33.115605


In [14]:
#limit greenspaces with observations as boundaries (lat)
# Keep features whose centroid latitude lies within the observed latitude range
# (both ends inclusive).
l1 = landuse[landuse['lat'].between(df.latitude.min(), df.latitude.max())]

In [15]:
#limit greenspaces with observations as boundaries (lon)
# Same filter on the centroid longitude, applied to the latitude-filtered rows.
l2 = l1[l1['lon'].between(df.longitude.min(), df.longitude.max())]

In [16]:
# Land-use features after both the latitude and longitude filters.
landuse_clean = l2

In [17]:
landuse.shape, landuse_clean.shape

((95166, 8), (29841, 8))

In [18]:
# Land-use classes (`fclass` values) that are kept as greenspace.
GREEN_FCLASSES = [
    'grass',
    'park',
    'forest',
    'scrub',
    'vineyard',
    'meadow',
    'orchard',
    'heath',
    'recreation_ground',
    'allotments',
    'nature_reserve',
]
# A single membership test replaces the chain of eleven equality comparisons.
green = landuse_clean[landuse_clean['fclass'].isin(GREEN_FCLASSES)]

In [19]:
green.shape

(17017, 8)

In [20]:
green.head()

Unnamed: 0,osm_id,code,fclass,name,geometry,centroid,lon,lat
0,4263495,7202,park,Richard Alatorre Park,"POLYGON ((-118.18487 34.14372, -118.18482 34.1...",POINT (-118.18442 34.14370),-118.184418,34.1437
1,4354434,7202,park,San Pascual Park,"POLYGON ((-118.17015 34.11971, -118.16885 34.1...",POINT (-118.16865 34.11942),-118.168649,34.119417
2,4384742,7202,park,Elysian Park,"POLYGON ((-118.25161 34.08026, -118.25022 34.0...",POINT (-118.23881 34.08247),-118.238808,34.08247
3,4584647,7202,park,Crystal Street Bike Park,"POLYGON ((-118.25603 34.10741, -118.25574 34.1...",POINT (-118.25575 34.10746),-118.25575,34.107462
27,15478927,7202,park,Glen Alla Park,"POLYGON ((-118.43257 33.98428, -118.43127 33.9...",POINT (-118.43155 33.98405),-118.431547,33.984049


In [21]:
# drop the helper centroid column so only the polygon geometry column remains
green_clean = green.drop(columns=['centroid'])

In [22]:
green_clean.head()

Unnamed: 0,osm_id,code,fclass,name,geometry,lon,lat
0,4263495,7202,park,Richard Alatorre Park,"POLYGON ((-118.18487 34.14372, -118.18482 34.1...",-118.184418,34.1437
1,4354434,7202,park,San Pascual Park,"POLYGON ((-118.17015 34.11971, -118.16885 34.1...",-118.168649,34.119417
2,4384742,7202,park,Elysian Park,"POLYGON ((-118.25161 34.08026, -118.25022 34.0...",-118.238808,34.08247
3,4584647,7202,park,Crystal Street Bike Park,"POLYGON ((-118.25603 34.10741, -118.25574 34.1...",-118.25575,34.107462
27,15478927,7202,park,Glen Alla Park,"POLYGON ((-118.43257 33.98428, -118.43127 33.9...",-118.431547,33.984049


In [23]:
#load water polygons
#source: https://osmdata.openstreetmap.de/data/water-polygons.html
water = gpd.read_file(
    '../data/water-polygons-split-4326/water-polygons-split-4326/water_polygons.shp'
)

In [24]:
water.shape

(53282, 3)

In [25]:
water.head()

Unnamed: 0,x,y,geometry
0,1,41,"POLYGON ((0.99933 40.99950, 0.99933 41.04319, ..."
1,-11,-72,"POLYGON ((-11.00158 -71.04396, -11.00158 -71.0..."
2,-11,-72,"POLYGON ((-10.81949 -70.99950, -10.75741 -70.9..."
3,148,-11,"POLYGON ((147.99949 -11.00050, 147.99949 -10.1..."
4,-25,81,"POLYGON ((-25.00338 81.58330, -25.00338 81.720..."


In [26]:
# Centroid of every water polygon, plus its coordinates as plain lon/lat columns.
# The vectorized GeoSeries.x / .y accessors replace the per-point Python loop.
# NOTE(review): centroids are computed in the layer's own CRS; if that CRS is
# geographic (degrees), geopandas warns they are approximate - confirm this is
# acceptable for how lon/lat are used.
water_centroids = water['geometry'].centroid
water['centroid'] = water_centroids
water['lon'] = water_centroids.x
water['lat'] = water_centroids.y


  """Entry point for launching an IPython kernel.

  


In [27]:
water.head()

Unnamed: 0,x,y,geometry,centroid,lon,lat
0,1,41,"POLYGON ((0.99933 40.99950, 0.99933 41.04319, ...",POINT (1.61701 41.09437),1.617007,41.094369
1,-11,-72,"POLYGON ((-11.00158 -71.04396, -11.00158 -71.0...",POINT (-10.97495 -71.01612),-10.97495,-71.016119
2,-11,-72,"POLYGON ((-10.81949 -70.99950, -10.75741 -70.9...",POINT (-10.75569 -71.00237),-10.755689,-71.002365
3,148,-11,"POLYGON ((147.99949 -11.00050, 147.99949 -10.1...",POINT (148.48457 -10.58733),148.48457,-10.587332
4,-25,81,"POLYGON ((-25.00338 81.58330, -25.00338 81.720...",POINT (-24.42133 81.79519),-24.421329,81.795188


In [28]:
# Keep water polygons whose centroid latitude is between 32 and the northernmost
# observation (both ends inclusive).
# NOTE(review): the lower bound is the literal 32 rather than df.latitude.min(),
# unlike the other layers - presumably deliberate; confirm.
w1 = water[water['lat'].between(32, df.latitude.max())]

In [29]:
# Keep water polygons whose centroid longitude is between -120 and the easternmost
# observation (both ends inclusive).
# NOTE(review): the lower bound is the literal -120 rather than df.longitude.min(),
# unlike the other layers - presumably deliberate; confirm.
w2 = w1[w1['lon'].between(-120, df.longitude.max())]

In [30]:
# Water polygons after both the latitude and longitude filters.
water_clean = w2

In [31]:
water_clean.shape

(7, 6)

In [32]:
water_clean.head()

Unnamed: 0,x,y,geometry,centroid,lon,lat
2273,-119,32,"POLYGON ((-119.00059 31.99950, -119.00059 33.0...",POINT (-118.50030 32.49486),-118.5003,32.494855
4213,-120,33,"POLYGON ((-120.00060 32.99950, -120.00060 33.9...",POINT (-119.49751 33.49693),-119.49751,33.496934
5667,-118,33,"POLYGON ((-118.00060 32.99950, -118.00060 33.6...",POINT (-117.72560 33.23868),-117.725604,33.238679
6235,-119,33,"POLYGON ((-119.00060 32.99950, -119.00060 34.0...",POINT (-118.54007 33.45453),-118.540071,33.45453
7032,-119,34,"POLYGON ((-119.00061 33.99950, -119.00061 34.0...",POINT (-118.76659 34.01927),-118.766594,34.019275


In [33]:
# drop the helper centroid column so only the polygon geometry column remains
water_clean_2 = water_clean.drop(columns=['centroid'])

In [34]:
water_clean_2

Unnamed: 0,x,y,geometry,lon,lat
2273,-119,32,"POLYGON ((-119.00059 31.99950, -119.00059 33.0...",-118.5003,32.494855
4213,-120,33,"POLYGON ((-120.00060 32.99950, -120.00060 33.9...",-119.49751,33.496934
5667,-118,33,"POLYGON ((-118.00060 32.99950, -118.00060 33.6...",-117.725604,33.238679
6235,-119,33,"POLYGON ((-119.00060 32.99950, -119.00060 34.0...",-118.540071,33.45453
7032,-119,34,"POLYGON ((-119.00061 33.99950, -119.00061 34.0...",-118.766594,34.019275
9818,-120,34,"POLYGON ((-120.00061 33.99950, -120.00061 34.4...",-119.599482,34.191505
23632,-120,32,"POLYGON ((-120.00059 31.99950, -120.00059 33.0...",-119.5,32.5


In [35]:
#load continental (inland) water polygons
#source: http://download.geofabrik.de/north-america/us/california/socal.html
water_cont = gpd.read_file(
    '../data/socal-latest-free/gis_osm_water_a_free_1.shp'
)

In [36]:
water_cont.shape

(15638, 5)

In [37]:
# Centroid of every continental-water polygon, plus its coordinates as plain
# lon/lat columns. The vectorized GeoSeries.x / .y accessors replace the
# per-point Python loop.
# NOTE(review): centroids are computed in the layer's own CRS; if that CRS is
# geographic (degrees), geopandas warns they are approximate - confirm this is
# acceptable for how lon/lat are used.
water_cont_centroids = water_cont['geometry'].centroid
water_cont['centroid'] = water_cont_centroids
water_cont['lon'] = water_cont_centroids.x
water_cont['lat'] = water_cont_centroids.y


  """Entry point for launching an IPython kernel.

  


In [38]:
# Keep continental-water features whose centroid latitude lies within the
# observed latitude range (both ends inclusive).
wc1 = water_cont[water_cont['lat'].between(df.latitude.min(), df.latitude.max())]

In [39]:
# Same filter on the centroid longitude, applied to the latitude-filtered rows.
wc2 = wc1[wc1['lon'].between(df.longitude.min(), df.longitude.max())]

In [40]:
# Continental-water features after both the latitude and longitude filters.
water_cont_clean = wc2

In [41]:
water_cont.shape, water_cont_clean.shape

((15638, 8), (2846, 8))

In [42]:
water_cont_clean.head()

Unnamed: 0,osm_id,code,fclass,name,geometry,centroid,lon,lat
10,15478215,8200,water,,"POLYGON ((-118.43431 33.96971, -118.43431 33.9...",POINT (-118.43077 33.96937),-118.430772,33.969369
13,20693077,8200,water,Upper Franklin Canyon Reservoir,"POLYGON ((-118.41093 34.12019, -118.41091 34.1...",POINT (-118.41021 34.12018),-118.410214,34.120181
14,22367519,8201,reservoir,Hollywood Reservoir,"POLYGON ((-118.33562 34.12360, -118.33562 34.1...",POINT (-118.33177 34.12120),-118.331768,34.121195
15,22372384,8200,water,Seaside Lagoon,"POLYGON ((-118.39504 33.84423, -118.39503 33.8...",POINT (-118.39445 33.84431),-118.394447,33.84431
16,22372524,8200,water,Oxford Retention Basin,"POLYGON ((-118.45691 33.98493, -118.45688 33.9...",POINT (-118.45543 33.98527),-118.455432,33.985272


In [43]:
# drop the helper centroid column so only the polygon geometry column remains
water_cont_clean_2 = water_cont_clean.drop(columns=['centroid'])

In [44]:
# Remove from the greenspace polygons every part that overlaps the ocean polygons.
green_clean_difference = gpd.overlay(
    green_clean,
    water_clean_2,
    how='difference',
)

In [45]:
green_clean.shape, green_clean_difference.shape

((17017, 7), (17006, 7))

In [46]:
# Remove from the remaining greenspace polygons every part that overlaps the
# continental-water polygons.
green_clean_difference_2 = gpd.overlay(
    green_clean_difference,
    water_cont_clean_2,
    how='difference',
)

In [47]:
# From here on `green` refers to the greenspaces with both water layers removed.
# NOTE(review): this rebinds the name `green` that an earlier cell assigned to the
# fclass-filtered land-use layer, so re-running earlier cells that read `green`
# after this one would see a different frame.
green = green_clean_difference_2

In [48]:
# Merge all greenspace geometries into a single geometry so overlapping polygons
# are dissolved into one shape.
# NOTE(review): newer geopandas releases deprecate `unary_union` in favour of
# `union_all()` - check against the installed version before upgrading.
geom = green.geometry.unary_union

In [49]:
# Wrap the merged geometry in a one-row GeoDataFrame tagged as EPSG:4326.
green_unique = gpd.GeoDataFrame(
    geometry=[geom],
    crs='epsg:4326',
)

In [50]:
green_unique.head()

Unnamed: 0,geometry
0,"MULTIPOLYGON (((-117.75412 33.52781, -117.7540..."


In [51]:
green_unique.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [52]:
# Split the merged multi-part geometry into one row per part and renumber the
# rows from 0.
green_unique = (
    green_unique
    .explode()
    .reset_index(drop=True)
)

In [53]:
green_unique.head()

Unnamed: 0,geometry
0,"POLYGON ((-117.75412 33.52781, -117.75409 33.5..."
1,"POLYGON ((-117.64313 33.64055, -117.64253 33.6..."
2,"POLYGON ((-117.72871 33.64874, -117.72877 33.6..."
3,"POLYGON ((-117.73685 33.71229, -117.73677 33.7..."
4,"POLYGON ((-117.74009 33.79436, -117.74004 33.7..."


In [54]:
green_unique.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [55]:
green_unique.shape

(12947, 1)

In [61]:
#save files
# Output shapefile path -> layer to write; written in the order listed.
layers_by_path = {
    '../data/outputs/greenspaces_la_final.shp': green_unique,
    '../data/outputs/bluespaces_la_final.shp': water_cont_clean_2,
    '../data/outputs/ocean_la_final.shp': water_clean_2,
}
for shp_path, layer in layers_by_path.items():
    layer.to_file(shp_path)