# Get the greenspaces from Los Angeles and set the flags

In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
from glob2 import glob
import osmnx as ox

In [2]:
#get California as gdf to check anomalies later. Points outside California will be excluded
ca=ox.geocode_to_gdf('California')

In [3]:
ca

Unnamed: 0,geometry,bbox_north,bbox_south,bbox_east,bbox_west,place_id,osm_type,osm_id,lat,lon,display_name,class,type,importance
0,"MULTIPOLYGON (((-124.48200 40.44032, -124.4813...",42.009499,32.529524,-114.130782,-124.482003,259044285,relation,165475,36.701463,-118.755997,"California, United States",boundary,administrative,0.922136


## Load observations (API)

In [4]:
data_folder = '../data/observations_final'
la_df = pd.concat([pd.read_csv(f).assign(challenge=f.replace('.csv','')) for f in glob(data_folder+'/CNC_Los_Angeles_*.csv')])

In [5]:
la_df.shape

(91694, 39)

In [6]:
#check for incorrect points (lat)
la_df['latitude'].min(), la_df['latitude'].max()

(32.792719780300004, 76.3489192018)

In [7]:
#check for incorrect points (lon). Clearly something wrong!
la_df['longitude'].min(), la_df['longitude'].max()

(-118.9965502909, 130.01725921940002)

## Get only the points inside CA

In [8]:
la_df=la_df[la_df['latitude'].le(ca['bbox_north'].iloc[0]) \
                  & la_df['latitude'].ge(ca['bbox_south'].iloc[0]) \
                  & la_df['longitude'].le(ca['bbox_east'].iloc[0]) \
                  & la_df['longitude'].ge(ca['bbox_west'].iloc[0])]

In [9]:
la_df.shape

(91693, 39)

In [10]:
la_df.head()

Unnamed: 0,id,observed_on_string,time_observed_at,created_time_zone,created_at,updated_at,description,user_id,user_login,quality_grade,...,iconic_taxon_name,taxon_rank,taxon_parent_id,taxon_native,taxon_endemic,taxon_threatened,taxon_search_rank,taxon_observations,identifications,challenge
0,5591391,Fri Apr 14 2017 09:34:12 GMT-0700 (PDT),2017-04-14T09:34:12-07:00,America/Los_Angeles,2017-04-06T09:34:29-07:00,2018-01-13T14:28:01-08:00,,436798,laurachun,research,...,Plantae,species,52855,True,False,False,11983,11983,"[{'user_id': 73705, 'category': 'supporting', ...",../data/observations_final\CNC_Los_Angeles_2017
1,5591694,Sat Apr 15 2017 10:18:50 GMT-0700 (PDT),2017-04-15T10:18:50-07:00,America/Los_Angeles,2017-04-06T10:19:03-07:00,2018-01-13T14:28:03-08:00,,436798,laurachun,research,...,Plantae,species,53145,True,False,False,6476,6476,"[{'user_id': 73705, 'category': 'supporting', ...",../data/observations_final\CNC_Los_Angeles_2017
2,5684540,Fri Apr 14 2017 00:03:16 GMT-0700 (PDT),2017-04-14T00:03:16-07:00,America/Los_Angeles,2017-04-14T00:04:16-07:00,2018-01-13T14:31:44-08:00,,6743,lhiggins,needs_id,...,Insecta,family,52380,False,False,False,55396,55396,"[{'user_id': 275891, 'category': 'supporting',...",../data/observations_final\CNC_Los_Angeles_2017
3,5684550,Fri Apr 14 2017 00:04:42 GMT-0700 (PDT),2017-04-14T00:04:42-07:00,America/Los_Angeles,2017-04-14T00:05:31-07:00,2018-01-13T14:31:44-08:00,,6743,lhiggins,needs_id,...,Mollusca,genus,933174,False,False,False,8938,8938,"[{'user_id': 81261, 'category': 'improving', '...",../data/observations_final\CNC_Los_Angeles_2017
4,5684551,Fri Apr 14 2017 00:04:06 GMT-0700 (PDT),2017-04-14T00:04:06-07:00,America/Los_Angeles,2017-04-14T00:05:32-07:00,2017-04-14T05:30:19-07:00,,15419,mordenana,needs_id,...,Arachnida,order,47119,False,False,False,1420015,1420015,"[{'user_id': 15419, 'category': None, 'disagre...",../data/observations_final\CNC_Los_Angeles_2017


## Get the greenspaces using OSMnx and the points we defined before

In [11]:
green=ox.geometries.geometries_from_bbox(la_df['latitude'].max(),
                                         la_df['latitude'].min(),
                                         la_df['longitude'].max(),
                                         la_df['longitude'].min(),
                                         {'leisure':'park', 'landuse':'forest'})

In [12]:
green.shape

(4473, 144)

In [13]:
green.head(5)

Unnamed: 0,unique_id,osmid,element_type,ele,gnis:county_id,gnis:created,gnis:feature_id,gnis:state_id,leisure,name,...,longitude,ownership,protect_class,taxon,taxon:en,studio,conservation,protected_area,FID,protection_title
0,node/358786394,358786394,node,24,59,01/19/1981,243909,6,park,Irvine Bowl Park,...,,,,,,,,,,
1,node/358822072,358822072,node,3,59,12/02/1996,1702600,6,park,Capistrano Beach Park,...,,,,,,,,,,
2,node/358822079,358822079,node,5,59,12/02/1996,1702604,6,park,Dana Point Marine Life Refuge,...,,,,,,,,,,
3,node/358822080,358822080,node,-3,59,12/02/1996,1702605,6,park,Doheny Marine Life Refuge,...,,,,,,,,,,
4,node/358822082,358822082,node,-2,59,12/02/1996,1702606,6,park,Irvine Coast Marine Life Refuge,...,,,,,,,,,,


In [14]:
#keep only polygons

#exclude Point
green=green[green['geometry'].geom_type.ne('Point')]
#exclude LineString
green=green[green['geometry'].geom_type.ne('LineString')]

In [15]:
green.shape

(4228, 144)

## Exclude the overlapping geometries 

In [16]:
geom = green.geometry.unary_union

In [17]:
green_unique = gpd.GeoDataFrame(geometry=[geom],crs='epsg:4326')

In [18]:
green_unique.head()

Unnamed: 0,geometry
0,"MULTIPOLYGON (((-118.33014 33.33963, -118.3301..."


In [19]:
green_unique.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [20]:
green_unique = green_unique.explode().reset_index(drop=True)

In [21]:
green_unique.head()

Unnamed: 0,geometry
0,"POLYGON ((-118.33014 33.33963, -118.33011 33.3..."
1,"POLYGON ((-118.32523 33.34060, -118.32513 33.3..."
2,"POLYGON ((-118.32373 33.34246, -118.32370 33.3..."
3,"POLYGON ((-118.32504 33.34339, -118.32508 33.3..."
4,"POLYGON ((-117.61153 33.41189, -117.61184 33.4..."


In [22]:
green_unique.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [23]:
green_unique.shape

(4089, 1)

## Create gdf using the observations

In [24]:
la_df['longitude'].isna().sum(), la_df['latitude'].isna().sum()

(0, 0)

In [25]:
#create the points inverting the coordinates
geometry_invert = [Point(xy) for xy in zip(la_df.longitude, la_df.latitude)]

In [26]:
#crete a geodataframe with the observations
observations_gdf = gpd.GeoDataFrame(la_df, crs='epsg:4326', geometry=geometry_invert)

In [27]:
observations_gdf.shape

(91693, 40)

In [28]:
observations_gdf.head()

Unnamed: 0,id,observed_on_string,time_observed_at,created_time_zone,created_at,updated_at,description,user_id,user_login,quality_grade,...,taxon_rank,taxon_parent_id,taxon_native,taxon_endemic,taxon_threatened,taxon_search_rank,taxon_observations,identifications,challenge,geometry
0,5591391,Fri Apr 14 2017 09:34:12 GMT-0700 (PDT),2017-04-14T09:34:12-07:00,America/Los_Angeles,2017-04-06T09:34:29-07:00,2018-01-13T14:28:01-08:00,,436798,laurachun,research,...,species,52855,True,False,False,11983,11983,"[{'user_id': 73705, 'category': 'supporting', ...",../data/observations_final\CNC_Los_Angeles_2017,POINT (-118.20138 34.09752)
1,5591694,Sat Apr 15 2017 10:18:50 GMT-0700 (PDT),2017-04-15T10:18:50-07:00,America/Los_Angeles,2017-04-06T10:19:03-07:00,2018-01-13T14:28:03-08:00,,436798,laurachun,research,...,species,53145,True,False,False,6476,6476,"[{'user_id': 73705, 'category': 'supporting', ...",../data/observations_final\CNC_Los_Angeles_2017,POINT (-118.20141 34.09936)
2,5684540,Fri Apr 14 2017 00:03:16 GMT-0700 (PDT),2017-04-14T00:03:16-07:00,America/Los_Angeles,2017-04-14T00:04:16-07:00,2018-01-13T14:31:44-08:00,,6743,lhiggins,needs_id,...,family,52380,False,False,False,55396,55396,"[{'user_id': 275891, 'category': 'supporting',...",../data/observations_final\CNC_Los_Angeles_2017,POINT (-118.31088 34.07109)
3,5684550,Fri Apr 14 2017 00:04:42 GMT-0700 (PDT),2017-04-14T00:04:42-07:00,America/Los_Angeles,2017-04-14T00:05:31-07:00,2018-01-13T14:31:44-08:00,,6743,lhiggins,needs_id,...,genus,933174,False,False,False,8938,8938,"[{'user_id': 81261, 'category': 'improving', '...",../data/observations_final\CNC_Los_Angeles_2017,POINT (-118.31077 34.07111)
4,5684551,Fri Apr 14 2017 00:04:06 GMT-0700 (PDT),2017-04-14T00:04:06-07:00,America/Los_Angeles,2017-04-14T00:05:32-07:00,2017-04-14T05:30:19-07:00,,15419,mordenana,needs_id,...,order,47119,False,False,False,1420015,1420015,"[{'user_id': 15419, 'category': None, 'disagre...",../data/observations_final\CNC_Los_Angeles_2017,POINT (-117.99642 34.14409)


## Spatial join

In [29]:
# spatial join observations and observations.
points_polys = gpd.sjoin(observations_gdf, green_unique, how='inner')

In [30]:
points_polys.head()

Unnamed: 0,id,observed_on_string,time_observed_at,created_time_zone,created_at,updated_at,description,user_id,user_login,quality_grade,...,taxon_parent_id,taxon_native,taxon_endemic,taxon_threatened,taxon_search_rank,taxon_observations,identifications,challenge,geometry,index_right
0,5591391,Fri Apr 14 2017 09:34:12 GMT-0700 (PDT),2017-04-14T09:34:12-07:00,America/Los_Angeles,2017-04-06T09:34:29-07:00,2018-01-13T14:28:01-08:00,,436798,laurachun,research,...,52855,True,False,False,11983,11983,"[{'user_id': 73705, 'category': 'supporting', ...",../data/observations_final\CNC_Los_Angeles_2017,POINT (-118.20138 34.09752),3033
1,5591694,Sat Apr 15 2017 10:18:50 GMT-0700 (PDT),2017-04-15T10:18:50-07:00,America/Los_Angeles,2017-04-06T10:19:03-07:00,2018-01-13T14:28:03-08:00,,436798,laurachun,research,...,53145,True,False,False,6476,6476,"[{'user_id': 73705, 'category': 'supporting', ...",../data/observations_final\CNC_Los_Angeles_2017,POINT (-118.20141 34.09936),3033
3697,5733872,Sat Apr 15 2017 14:09:39 GMT-0700 (PDT),2017-04-15T14:09:39-07:00,America/Los_Angeles,2017-04-15T14:13:45-07:00,2018-01-13T14:44:03-08:00,"Observation and photo by Martha Singer, sent t...",262752,alexbroad,research,...,49921,True,False,False,17659,17659,"[{'user_id': 262752, 'category': 'improving', ...",../data/observations_final\CNC_Los_Angeles_2017,POINT (-118.20125 34.09777),3033
3702,5733938,Sat Apr 15 2017 07:54:21 GMT-0700 (PDT),2017-04-15T07:54:21-07:00,America/Los_Angeles,2017-04-15T14:14:33-07:00,2018-01-13T14:44:04-08:00,"Observation and photo by Martha Singer, sent t...",262752,alexbroad,needs_id,...,4950,True,False,False,105228,105228,"[{'user_id': 262752, 'category': None, 'disagr...",../data/observations_final\CNC_Los_Angeles_2017,POINT (-118.20125 34.09777),3033
3707,5734002,Sat Apr 15 2017 10:05:04 GMT-0700 (PDT),2017-04-15T10:05:04-07:00,America/Los_Angeles,2017-04-15T14:16:11-07:00,2018-01-13T14:44:05-08:00,"Observation and photo by Martha Singer, sent t...",262752,alexbroad,research,...,14889,True,False,False,3866,3866,"[{'user_id': 262752, 'category': 'improving', ...",../data/observations_final\CNC_Los_Angeles_2017,POINT (-118.20125 34.09777),3033


In [31]:
points_polys.shape

(22136, 41)

In [32]:
points_polys.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

## Set flag for greenspaces

In [33]:
ids_with_greenspace = points_polys.id.to_list()

In [34]:
new_la = la_df.set_index('id')
new_la.head()

Unnamed: 0_level_0,observed_on_string,time_observed_at,created_time_zone,created_at,updated_at,description,user_id,user_login,quality_grade,reviewed_by,...,taxon_rank,taxon_parent_id,taxon_native,taxon_endemic,taxon_threatened,taxon_search_rank,taxon_observations,identifications,challenge,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5591391,Fri Apr 14 2017 09:34:12 GMT-0700 (PDT),2017-04-14T09:34:12-07:00,America/Los_Angeles,2017-04-06T09:34:29-07:00,2018-01-13T14:28:01-08:00,,436798,laurachun,research,"[1850, 73705, 436798]",...,species,52855,True,False,False,11983,11983,"[{'user_id': 73705, 'category': 'supporting', ...",../data/observations_final\CNC_Los_Angeles_2017,POINT (-118.20138 34.09752)
5591694,Sat Apr 15 2017 10:18:50 GMT-0700 (PDT),2017-04-15T10:18:50-07:00,America/Los_Angeles,2017-04-06T10:19:03-07:00,2018-01-13T14:28:03-08:00,,436798,laurachun,research,"[1850, 6971, 73705, 436798]",...,species,53145,True,False,False,6476,6476,"[{'user_id': 73705, 'category': 'supporting', ...",../data/observations_final\CNC_Los_Angeles_2017,POINT (-118.20141 34.09936)
5684540,Fri Apr 14 2017 00:03:16 GMT-0700 (PDT),2017-04-14T00:03:16-07:00,America/Los_Angeles,2017-04-14T00:04:16-07:00,2018-01-13T14:31:44-08:00,,6743,lhiggins,needs_id,"[1850, 6743, 52839, 275891]",...,family,52380,False,False,False,55396,55396,"[{'user_id': 275891, 'category': 'supporting',...",../data/observations_final\CNC_Los_Angeles_2017,POINT (-118.31088 34.07109)
5684550,Fri Apr 14 2017 00:04:42 GMT-0700 (PDT),2017-04-14T00:04:42-07:00,America/Los_Angeles,2017-04-14T00:05:31-07:00,2018-01-13T14:31:44-08:00,,6743,lhiggins,needs_id,"[1850, 6743, 10285, 12809, 73705, 81261, 440756]",...,genus,933174,False,False,False,8938,8938,"[{'user_id': 81261, 'category': 'improving', '...",../data/observations_final\CNC_Los_Angeles_2017,POINT (-118.31077 34.07111)
5684551,Fri Apr 14 2017 00:04:06 GMT-0700 (PDT),2017-04-14T00:04:06-07:00,America/Los_Angeles,2017-04-14T00:05:32-07:00,2017-04-14T05:30:19-07:00,,15419,mordenana,needs_id,"[1850, 15419]",...,order,47119,False,False,False,1420015,1420015,"[{'user_id': 15419, 'category': None, 'disagre...",../data/observations_final\CNC_Los_Angeles_2017,POINT (-117.99642 34.14409)


In [35]:
new_la['greenspace_flag'] = '0'
new_la.loc[ids_with_greenspace,'greenspace_flag'] = '1'

In [36]:
new_la.greenspace_flag.value_counts()

0    69557
1    22136
Name: greenspace_flag, dtype: int64

In [38]:
#export as csv
new_la.to_csv('../data/outputs/la_data_with_greenspace_flag.csv')