# Add Geospatial Data for San Francisco

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd 
import geopandas as gpd
from glob import glob
from OSGridConverter import latlong2grid
import matplotlib.pyplot as plt 
from shapely.geometry import Point, Polygon
from geopandas import GeoDataFrame
import seaborn as sns
import altair as alt
import plotly.express as px

## 1. Load iNaturalist Data

In [3]:
# Folder that holds the iNaturalist CSV exports.
data_folder = '/Users/florence/Desktop/citizen_science/muki_data'

# Read every "CNC San Francisco*.csv" export, tag each row with the file it came
# from, and stack the frames into one table.
# NOTE(review): `challenge` holds the full file path minus ".csv" (not just the
# file name), and the original per-file row index is kept, so index labels repeat.
challenge_frames = []
for csv_path in glob(data_folder + "/CNC San Francisco*.csv"):
    challenge_label = csv_path.replace(".csv", "")
    challenge_frames.append(pd.read_csv(csv_path).assign(challenge=challenge_label))
sf_df = pd.concat(challenge_frames)

In [4]:
sf_df.head()

Unnamed: 0,id,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,...,taxon_geoprivacy,coordinates_obscured,positioning_method,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id,challenge
0,43036609,2020-04-24 12:00:58 AM PDT,2020-04-24,2020-04-24 07:00:58 UTC,Pacific Time (US & Canada),2692,kestrel,2020-04-24 07:01:46 UTC,2020-04-24 08:09:21 UTC,research,...,,False,gps,gps,Ornate Tiger Moth,Apantesis ornata,Ornate Tiger Moth,Insecta,321458.0,/Users/florence/Desktop/citizen_science/muki_d...
1,43036980,2020/04/24 12:01 AM PDT,2020-04-24,2020-04-24 07:01:00 UTC,Pacific Time (US & Canada),179103,catchang,2020-04-24 07:07:05 UTC,2020-04-30 05:32:41 UTC,needs_id,...,,False,,,Grass Spiders,Anyphaena,,Arachnida,153567.0,/Users/florence/Desktop/citizen_science/muki_d...
2,43036981,2020/04/24 12:01 AM PDT,2020-04-24,2020-04-24 07:01:00 UTC,Pacific Time (US & Canada),179103,catchang,2020-04-24 07:07:06 UTC,2020-04-24 07:34:24 UTC,research,...,,False,,,White-shouldered House Moth,Endrosis sarcitrella,White-shouldered House Moth,Insecta,205758.0,/Users/florence/Desktop/citizen_science/muki_d...
3,43036982,2020/04/24 12:01 AM PDT,2020-04-24,2020-04-24 07:01:00 UTC,Pacific Time (US & Canada),179103,catchang,2020-04-24 07:07:06 UTC,2020-04-30 05:21:53 UTC,research,...,,False,,,Noble False Widow,Steatoda nobilis,Noble False Widow,Arachnida,366894.0,/Users/florence/Desktop/citizen_science/muki_d...
4,43037180,Fri Apr 24 2020 00:02:24 GMT-0700 (PDT),2020-04-24,2020-04-24 07:02:24 UTC,Pacific Time (US & Canada),819,rebeccafay,2020-04-24 07:10:24 UTC,2020-04-24 07:11:25 UTC,needs_id,...,,False,,,,Armadillidium vulgare,Common Pill Woodlouse,Animalia,56083.0,/Users/florence/Desktop/citizen_science/muki_d...


## 2. Load additional SF data

In [6]:
# San Francisco land-use shapefile; each row carries a `landuse` category and a geometry.
land_use_shapefile = '/Users/florence/Desktop/citizen_science/additional_data/sf/land_use/geo_export_6f4312bd-a776-4012-943d-c4add7b1c155.shp'
sf_geodata = gpd.read_file(land_use_shapefile)

In [7]:
sf_geodata.head()

Unnamed: 0,bldgsqft,blklot,block_num,cie,from_st,landuse,lot_num,mapblklot,med,mips,...,retail,shape_area,shape_leng,st_type,street,to_st,total_uses,visitor,yrbuilt,geometry
0,5460.0,26013,26,0.0,838,RESIDENT,13,26013,0.0,0.0,...,0.0,3864.061762,327.786363,ST,BAY,842,0.0,0.0,1912.0,"POLYGON ((-122.42108 37.80478, -122.42118 37.8..."
1,4500.0,26014,26,0.0,844,MIXRES,14,26014,0.0,3471.0,...,0.0,3361.025351,320.352522,ST,BAY,844,3471.0,0.0,1900.0,"POLYGON ((-122.42118 37.80476, -122.42126 37.8..."
2,5783.0,26015,26,0.0,848,RESIDENT,15,26015,0.0,0.0,...,0.0,3272.033196,319.046088,ST,BAY,850,0.0,0.0,1900.0,"POLYGON ((-122.42126 37.80475, -122.42135 37.8..."
3,3700.0,26016,26,0.0,860,RESIDENT,16,26016,0.0,0.0,...,0.0,4570.767667,279.353002,ST,BAY,862,0.0,0.0,1940.0,"POLYGON ((-122.42135 37.80474, -122.42152 37.8..."
4,4543.0,26017,26,0.0,870,RESIDENT,17,26017,0.0,0.0,...,0.0,5225.823735,294.354339,ST,BAY,872,0.0,0.0,1938.0,"POLYGON ((-122.42153 37.80472, -122.42173 37.8..."


In [8]:
sf_geodata.columns

Index(['bldgsqft', 'blklot', 'block_num', 'cie', 'from_st', 'landuse',
       'lot_num', 'mapblklot', 'med', 'mips', 'objectid', 'pdr', 'resunits',
       'retail', 'shape_area', 'shape_leng', 'st_type', 'street', 'to_st',
       'total_uses', 'visitor', 'yrbuilt', 'geometry'],
      dtype='object')

In [9]:
sf_geodata.landuse

0         RESIDENT
1           MIXRES
2         RESIDENT
3         RESIDENT
4         RESIDENT
            ...   
155463      MIXRES
155464    RESIDENT
155465    RESIDENT
155466    RESIDENT
155467    RESIDENT
Name: landuse, Length: 155468, dtype: object

In [10]:
len(sf_geodata[sf_geodata.landuse == 'OpenSpace'])

932

In [11]:
len(sf_geodata)

155468

In [12]:
sf_geodata.landuse.unique()

array(['RESIDENT', 'MIXRES', 'VACANT', 'PDR', 'OpenSpace', 'VISITOR',
       'MIPS', 'MIXED', 'RETAIL/ENT', 'CIE', 'MISSING DATA', 'MED',
       'Right of Way'], dtype=object)

In [13]:
# Keep only the parcels whose land use is 'OpenSpace'; they serve as the greenspace layer.
is_open_space = sf_geodata['landuse'] == 'OpenSpace'
sf_greenspace = sf_geodata[is_open_space]

In [14]:
sf_greenspace.crs

{'init': 'epsg:4326'}

In [17]:
sf_greenspace.geometry

9         POLYGON ((-122.41921 37.80788, -122.42064 37.8...
202       POLYGON ((-122.41901 37.80676, -122.41895 37.8...
252       POLYGON ((-122.42019 37.80426, -122.42161 37.8...
253       POLYGON ((-122.42124 37.80321, -122.42145 37.8...
843       POLYGON ((-122.40521 37.80198, -122.40531 37.8...
                                ...                        
154576    POLYGON ((-122.38976 37.79120, -122.39010 37.7...
154849    POLYGON ((-122.39009 37.79146, -122.39043 37.7...
155225    POLYGON ((-122.37645 37.73392, -122.37641 37.7...
155389    POLYGON ((-122.38650 37.72533, -122.38670 37.7...
155426    POLYGON ((-122.38105 37.73214, -122.38105 37.7...
Name: geometry, Length: 932, dtype: geometry

In [16]:
sf_df.longitude, sf_df.latitude

(0       -122.491855
 1       -122.272478
 2       -122.272478
 3       -122.272478
 4       -122.432312
             ...    
 24046   -122.449242
 24047   -122.974083
 24048   -122.973692
 24049   -122.970253
 24050   -123.063815
 Name: longitude, Length: 138907, dtype: float64,
 0        38.322071
 1        37.842187
 2        37.842187
 3        37.842187
 4        37.801128
            ...    
 24046    37.890830
 24047    37.993592
 24048    37.993256
 24049    37.992244
 24050    38.304860
 Name: latitude, Length: 138907, dtype: float64)

In [18]:
# Build a GeoDataFrame from the SF iNaturalist observations: one Point per row,
# in (longitude, latitude) order, declared as EPSG:4326.
geometry = [Point(lon, lat) for lon, lat in zip(sf_df.longitude, sf_df.latitude)]
gdf = GeoDataFrame(sf_df, geometry=geometry, crs="epsg:4326")

In [19]:
gdf.head()

Unnamed: 0,id,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,...,coordinates_obscured,positioning_method,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id,challenge,geometry
0,43036609,2020-04-24 12:00:58 AM PDT,2020-04-24,2020-04-24 07:00:58 UTC,Pacific Time (US & Canada),2692,kestrel,2020-04-24 07:01:46 UTC,2020-04-24 08:09:21 UTC,research,...,False,gps,gps,Ornate Tiger Moth,Apantesis ornata,Ornate Tiger Moth,Insecta,321458.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.49185 38.32207)
1,43036980,2020/04/24 12:01 AM PDT,2020-04-24,2020-04-24 07:01:00 UTC,Pacific Time (US & Canada),179103,catchang,2020-04-24 07:07:05 UTC,2020-04-30 05:32:41 UTC,needs_id,...,False,,,Grass Spiders,Anyphaena,,Arachnida,153567.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.27248 37.84219)
2,43036981,2020/04/24 12:01 AM PDT,2020-04-24,2020-04-24 07:01:00 UTC,Pacific Time (US & Canada),179103,catchang,2020-04-24 07:07:06 UTC,2020-04-24 07:34:24 UTC,research,...,False,,,White-shouldered House Moth,Endrosis sarcitrella,White-shouldered House Moth,Insecta,205758.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.27248 37.84219)
3,43036982,2020/04/24 12:01 AM PDT,2020-04-24,2020-04-24 07:01:00 UTC,Pacific Time (US & Canada),179103,catchang,2020-04-24 07:07:06 UTC,2020-04-30 05:21:53 UTC,research,...,False,,,Noble False Widow,Steatoda nobilis,Noble False Widow,Arachnida,366894.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.27248 37.84219)
4,43037180,Fri Apr 24 2020 00:02:24 GMT-0700 (PDT),2020-04-24,2020-04-24 07:02:24 UTC,Pacific Time (US & Canada),819,rebeccafay,2020-04-24 07:10:24 UTC,2020-04-24 07:11:25 UTC,needs_id,...,False,,,,Armadillidium vulgare,Common Pill Woodlouse,Animalia,56083.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.43231 37.80113)


## 3. Create greenspace flag

In [20]:
# Spatially join the San Francisco observations with the OpenSpace polygons.
# The inner join keeps only observations matched to a greenspace polygon.
points_polys = gpd.sjoin(left_df=gdf, right_df=sf_greenspace, how="inner")

  warn(


In [21]:
points_polys.head()

Unnamed: 0,id,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,...,resunits,retail,shape_area,shape_leng,st_type,street,to_st,total_uses,visitor,yrbuilt
45,43077476,Fri Apr 24 2020 07:11:36 GMT-0700 (PDT),2020-04-24,2020-04-24 14:11:36 UTC,Pacific Time (US & Canada),2795728,sfgardener,2020-04-24 14:11:57 UTC,2020-04-24 14:18:08 UTC,research,...,0.0,0.0,1020487.0,4348.313934,,UNKNOWN,0,0.0,0.0,0.0
14820,43680079,2020/04/26 10:33 AM PDT,2020-04-26,2020-04-26 17:33:00 UTC,Pacific Time (US & Canada),17796,wbsimey,2020-04-26 18:03:23 UTC,2020-04-30 05:30:57 UTC,research,...,0.0,0.0,1020487.0,4348.313934,,UNKNOWN,0,0.0,0.0,0.0
14821,43680080,2020/04/26 10:34 AM PDT,2020-04-26,2020-04-26 17:34:00 UTC,Pacific Time (US & Canada),17796,wbsimey,2020-04-26 18:03:23 UTC,2020-04-26 18:03:23 UTC,needs_id,...,0.0,0.0,1020487.0,4348.313934,,UNKNOWN,0,0.0,0.0,0.0
14822,43680081,2020/04/26 10:33 AM PDT,2020-04-26,2020-04-26 17:33:00 UTC,Pacific Time (US & Canada),17796,wbsimey,2020-04-26 18:03:24 UTC,2020-04-28 12:48:31 UTC,research,...,0.0,0.0,1020487.0,4348.313934,,UNKNOWN,0,0.0,0.0,0.0
14823,43680096,2020/04/26 10:35 AM PDT,2020-04-26,2020-04-26 17:35:00 UTC,Pacific Time (US & Canada),17796,wbsimey,2020-04-26 18:03:27 UTC,2020-04-26 18:03:27 UTC,needs_id,...,0.0,0.0,1020487.0,4348.313934,,UNKNOWN,0,0.0,0.0,0.0


In [22]:
len(points_polys)

20243

In [23]:
len(sf_df)

138907

In [24]:
# Observation ids that the spatial join matched to an OpenSpace polygon.
ids_with_greenspace = points_polys['id'].tolist()

In [25]:
# Flag every iNaturalist observation by greenspace membership: index by
# observation id, default all rows to '0', then mark the ids that the spatial
# join matched as '1'. The flag is stored as a string.
new_sf = sf_df.set_index('id').assign(greenspace_flag='0')
new_sf.loc[ids_with_greenspace, 'greenspace_flag'] = '1'

In [26]:
new_sf.head()

Unnamed: 0_level_0,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,license,...,positioning_method,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id,challenge,geometry,greenspace_flag
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
43036609,2020-04-24 12:00:58 AM PDT,2020-04-24,2020-04-24 07:00:58 UTC,Pacific Time (US & Canada),2692,kestrel,2020-04-24 07:01:46 UTC,2020-04-24 08:09:21 UTC,research,CC0,...,gps,gps,Ornate Tiger Moth,Apantesis ornata,Ornate Tiger Moth,Insecta,321458.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.49185 38.32207),0
43036980,2020/04/24 12:01 AM PDT,2020-04-24,2020-04-24 07:01:00 UTC,Pacific Time (US & Canada),179103,catchang,2020-04-24 07:07:05 UTC,2020-04-30 05:32:41 UTC,needs_id,CC-BY-NC,...,,,Grass Spiders,Anyphaena,,Arachnida,153567.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.27248 37.84219),0
43036981,2020/04/24 12:01 AM PDT,2020-04-24,2020-04-24 07:01:00 UTC,Pacific Time (US & Canada),179103,catchang,2020-04-24 07:07:06 UTC,2020-04-24 07:34:24 UTC,research,CC-BY-NC,...,,,White-shouldered House Moth,Endrosis sarcitrella,White-shouldered House Moth,Insecta,205758.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.27248 37.84219),0
43036982,2020/04/24 12:01 AM PDT,2020-04-24,2020-04-24 07:01:00 UTC,Pacific Time (US & Canada),179103,catchang,2020-04-24 07:07:06 UTC,2020-04-30 05:21:53 UTC,research,CC-BY-NC,...,,,Noble False Widow,Steatoda nobilis,Noble False Widow,Arachnida,366894.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.27248 37.84219),0
43037180,Fri Apr 24 2020 00:02:24 GMT-0700 (PDT),2020-04-24,2020-04-24 07:02:24 UTC,Pacific Time (US & Canada),819,rebeccafay,2020-04-24 07:10:24 UTC,2020-04-24 07:11:25 UTC,needs_id,CC0,...,,,,Armadillidium vulgare,Common Pill Woodlouse,Animalia,56083.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.43231 37.80113),0


In [27]:
new_sf.greenspace_flag.value_counts()

0    118669
1     20238
Name: greenspace_flag, dtype: int64

## 4. Calculate min-distance to a greenspace

In [28]:
def min_distance(point, lines):
    """Return the smallest distance between ``point`` and the geometries in ``lines``.

    Args:
        point: The geometry to measure from; it is passed straight to
            ``lines.distance``.
        lines: Any object exposing ``distance(point)`` whose result has a
            ``min()`` method (in this notebook, a GeoSeries of polygons).

    Returns:
        The minimum of the distances reported by ``lines.distance(point)``.
        In this notebook both layers are EPSG:4326, so the value is in
        degrees rather than metres.
    """
    distances = lines.distance(point)
    return distances.min()

# Compute, for every observation, the distance to the nearest OpenSpace polygon.
# The original `if False:` guard skipped this unconditionally, so on a fresh
# kernel the `min_dist_to_greenspace` column never existed and the cells below
# that read it failed. Guarding on the column's presence still avoids repeating
# the work when the cell is re-run in a live kernel (presumably the reason it
# was disabled: each observation is compared against every greenspace polygon).
# NOTE: both layers are EPSG:4326, so the distances are in degrees, not metres.
if 'min_dist_to_greenspace' not in gdf.columns:
    gdf['min_dist_to_greenspace'] = gdf.geometry.apply(min_distance, args=(sf_greenspace.geometry,))

In [29]:
gdf.head()

Unnamed: 0,id,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,...,positioning_method,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id,challenge,geometry,min_dist_to_greenspace
0,43036609,2020-04-24 12:00:58 AM PDT,2020-04-24,2020-04-24 07:00:58 UTC,Pacific Time (US & Canada),2692,kestrel,2020-04-24 07:01:46 UTC,2020-04-24 08:09:21 UTC,research,...,gps,gps,Ornate Tiger Moth,Apantesis ornata,Ornate Tiger Moth,Insecta,321458.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.49185 38.32207),0.511253
1,43036980,2020/04/24 12:01 AM PDT,2020-04-24,2020-04-24 07:01:00 UTC,Pacific Time (US & Canada),179103,catchang,2020-04-24 07:07:05 UTC,2020-04-30 05:32:41 UTC,needs_id,...,,,Grass Spiders,Anyphaena,,Arachnida,153567.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.27248 37.84219),0.127103
2,43036981,2020/04/24 12:01 AM PDT,2020-04-24,2020-04-24 07:01:00 UTC,Pacific Time (US & Canada),179103,catchang,2020-04-24 07:07:06 UTC,2020-04-24 07:34:24 UTC,research,...,,,White-shouldered House Moth,Endrosis sarcitrella,White-shouldered House Moth,Insecta,205758.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.27248 37.84219),0.127103
3,43036982,2020/04/24 12:01 AM PDT,2020-04-24,2020-04-24 07:01:00 UTC,Pacific Time (US & Canada),179103,catchang,2020-04-24 07:07:06 UTC,2020-04-30 05:21:53 UTC,research,...,,,Noble False Widow,Steatoda nobilis,Noble False Widow,Arachnida,366894.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.27248 37.84219),0.127103
4,43037180,Fri Apr 24 2020 00:02:24 GMT-0700 (PDT),2020-04-24,2020-04-24 07:02:24 UTC,Pacific Time (US & Canada),819,rebeccafay,2020-04-24 07:10:24 UTC,2020-04-24 07:11:25 UTC,needs_id,...,,,,Armadillidium vulgare,Common Pill Woodlouse,Animalia,56083.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.43231 37.80113),0.000282


In [30]:
gdf.min_dist_to_greenspace.min(), gdf.min_dist_to_greenspace.max()

(0.0, 1.7976931348623157e+308)

In [31]:
# Rebuild the flagged table from the GeoDataFrame so that it carries the
# geometry and min_dist_to_greenspace columns as well: index by observation id,
# default all rows to '0', then mark the ids matched by the spatial join as '1'.
new_sf = gdf.set_index('id').assign(greenspace_flag='0')
new_sf.loc[ids_with_greenspace, 'greenspace_flag'] = '1'

In [32]:
new_sf.head()

Unnamed: 0_level_0,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,license,...,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id,challenge,geometry,min_dist_to_greenspace,greenspace_flag
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
43036609,2020-04-24 12:00:58 AM PDT,2020-04-24,2020-04-24 07:00:58 UTC,Pacific Time (US & Canada),2692,kestrel,2020-04-24 07:01:46 UTC,2020-04-24 08:09:21 UTC,research,CC0,...,gps,Ornate Tiger Moth,Apantesis ornata,Ornate Tiger Moth,Insecta,321458.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.49185 38.32207),0.511253,0
43036980,2020/04/24 12:01 AM PDT,2020-04-24,2020-04-24 07:01:00 UTC,Pacific Time (US & Canada),179103,catchang,2020-04-24 07:07:05 UTC,2020-04-30 05:32:41 UTC,needs_id,CC-BY-NC,...,,Grass Spiders,Anyphaena,,Arachnida,153567.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.27248 37.84219),0.127103,0
43036981,2020/04/24 12:01 AM PDT,2020-04-24,2020-04-24 07:01:00 UTC,Pacific Time (US & Canada),179103,catchang,2020-04-24 07:07:06 UTC,2020-04-24 07:34:24 UTC,research,CC-BY-NC,...,,White-shouldered House Moth,Endrosis sarcitrella,White-shouldered House Moth,Insecta,205758.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.27248 37.84219),0.127103,0
43036982,2020/04/24 12:01 AM PDT,2020-04-24,2020-04-24 07:01:00 UTC,Pacific Time (US & Canada),179103,catchang,2020-04-24 07:07:06 UTC,2020-04-30 05:21:53 UTC,research,CC-BY-NC,...,,Noble False Widow,Steatoda nobilis,Noble False Widow,Arachnida,366894.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.27248 37.84219),0.127103,0
43037180,Fri Apr 24 2020 00:02:24 GMT-0700 (PDT),2020-04-24,2020-04-24 07:02:24 UTC,Pacific Time (US & Canada),819,rebeccafay,2020-04-24 07:10:24 UTC,2020-04-24 07:11:25 UTC,needs_id,CC0,...,,,Armadillidium vulgare,Common Pill Woodlouse,Animalia,56083.0,/Users/florence/Desktop/citizen_science/muki_d...,POINT (-122.43231 37.80113),0.000282,0


In [None]:
# TODO: find out how to convert the distances to metres / kilometres