<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Collect-data-about-state-parks-(lat,-lon)" data-toc-modified-id="Collect-data-about-state-parks-(lat,-lon)-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Collect data about state parks (lat, lon)</a></span></li></ul></div>

### Collect data about state parks (lat, lon)

The list of parks is taken from this website: https://www.stateparks.com/california_parks_and_recreation_destinations.html


In [2]:
import requests
from bs4 import BeautifulSoup

import numpy as np
import pandas as pd
import datetime

from shapely.geometry import Point, Polygon
import geopandas as gpd
import geopy

import pickle

from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from geopy.exc import GeocoderTimedOut

In [3]:
# Parse a locally saved copy of the park listing page.
# The context manager closes the file handle as soon as the text is read.
with open('california_parks_and_recreation_destinations_source.html','r') as html_file:
    html_text = html_file.read()
soup = BeautifulSoup(html_text, 'html.parser')

In [4]:
# For every element with id 'parklink', keep the second item of its descendants
# (the cell output below shows these are the park names).
park_names = [list(link.descendants)[1] for link in soup.find_all(id='parklink')]

In [5]:
# Display the collected names.
park_names

['Ahjumawi Lava Springs State Park',
 'Andrew Molera State Park',
 'Angel Island State Park',
 'Annadel State Park',
 'Ano Nuevo Island State Park',
 'Anza-Borrego Desert State Park',
 'Arthur B. Ripley Desert Woodland State Park',
 'Bidwell-Sacramento River State Park',
 'Big Basin Redwoods State Park',
 'Bolsa Chica Beach State Park',
 'Border Field State Park',
 'Bothe Napa Valley State Park',
 'Burleigh Murray Ranch State Park',
 'Burton Creek State Park',
 'Butano State Park',
 'Calaveras Big Trees State Park',
 'Castle Crags State Park',
 'Castle Rock State Park',
 'Caswell Memorial State Park',
 'Chino Hills State Park',
 'Clear Lake State Park',
 'Crystal Cove State Park',
 'Cuyamaca State Park',
 'D L Bliss State Park',
 'Del Norte Coast Redwoods State Park',
 'Dockweiler Beach State Park',
 'Donner Memorial State Park',
 'El Capitan Beach State Park',
 'Emerald Bay State Park',
 'Estero Bluffs State Park',
 'Fort Ord Dunes State Park',
 'Fremont Ford State Park',
 'Fremont Pe

In [6]:
# get gps locations of these parks
geolocator = Nominatim(user_agent='christoph@appliedanalytics.xyz')

# Throttle lookups to at most one request per second. swallow_exceptions=False
# lets a timeout that survives the limiter's own retries reach the handler below.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1, swallow_exceptions=False)

# park name -> geocoding result; None when nothing was found or the lookup timed out
parks = {}

for park in park_names:
    try:
        parks[park] = geocode(park + ", California")
    except GeocoderTimedOut:
        # Store the failure so the park is listed with the other unresolved
        # names when the coordinates are extracted.
        parks[park] = None
        print ("Error: %s"%park)



In [9]:
# Spot-check a single geocoding result.
parks['Andrew Molera State Park']

Location(Andrew Molera State Park, Big Sur, Monterey County, California, United States of America, (36.27964585, -121.83095118457348, 0.0))

In [None]:
# input some park locations by hand

In [8]:
# Keep (latitude, longitude) for every park that was geocoded;
# print the names of the parks without a result.
parksll = {}

for name, location in parks.items():
    if location is None:
        print (name)
        continue
    parksll[name] = (location.latitude, location.longitude)

Ano Nuevo Island State Park
Bolsa Chica Beach State Park
Burleigh Murray Ranch State Park
Dockweiler Beach State Park
El Capitan Beach State Park
George J Hatfield State Park
Gold Discovery Site State Park
James D Phelan Beach State Park
John Little State Park
Kings Beach State Park
Manchester Beach State Park
Manhattan Beach State Park
Marin Headlands State Park
McNee Ranch State Park
Natural Bridges Beach State Park
Placerita Canyon State Park
Santa Monica Beach State Park
Sugar Pine Point State Park
Thornton Beach State Park
White Point State Park
Will Rogers Beach State Park
Will Rogers State Park
Austin Creek State Recreation Area
Benicia State Recreation Area
Millerton Lake State Recreation Area
Boggs Mountain State Forest
Columbia Historic State Park
Darrah Springs State Fish Hatchery
Hot Creek State Fish Hatchery
Mojave State Fish Hatchery
Mount Shasta State Fish Hatchery
Antelope Valley California Poppy State Natural Reserve
Armstrong Redwoods State Reserve
Jug Handle State Re

In [10]:
# Coordinates entered by hand, as (latitude, longitude).
parksll.update({
    'Marin Headlands State Park': (37.8252061, -122.5074731),
    'Antelope Valley California Poppy State Natural Reserve': (34.726260, -118.396545),
    'Armstrong Redwoods State Reserve': (38.537056, -123.006324),
    'Point Lobos State Reserve': (36.516162, -121.937696),
    'Torrey Pines State Reserve': (32.917783, -117.254675),
    'Lake of the Woods Wildlife Area': (38.950917, -121.577500),
    'Calaveras Bigtree National Forest': (38.252963, -120.251214),
    'Coachella Valley National Wildlife Refuge': (33.841549, -116.313356),
    'Farallon National Wildlife Refuge': (37.697282, -123.001580),
    'Sweetwater Marsh National Wildlife Refuge': (32.641032, -117.111598),
    'Washoe Meadows State Park': (38.876651, -120.029260),
    'Carrizo Plain National Monument': (35.189908, -119.862997),
    'Snow Mountain Wilderness': (39.403534, -122.740587),
    # NOTE(review): 'Eeel' looks like a typo for 'Eel' - confirm against the
    # scraped name list before renaming the key.
    'Yolla Bolly-Middle Eeel Wilderness': (40.063274, -123.222007),
})

# corrected locations:
parksll.update({
    'Tahoe State Park': (39.1751847,-120.1349142),
    'Mountain Home State Forest': (36.230925, -118.712957),
})

In [15]:
# Turn the {name: (lat, lon)} mapping into a table with one row per park.
df = pd.DataFrame.from_dict(parksll,orient='index').reset_index()
df.columns = ['Name','Latitude','Longitude']
# Fix the column dtypes in a single pass.
df = df.astype({'Name': str, 'Latitude': np.float64, 'Longitude': np.float64})
df

Unnamed: 0,Name,Latitude,Longitude
0,Ahjumawi Lava Springs State Park,41.121537,-121.451396
1,Andrew Molera State Park,36.279646,-121.830951
2,Angel Island State Park,37.862930,-122.430364
3,Annadel State Park,38.426131,-122.624343
4,Anza-Borrego Desert State Park,33.095535,-116.301898
...,...,...,...
179,Sweetwater Marsh National Wildlife Refuge,32.641032,-117.111598
180,Washoe Meadows State Park,38.876651,-120.029260
181,Carrizo Plain National Monument,35.189908,-119.862997
182,Snow Mountain Wilderness,39.403534,-122.740587


In [16]:
import pyproj
import rasterio

def elevation_filename(a):
    """Return the relative path of the .tif file for SRTM tile id ``a`` (e.g. '12_04')."""
    return 'srtm/srtm_%s/srtm_%s.tif'%(a,a)

# lookup elevation data for each park
# Module-level state read by get_gps/get_value; the placeholders are rebound
# for every tile before those functions are called.
src = {}
src_coord = {}
lonlat = {}
elevation_data = {}

def get_gps (row, col):
    """Return (lon, lat) for raster cell (row, col) of the current tile.

    Reads the module-level ``src``, ``src_coord`` and ``lonlat``; these must
    have been set for the tile of interest before calling.
    """
    east, north = src.xy(row,col) # image --> spatial coordinates
    lon,lat = pyproj.transform(src_coord, lonlat, east, north)
    return lon, lat

def get_value (lon, lat):
    """Look up elevation_data at one or more GPS positions.

    input: longitude, latitude (gps coordinate)
    return: elevation_data value at input location(s)

    Reads the module-level ``src``, ``src_coord``, ``lonlat`` and
    ``elevation_data``.
    """
    # gps --> spatial coordinates of the raster
    x, y = pyproj.transform(lonlat, src_coord, lon, lat)
    # spatial --> image coordinates (row and column in the raster)
    r, c = src.index(x, y)
    return elevation_data[r, c]

# Print the GPS position of the first and last raster cell of every tile.
lonlat = pyproj.Proj(init='epsg:4326')  # same for every tile, so built once
for a in ['12_04','12_05','12_06','13_04','13_05','13_06','14_05','14_06']:
    # The context manager closes each raster file once its corners are printed.
    with rasterio.open(elevation_filename(a)) as src:
        src_coord = pyproj.Proj(src.crs)
        elevation_data = src.read(1)
        print (a)
        print (get_gps(0,0))
        # last cell taken from the raster's own dimensions instead of a fixed 5999
        print (get_gps(src.height-1,src.width-1))
        print ('---------')

12_04
(-124.99958333333335, 44.99958333333334)
(-120.00041666666667, 40.000416666666666)
---------
12_05
(-124.99958333333335, 39.999583333333334)
(-120.00041666666667, 35.000416666666666)
---------
12_06
(-124.99958333333335, 34.999583333333334)
(-120.00041666666667, 30.000416666666666)
---------
13_04
(-119.99958333333335, 44.99958333333334)
(-115.00041666666667, 40.000416666666666)
---------
13_05
(-119.99958333333335, 39.999583333333334)
(-115.00041666666667, 35.000416666666666)
---------
13_06
(-119.99958333333335, 34.999583333333334)
(-115.00041666666667, 30.000416666666666)
---------
14_05
(-114.99958333333333, 39.999583333333334)
(-110.00041666666667, 35.000416666666666)
---------
14_06
(-114.99958333333333, 34.999583333333334)
(-110.00041666666667, 30.000416666666666)
---------


In [17]:
# Assign every park to the SRTM tile that contains it. Tile columns 12-14 are
# 5-degree longitude bands starting at -125; tile rows 06-04 are 5-degree
# latitude bands starting at 30 (see the tile corner printout above).
# Built-in int replaces the np.int alias, which current NumPy no longer provides.
srtm_lon_index = np.floor((df.Longitude+125.0000001)/5).astype(int).map({0:12,1:13,2:14})
srtm_lat_index = np.floor((df.Latitude-30.000417)/5).astype(int).map({0:6,1:5,2:4})

# Placeholder for parks that fall outside all tiles.
df['elevation_srtm'] = -9999

# get_value reads the global `lonlat`, so it has to be bound under that name here.
lonlat = pyproj.Proj(init='epsg:4326')

for a in ['12_04','12_05','12_06','13_04','13_05','13_06','14_05','14_06']:
    Longitude_i,Latitude_i = a.split('_')
    # get all observations for this map
    _df = df[(srtm_lon_index==int(Longitude_i))&(srtm_lat_index==int(Latitude_i))]
    if _df.empty:
        # No parks in this tile: skip reading the raster.
        continue
    # The context manager closes the raster file once its values are sampled.
    with rasterio.open(elevation_filename(a)) as src:
        src_coord = pyproj.Proj(src.crs)
        elevation_data = src.read(1)
        elevation = get_value(_df.Longitude.values,_df.Latitude.values)
    df.loc[_df.index,'elevation_srtm'] = elevation
    #print (len(elevation))


In [19]:
# Drop rows that still carry the -9999 placeholder (or any value at or below -500).
df = df.loc[df['elevation_srtm'] > -500]
df

Unnamed: 0,Name,Latitude,Longitude,elevation_srtm
0,Ahjumawi Lava Springs State Park,41.121537,-121.451396,1030
1,Andrew Molera State Park,36.279646,-121.830951,25
2,Angel Island State Park,37.862930,-122.430364,181
3,Annadel State Park,38.426131,-122.624343,286
4,Anza-Borrego Desert State Park,33.095535,-116.301898,549
...,...,...,...,...
179,Sweetwater Marsh National Wildlife Refuge,32.641032,-117.111598,3
180,Washoe Meadows State Park,38.876651,-120.029260,1935
181,Carrizo Plain National Monument,35.189908,-119.862997,599
182,Snow Mountain Wilderness,39.403534,-122.740587,1732


In [20]:
# save parks data
# Pickles the filtered table to data/parks.pkl, relative to the working directory.
df.to_pickle('data/parks.pkl')