In [27]:
import requests, datetime, json
from bs4 import BeautifulSoup

def parse_table_from_page(txt):
    # Return list of table entries on the page
    soup = BeautifulSoup(txt)
    tabulka = soup.find("table", {"class" : "tablesaw-stack"})
    rows = []
    fields = ['name', 'area', 'elevation', 'county', 'location']
    for row in tabulka.findAll('tr'):
        col = row.findAll('td')
        parts = [c for c in col]
        r = dict(zip(fields, parts))
        rows.append(r)
    return rows

def html_to_record(r):
    # Turn HTML object for table row into a dict
    name = next(r['name'].children).string
    url = 'https://wdfw.wa.gov' + r['name'].findNext('a').get('href')
    elevation = next(r['elevation'].children).string.split()[0]
    county = next(r['county'].children).string.strip()
    area = next(r['area'].children).string.strip()
    latlon = [x.string for x in r['location'].findAll('span')]
    return dict(name=name, url=url, elevation=float(elevation), area=area, county=county, lat=float(latlon[0]), lon=float(latlon[1]))

def get_lakes_from_all_pages(url_base):
    # Scrape all pages
    url_base = 'https://wdfw.wa.gov/fishing/locations/high-lakes/overabundant?name=&county=All&order=title&sort=asc&page='
    i = 0
    all_records = []
    while True:
        url = url_base + str(i)
        r = requests.get(url)
        txt = r.text
        try: rows = parse_table_from_page(txt)
        except: break
        records = [html_to_record(rw) for rw in rows if rw]
        if len(records)==0: break
        all_records.extend(records)
        i += 1
    return all_records

#all_records = get_lakes_from_all_pages()
#print(len(all_records))


In [10]:
starting_url_base = 'https://wdfw.wa.gov/fishing/locations/high-lakes/getting-started?name=&county=All&order=title&sort=asc&page='

overabundant_url_base = 'https://wdfw.wa.gov/fishing/locations/high-lakes/overabundant?name=&county=All&order=title&sort=asc&page='

all_url_base = 'https://wdfw.wa.gov/fishing/locations/high-lakes?name=&county=All&order=title&sort=asc&page='


def get_data():
    all_lakes = get_lakes_from_all_pages(all_url_base)
    all_lakes = [lk for lk in all_lakes if lk['elevation']>2500.0]
    overabundant_lakes = get_lakes_from_all_pages(overabundant_url_base)
    starting_lakes = get_lakes_from_all_pages(starting_url_base)
    overabundant_urls = set(lk['url'] for lk in overabundant_lakes)
    starting_urls = set(lk['url'] for lk in starting_lakes)
    for lk in all_lakes:
        lk['starting'] = lk['url'] in starting_urls
        lk['overabundant'] = lk['url'] in overabundant_urls
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    return dict(
        lakes=all_lakes,
        overabundant_lakes=[lk for lk in all_lakes if lk['overabundant']],
        starting_lakes=[lk for lk in all_lakes if lk['starting']],
        normal_lakes=[lk for lk in all_lakes if not lk['overabundant'] and not lk['overabundant']],
        timestamp=timestamp
    )



In [28]:
data = get_data()

In [29]:
data['overabundant_lakes'][0]

{'name': 'Airplane',
 'url': 'https://wdfw.wa.gov/fishing/locations/high-lakes/airplane',
 'elevation': 5305.0,
 'area': '9.40 acres',
 'county': 'Chelan',
 'lat': 48.002594,
 'lon': -121.006674,
 'starting': True,
 'overabundant': True}

In [30]:
output = json.dumps(dict(lakes=data['lakes'], timestamp=data['timestamp']))#data)
open('data.json', 'w').write(output)
open('data/starting_lakes.json', 'w').write(json.dumps(
    dict(lakes=data['starting_lakes'], timestamp=data['timestamp'])
))
open('data/overabundant_lakes.json', 'w').write(json.dumps(
    dict(lakes=data['overabundant_lakes'], timestamp=data['timestamp'])
))
open('data/normal_lakes.json', 'w').write(json.dumps(
    dict(lakes=data['normal_lakes'], timestamp=data['timestamp'])
))


49

In [33]:
import simplekml

def lake2marker_html(lk):
  link = '<a target=\"_blank\" href=\"'+lk['url']+'\">WDFW Page</a>'
  elevation = '<p>Elevation: '+str(round(lk['elevation']))+'ft' + '</p>'
  county = '<p>County: '+lk['county']+'</p>'
  size = '<p>Size: '+str(lk['area'])+' </p>'
  return elevation + county + size + link

def get_kml(lakes):
  kml = simplekml.Kml()
  kml.parsetext(parse=False)
  for lk in lakes:
    desc = lake2marker_html(lk)
    coords = [(lk['lon'],lk['lat'])]
    pnt = kml.newpoint(name=lk['name'].replace('&', ' and '), coords=coords, description=desc)
  return kml

In [34]:
starting_kml = get_kml(data['starting_lakes'])
starting_kml.save("data/starting_lakes.kml")
overabundant_kml = get_kml(data['overabundant_lakes'])
overabundant_kml.save("data/overabundant_lakes.kml")
all_kml = get_kml(data['lakes'])
all_kml.save("data/all_lakes.kml")

In [2]:

old_gmaps_Stuff = '''
import gmaps
import os

# Check https://github.com/pbugnion/gmaps/blob/master/docs/source/tutorial.rst
# for more documentation on the gmaps library

api_key = os.environ['GOOGLE_MAPS_API_KEY']

gmaps.configure(api_key=api_key)

info_box_template = """
<dl>
<dt>Name</dt><dd><a href="{url}">{name}</a></dd>
<dt>Elevation</dt><dd>{elevation}</dd>
</dl>
"""

marker_locs = [(r['lat'], r['lon']) for r in all_records]
boxes = [info_box_template.format(**rec) for rec in all_records]

markers = gmaps.marker_layer(marker_locs, info_box_content=boxes)
fig = gmaps.figure()
fig.add_layer(markers)
fig
'''

Figure(layout=FigureLayout(height='420px'))