In [10]:
from bs4 import BeautifulSoup
import re
import requests

In [53]:
# Relative path to the saved HTML page that the next cell opens and parses.
html_file = '../data/New Lost Lake.html'

In [54]:
# Read the saved page and parse it into `cg_soup`, which later cells pass to the helpers.
# NOTE(review): no parser is named, so BeautifulSoup chooses one from what is
# installed - confirm the result is the same on every machine that runs this.
with open(html_file,'r') as f:
	cg_soup = BeautifulSoup(f.read())

In [55]:
# Display the parsed document (the saved output below is cut off).
cg_soup

<!--  Default ver 1.2.2 -->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<!--Default_FS ver  1.2.2 -->
<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
<!--  head  ver 4.44 -->
<head>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<script type="text/javascript">
        var bidiSupport = new Object();
        bidiSupport.bidiAlignRight = "right";
        bidiSupport.bidiAlignLeft = "left"; 
        bidiSupport.bidiDirAttr = ""; 
        bidiSupport.bidiImageRTL = null;
        bidiSupport.isRTL = false;
</script>
<title> 
	    Mt. Hood National Forest - Lost Lake Campground, Resort and Day Use Area
	</title><!--<link href='about:blank' rel="shortcut icon" /> -->
<link href="/Internet/scr/img/favicon.ico" rel="shortcut icon"/>
<link href="/FSE_WIDTheme/themes/./html/FSE_WIDTheme/reset.min.css" rel="styleSheet" type="text/css"/>
<link href="/FSE_WIDTheme/themes/./html/FSE_WIDTheme/WIDConsumption_S

In [16]:
def get_area_status(soup):
    """Return the text that follows the page's "Area Status" label.

    Every tag returned by ``soup.find_all('strong')`` is examined. When more
    than one contains "Area Status", the value after the last one wins.

    Args:
        soup: Parsed page exposing ``find_all`` (a BeautifulSoup document).

    Returns:
        The stripped text of the node immediately after the matching
        ``<strong>`` tag, or ``None`` when no label is found or the lookup
        fails.
    """
    status = None
    try:
        for strong_tag in soup.find_all('strong'):
            if 'Area Status' in strong_tag.text:
                # The value is read from the node right after the label tag.
                status = strong_tag.next_sibling.strip()
    except Exception as ex:
        # Best effort: report the failure and return whatever was found so far.
        # The message is formatted with %; passing `ex` as a second print()
        # argument would leave a literal "%s" in the output.
        print('couldnt get area status %s' % ex)

    return status

def get_location(soup, search_field):
    """Return the value displayed next to a labelled location field.

    Used for extracting Latitude, Longitude, and Elevation.

    Args:
        soup: Parsed page exposing ``find_all`` (a BeautifulSoup document).
        search_field: Regular-expression pattern matched against the text of
            ``<div>`` elements to locate the label (e.g. ``'Latitude'``).

    Returns:
        The stripped text of the element two siblings after the first matching
        label, or ``None`` when there is no match or the lookup fails.
    """
    return_value = None
    try:
        # NOTE(review): newer BeautifulSoup releases prefer `string=` over
        # `text=`; the original keyword is kept for compatibility.
        field_divs = soup.find_all('div', text=re.compile(search_field))
        # Only the first match is used, so only the first match is followed;
        # a malformed later match can no longer discard a valid value.
        # The value sits two siblings after the label (presumably a whitespace
        # text node lies in between - confirm against the page markup).
        value_div = field_divs[0].next_sibling.next_sibling
        return_value = value_div.text.strip()
    except Exception as ex:
        # Best effort: report the failure (including "no match") and return None.
        print('couldnt get location info %s' % ex)

    return return_value

def get_campground_info(soup):
    """Collect basic campground fields from the page's first info table.

    Looks up ``<div class="tablecolor">`` containers, walks the ``<tr>`` rows
    of the first one, and copies the cell text of rows whose header text is
    one of the known labels.

    Args:
        soup: Parsed page exposing ``find_all`` (a BeautifulSoup document).

    Returns:
        dict: Any of the keys ``reservations``, ``openseason``, ``conditions``,
        ``water`` and ``restroom`` mapped to the stripped cell text. An empty
        dict is returned when the table cannot be located or read.
    """
    # Header text as compared against the page -> key used in the result.
    field_keys = {
        'Reservations:': 'reservations',
        'Open Season:': 'openseason',
        'Current Conditions:': 'conditions',
        'Water:': 'water',
        'Restroom:': 'restroom',
    }

    try:
        tables = soup.find_all('div', {'class': 'tablecolor'})
    except Exception as ex:
        # Report the exception itself: `tables` is not bound when find_all
        # fails. Return a dict so every path yields the same type.
        print('couldnt get tables %s' % ex)
        return {}

    info = {}
    try:
        for row in tables[0].find_all('tr'):
            # Rows lacking a header or a value cell carry no field; skip them
            # instead of letting one such row discard everything collected.
            if row.th is None or row.td is None:
                continue
            key = field_keys.get(row.th.text)
            if key is not None:
                info[key] = row.td.text.strip()
    except Exception as ex:
        # Best effort: covers a missing table (IndexError) and odd markup.
        print('couldnt get basic campground info, %s' % ex)
        return {}

    return info

def get_soup(path, timeout=30):
    """Download a page and parse it with BeautifulSoup.

    Args:
        path: URL to fetch.
        timeout: Seconds passed to ``requests.get`` as its timeout. Without
            one, an unresponsive server can block the call indefinitely.

    Returns:
        The parsed document when the response status is 200, otherwise
        ``None``.

    Raises:
        requests.exceptions.RequestException: Propagated from ``requests.get``
            (connection failures, and timeouts now that one is set).
    """
    cg_req = requests.get(path, timeout=timeout)
    if cg_req.status_code != 200:
        # Callers receive None for any non-200 response.
        return None
    # NOTE(review): no parser is named, so BeautifulSoup chooses one from what
    # is installed - left unchanged to avoid altering the parsed tree.
    return BeautifulSoup(cg_req.text)

def scrape(url, facility_name):
    """Scrape one USFS recreation page into a flat record.

    Args:
        url: Address of the page, handed to ``get_soup``.
        facility_name: Name stored under ``'FacilityName'`` in the result.

    Returns:
        dict: ``FacilityStatus``, ``FacilityLatitude``, ``FacilityLongitude``
        and ``FacilityElevation``, followed by whatever keys
        ``get_campground_info`` returns, followed by ``FacilityName``.
    """
    page = get_soup(url)

    record = {'FacilityStatus': get_area_status(page)}
    location_fields = (
        ('FacilityLatitude', 'Latitude'),
        ('FacilityLongitude', 'Longitude'),
        ('FacilityElevation', 'Elevation'),
    )
    for column, label in location_fields:
        record[column] = get_location(page, label)

    # Consider making this a json attribute, not separate columns
    record.update(get_campground_info(page))
    record['FacilityName'] = facility_name
    return record


In [17]:
# Run the full scrape against the live page; scrape() fetches the URL over HTTP,
# so this cell needs network access.
url = "http://www.fs.usda.gov/recarea/mthood/recreation/camping-cabins/recarea/?recid=53228&actid=29"
res = scrape(url, "Lost Lake")

In [19]:
# Show the record returned by scrape().
res

{'FacilityStatus': 'Closed',
 'FacilityLatitude': '45.50080',
 'FacilityLongitude': '-121.81641',
 'FacilityElevation': '3200',
 'conditions': 'CLOSED FOR THE SEASON\n\xa0\n**Lost Lake is currently limiting parking capacity within the campground and resort area to 50%.\xa0 Friday-Monday, a road barricade is staffed 4 miles below the entrance to the resort.\xa0 Once parking capacity has reached 50% vehicles will be turned around at that location.\xa0 This is the result of parking congestion along the roadways accessing the campground and resort.\xa0 The congestion has made it difficult for emergency personnel to access the site.**\xa0\xa0\nFor the 2020 season, site is operating at 50% capacity.\xa0 Capacity will be met early\xa0on\xa0weekends so strongly consider a mid-week visit instead.\xa0\nForest Road 13 is CLOSED at Lake Branch Bridge at mile post 10.8, just northwest of Lost Lake.',
 'reservations': 'Reservations can be made by visiting Recreation.gov. \xa0Reservations must be mad

In [57]:
# Latitude as parsed from cg_soup.
get_location(cg_soup, 'Latitude')

'45.50080'

In [58]:
# Longitude as parsed from cg_soup.
get_location(cg_soup, 'Longitude')

'-121.81641'

In [59]:
# Elevation as parsed from cg_soup.
get_location(cg_soup, 'Elevation')

'3200'

In [60]:
# NOTE(review): `reservations` is not assigned in any cell visible here, so on a
# fresh kernel this would raise NameError unless it is defined elsewhere -
# confirm, or remove this cell.
reservations

'This site can be reserved by calling Toll Free 1-877-444-6777(International 518-885-3639 or TDD 877-833-6777 or online at http:// www.recreation.gov/campgroundDetails.do?'

In [61]:
# Campground table fields as parsed from cg_soup.
get_campground_info(cg_soup)

{'conditions': 'CLOSED FOR THE SEASON\n \n**Lost Lake is currently limiting parking capacity within the campground and resort area to 50%.  Friday-Monday, a road barricade is staffed 4 miles below the entrance to the resort.  Once parking capacity has reached 50% vehicles will be turned around at that location.  This is the result of parking congestion along the roadways accessing the campground and resort.  The congestion has made it difficult for emergency personnel to access the site.**  \nFor the 2020 season, site is operating at 50% capacity.  Capacity will be met early on weekends so strongly consider a mid-week visit instead. \nForest Road 13 is CLOSED at Lake Branch Bridge at mile post 10.8, just northwest of Lost Lake.',
 'reservations': 'Reservations can be made by visiting Recreation.gov. \xa0Reservations must be made 4 days ahead of arrival and can be made up to 6 months in advance for the standard sites and 12 months in advance for the group sites.',
 'water': 'Drinking Wa