In [2]:
import requests	
from bs4 import BeautifulSoup
import re
import pandas as pd
import os
from sqlalchemy import create_engine
import config
from unidecode import unidecode
from camping import camping_data
from camping import storage

In [7]:
campgrounds = pd.read_csv('../data/usfs_sites.csv')

In [13]:
store = storage.Storage()

In [27]:

def extract_cg_info(campgrounds) :
    df_cg = pd.DataFrame()
    for index, campground in campgrounds.iterrows():
        latitude = longitude = elevation = site_id = numsites = conditions = ""
        fees = openseason = usage = water = restroom = status = ""
        site_url = "http://" + config.LAMP_IP + "/" + campground['url']
        print(campground['facilityname'] + '\t' + site_url)
        try :
            cg_req = requests.get(site_url)
            cg_soup = BeautifulSoup(cg_req.text, 'lxml')
        except Exception as ex :
            print('couldnt get site_url ' + site_url)
            print(ex)
            continue

        # get area status if available
        try :
            for strong_tag in cg_soup.find_all('strong'):
                if ('Area Status' in unidecode(strong_tag.text)):
                    status = unidecode(strong_tag.next_sibling).strip()
        except Exception:
            print('couldnt get area status')


        print("getting location")
        # get lat, long, altitude
        try :
            lat = cg_soup.find_all('div', text=re.compile('Latitude'))
            div = [row.next_sibling.next_sibling for row in lat]
            latitude  = div[0].text.strip()

            lng = cg_soup.find_all('div', text=re.compile('Longitude'))
            div = [row.next_sibling.next_sibling for row in lng]
            longitude  = div[0].text.strip()

            el = cg_soup.find_all('div', text=re.compile('Elevation'))
            div = [row.next_sibling.next_sibling for row in el]
            elevation  = div[0].text.strip()

            # get site usage, type, num sites, site info

        except Exception:
            print('couldnt get location info')

        # table[0] is the basic info table

        try :
            tables = cg_soup.find_all('div', {'class': 'tablecolor'})
        except Exception:
            print('couldnt get tables')

        try :
            print(len(tables))
            rows = tables[0].find_all('tr')
            for row in rows:
                
                if row.th.text == 'Reservations:':
                    reservations = unidecode(row.td.text).strip()
                if row.th.text == 'Open Season:':
                    openseason = unidecode(row.td.text).strip()
                if row.th.text == 'Current Conditions:':
                    print(unidecode(row.td.text))
                    conditions = unidecode(row.td.text).strip()
                if row.th.text == 'Water:':
                    water = unidecode(row.td.text).strip()
                if row.th.text == 'Restroom:':
                    restroom = unidecode(row.td.text).strip()
        except Exception as ex:
            print('couldnt get basic campground info')
            print(ex)

        # table 1 is the campground info
        try:
            rows = tables[1].find_all('tr')

            for row in rows:
                if row.td.text == 'Reservation Info':
                    reserveinfo = unidecode(row.td.next_sibling.text).strip()
                if row.td.text == 'No. of Sites':
                    numsites = unidecode(row.td.next_sibling.text).strip()
        except Exception:
            print('couldnt get campsite availability info')
        
        # assemble into DataFrame
        df_cg = df_cg.append(pd.DataFrame ({
            'latitude': [latitude],
            'longitude': [longitude],
            'elevation': [elevation],
            'facilityname' : [campground['facilityname']],
            'facilityurl' : [site_url],
            'status' : [status],
            'water' : [water],
            'restroom' : [restroom],
            'reservations': [reservations],
            'conditions': [conditions],
            'numsites': [numsites]
            }))
    return df_cg

In [28]:
test = extract_cg_info(campgrounds)

whispering falls campground	http://172.17.0.3/Willamette National Forest - Whispering Falls Campground.html
getting location
5
lost lake campground	http://172.17.0.3/Mt. Hood National Forest - Lost Lake Campground.html
getting location
10
OPEN 
lake harriet campground	http://172.17.0.3/Mt. Hood National Forest - Lake Harriet Campground_Day Use.html
getting location
2
kinnikinnick (laurance lake) campground	http://172.17.0.3/Mt. Hood National Forest - Kinnikinnick (Laurance Lake) Campground.html
getting location
9
OPEN 
fifteenmile campground	http://172.17.0.3/Mt. Hood National Forest - Fifteenmile Campground.html
getting location
5
No services provided.A  Pack in/Pack out. 
clear lake campground	http://172.17.0.3/Mt. Hood National Forest - Clear Lake Campground.html
getting location
8
CLOSED 
camp creek campground	http://172.17.0.3/Mt. Hood National Forest - Camp Creek Campground.html
getting location
5
badger lake campground	http://172.17.0.3/Mt. Hood National Forest - Badger Lake Cam

In [29]:
test.shape


(9, 11)

In [30]:
test

Unnamed: 0,conditions,elevation,facilityname,facilityurl,latitude,longitude,numsites,reservations,restroom,status,water
0,,2000 feet,whispering falls campground,http://172.17.0.3/Willamette National Forest -...,44.68829400202167,-122.0103430470687,16,Reserve online at www.recreation.gov or phone ...,Vault Toilets (3),,Yes
0,OPEN,3200,lost lake campground,http://172.17.0.3/Mt. Hood National Forest - L...,45.5008,-121.81641,,This site can be reserved by calling Toll Free...,Vault Toilet (18),Open,Potable Water
0,,2000,lake harriet campground,http://172.17.0.3/Mt. Hood National Forest - L...,45.073611,-121.956944,13 single site(s),This site can be reserved by calling Toll Free...,Vault Toilet,Open,Potable Water
0,OPEN,3000,kinnikinnick (laurance lake) campground,http://172.17.0.3/Mt. Hood National Forest - K...,45.45748,-121.66343,20 walk-in access only campsites.,This site can be reserved by calling Toll Free...,Vault Toilet (2),Open,No
0,No services provided.A Pack in/Pack out.,4000,fifteenmile campground,http://172.17.0.3/Mt. Hood National Forest - F...,45.3505,-121.4729,3 single site(s),No reservations,Vault Toilet (1),Open,No
0,CLOSED,3600,clear lake campground,http://172.17.0.3/Mt. Hood National Forest - C...,45.17945,-121.69675,,This site can be reserved by calling Toll Free...,Vault Toilet,Closed,Potable Water
0,,2200,camp creek campground,http://172.17.0.3/Mt. Hood National Forest - C...,45.305236,-121.867275,,This site can be reserved by calling Toll Free...,Vault Toilet,Closed,Potable Water
0,Campground not accessible due to snow,4400,badger lake campground,http://172.17.0.3/Mt. Hood National Forest - B...,45.30496,-121.55537,4 single site(s),No reservations,Vault Toilet (1),Open,No
0,Eagle Creek campgroundisA reservable starting ...,200 - 200,eagle creek campground,http://172.17.0.3/Columbia River Gorge Nationa...,45.642244,-121.925284,17,Eagle Creek Campground sites can be reserved u...,Flush toilets,Open,Potable water available


In [8]:
test

Unnamed: 0,conditions,elevation,facilityname,facilityurl,latitude,longitude,numsites,reservations,restroom,status,water
0,,2000 feet,whispering falls campground,http://172.17.0.2/Willamette National Forest -...,44.688294,-122.010343,16,Reserve online at www.recreation.gov or phone ...,Vault Toilets (3),,Yes
1,OPEN,3200,lost lake campground,http://172.17.0.2/Mt. Hood National Forest - L...,45.5008,-121.81641,,This site can be reserved by calling Toll Free...,Vault Toilet (18),Open,Potable Water
2,,2000,lake harriet campground,http://172.17.0.2/Mt. Hood National Forest - L...,45.073611,-121.956944,13 single site(s),This site can be reserved by calling Toll Free...,Vault Toilet,Open,Potable Water
3,OPEN,3000,kinnikinnick (laurance lake) campground,http://172.17.0.2/Mt. Hood National Forest - K...,45.45748,-121.66343,20 walk-in access only campsites.,This site can be reserved by calling Toll Free...,Vault Toilet (2),Open,No
4,No services provided.A Pack in/Pack out.,4000,fifteenmile campground,http://172.17.0.2/Mt. Hood National Forest - F...,45.3505,-121.4729,3 single site(s),No reservations,Vault Toilet (1),Open,No
5,CLOSED,3600,clear lake campground,http://172.17.0.2/Mt. Hood National Forest - C...,45.17945,-121.69675,,This site can be reserved by calling Toll Free...,Vault Toilet,Closed,Potable Water
6,,2200,camp creek campground,http://172.17.0.2/Mt. Hood National Forest - C...,45.305236,-121.867275,,This site can be reserved by calling Toll Free...,Vault Toilet,Closed,Potable Water
7,Campground not accessible due to snow,4400,badger lake campground,http://172.17.0.2/Mt. Hood National Forest - B...,45.30496,-121.55537,4 single site(s),No reservations,Vault Toilet (1),Open,No
8,Eagle Creek campgroundisA reservable starting ...,200 - 200,eagle creek campground,http://172.17.0.2/Columbia River Gorge Nationa...,45.642244,-121.925284,17,Eagle Creek Campground sites can be reserved u...,Flush toilets,Open,Potable water available
9,,2000 feet,whispering falls campground,http://172.17.0.2/Willamette National Forest -...,44.688294,-122.010343,16,Reserve online at www.recreation.gov or phone ...,Vault Toilets (3),,Yes
