### Bringing together all of the objects we have created thus far
<img src="images/battle_for_helms_deep_thecloverlord.deviantart.com.png" width=70% style="display: inline-block">
<br>
image source: thecloverlord.deviantart.com


In [1]:
# import the data types
from classes.ScraperData import ScraperData
from classes.RidbData import RidbData
from classes.RidbDataLive import RidbDataLive

# import the scrapers
from classes.ReservationsScraper import ReservationsScraper
from classes.UsfsWebScraper import UsfsWebScraper
from classes.UsfsWebScraperLocal import UsfsWebScraperLocal

# import the storage
from classes.MysqlStorage import MysqlStorage
from classes.CsvStorage import CsvStorage

import config
import pandas as pd


Set up our storage objects

In [2]:
# Create one instance of each storage backend.
# mysql_store is the one passed to every data object constructed later in this notebook.
mysql_store = MysqlStorage()
# NOTE(review): csv_store is not referenced again in the cells visible here.
csv_store = CsvStorage()

Set up our camping plans!

In [3]:
# Search centre and radius handed to the RIDB data objects.
# NOTE(review): the unit of `radius` is not stated in this notebook — confirm.
destination_info = {
    "Latitude": 45.4977712,
    "Longitude": -121.8211673,
    "radius": 15,
}

# Trip details; these are attached to every reservation URL row further down.
start_date = '06/01/2016'
stay_length = 2

Set up scraper inputs

In [4]:
# Input table for the USFS scraper.
usfs_urls = pd.read_csv('data/usfs_sites.csv')

# Input table for the reservations scraper, with the trip's start date and
# stay length added as constant-valued columns on every row.
reservation_urls = pd.read_csv('data/reservation_urls.csv').assign(
    start_date=start_date,
    stay_length=stay_length,
)

Set up objects for our data sources - RIDB API, USFS Websites, recreation.gov reservations

In [5]:
# RIDB-backed sources: both take a name, the "camping" argument, the
# destination info, and the MySQL store.
ridb_data = RidbData(
    'ridb_merge_lab',
    "camping",
    destination_info,
    mysql_store,
)
ridb_data_live = RidbDataLive(
    'ridb_live_merge_lab',
    "camping",
    destination_info,
    mysql_store,
)

# Scraper-backed sources: a name, the table of URLs to visit, the MySQL store,
# and the scraper instance that does the fetching.
usfs_data = ScraperData(
    'usfs_merge_lab',
    usfs_urls,
    mysql_store,
    UsfsWebScraper(),
)
res_data = ScraperData(
    'res_merge_lab',
    reservation_urls,
    mysql_store,
    ReservationsScraper(),
)

starting display


In [6]:
# Run the RIDB extraction on its own so its frame can be inspected in the next cell.
# ridb_data is also a member of one_data_list, so it is extracted again when the
# whole list is extracted further down.
ridb_data.extract()

In [7]:
# Preview the extracted RIDB facilities instead of rendering the entire frame;
# the long HTML description/direction columns make a full dump hard to read.
ridb_data.df.head(10)

Unnamed: 0,FacilityAdaAccess,FacilityDescription,FacilityDirections,FacilityEmail,FacilityID,FacilityLatitude,FacilityLongitude,FacilityMapURL,FacilityName,FacilityPhone,FacilityReservationURL,FacilityTypeDescription,FacilityUseFeeDescription,Keywords,LastUpdatedDate,LegacyFacilityID,OrgFacilityID,StayLimit
0,,<h2>Overview</h2>Wildwood Recreation Site i...,Wildwood Recreation Site is located 39 m...,,234075,45.356111,-121.986667,,WILDWOOD RECREATION SITE,503-622-3696,,Camping,,,2016-05-12,74082.0,AN374082,
1,,<p>This small rustic campground is locate...,"<p><u>From Prineville , Oregon</u>:</p><p>Tr...",,236929,44.483882,-120.336554,,Wildwood Campground,,,,,,2016-05-09,,38780,
2,,<h2>Overview</h2>Whispering Falls Campground...,"8.3 miles southeast of Detroit , Oregon: ...",,251470,44.687792,-122.009353,,WHISPERING FALLS CAMPGROUND,503-854-3366,,Camping,,,2016-05-12,127540.0,AN427540,
3,,<p>The Resort is situated on the shores ...,"<p><u>From Portland , OR</u> Lost Lake Re...",,235897,45.5008,-121.81641,,Lost Lake Resort,,,,,,2016-05-09,,53230,
4,,<p>Lost Lake Campground is adjacent to H...,"<p>From McKenzie Bridge , OR , follow Hig...",,244288,44.429277,-121.912475,,Lost Lake Campground,,,,,,2016-05-09,,13362,
5,,<h2>Overview</h2>Lost Lake Campground is c...,Getting There:<br /> Lost Lake Resort &a...,,251434,45.488889,-121.821944,,LOST LAKE RESORT AND CAMPGROUND,541-386-6366,,Camping,,,2015-10-15,125541.0,AN425541,
6,True,<h2>Overview</h2>Lake Harriet Campground si...,"From Estacada , Oregon , travel east on ...",,232856,45.073611,-121.956944,,LAKE HARRIET,503-630-6861,,Camping,,,2016-05-12,71641.0,AN371641,
7,,<p>On the southern edge of Laurance Lake...,Access via Forest Road 2840.,,235894,45.45748,-121.66343,,Kinnikinnick (Laurance Lake) Campground,,,,,,2016-05-09,,53214,
8,,"<p>Fifteen Mile Campground , is a small ...","<p>From <u>Dufur , OR</u> - take Forest ...",,235790,45.3505,-121.4729,,Fifteenmile Campground,,,,,,2016-05-09,,52812,
9,,<h2>Overview</h2>Clear Lake Campground is ...,"From Sandy , travel east on Highway 26 ...",,232849,45.181111,-121.696389,,CLEAR LAKE (OR),541-328-0909,,Camping,,,2016-05-12,71633.0,AN371633,


The order of this list is used by the merge function to determine merge priority,
i.e. `one_data_list = [res_data, usfs_data, ridb_data]` will result in
`m1 = merge(usfs_data, res_data, how='left')`
`merge_final(ridb_data, how='left')`

In [8]:
# Order matters: the merge step uses the position in this list as merge priority.
one_data_list = [
    res_data,
    usfs_data,
    ridb_data,
]
# live version
# NOTE(review): ridb_hiking_live is not created in the cells visible here;
# define it before enabling the line below.
# one_data_list = [res_data, usfs_data,ridb_hiking_live, ridb_data_live]

Extract all the data in one line with this one weird trick!

In [9]:
# Call extract() on every source, in list order; the displayed value is the
# list of extract() return values.
[source.extract() for source in one_data_list]

getting reservation.html
setting up web browser
browsed to reservation.html
browsed to reservation.html
browsed to reservation.html
browsed to reservation.html
browsed to reservation.html
browsed to reservation.html
closing browser


[None, None, None]

In [10]:
# (rows, columns) of each source's frame, in list order.
[source.df.shape for source in one_data_list]

[(6, 4), (9, 10), (15, 18)]

Another one-liner! Isn't object-oriented programming rad?

In [11]:
# Name given to each source at construction, in list order.
[source.name for source in one_data_list]

['res_merge_lab', 'usfs_merge_lab', 'ridb_merge_lab']

We are ready to merge!

In [12]:
# Column index of each source's frame; FacilityLatitude, FacilityLongitude and
# FacilityName appear in all three.
[source.df.columns for source in one_data_list]

[Index(['FacilityLatitude', 'FacilityLongitude', 'FacilityName',
        'SitesAvailable'],
       dtype='object'),
 Index(['CurrentConditions', 'OpenSeason', 'Reservations', 'Restroom', 'Water',
        'FacilityElevation', 'FacilityLatitude', 'FacilityLongitude',
        'FacilityName', 'FacilityStatus'],
       dtype='object'),
 Index(['FacilityAdaAccess', 'FacilityDescription', 'FacilityDirections',
        'FacilityEmail', 'FacilityID', 'FacilityLatitude', 'FacilityLongitude',
        'FacilityMapURL', 'FacilityName', 'FacilityPhone',
        'FacilityReservationURL', 'FacilityTypeDescription',
        'FacilityUseFeeDescription', 'Keywords', 'LastUpdatedDate',
        'LegacyFacilityID', 'OrgFacilityID', 'StayLimit'],
       dtype='object')]

In [13]:
from classes.DistanceMergeData import DistanceMergeData

In [14]:
# Build the merged data set from the ordered source list, backed by the MySQL store.
merge_data = DistanceMergeData(
    "merge_res_usfs",
    one_data_list,
    mysql_store,
)

In [15]:
# Run the merge; the merged frame is then read from merge_data.df in the next cell.
merge_data.extract()

In [16]:
# List the merged frame's columns to check what each source contributed.
merge_data.df.columns

Index(['FacilityAdaAccess', 'FacilityDescription', 'FacilityDirections',
       'FacilityEmail', 'FacilityID', 'FacilityLatitude', 'FacilityLongitude',
       'FacilityMapURL', 'FacilityName', 'FacilityPhone',
       'FacilityReservationURL', 'FacilityTypeDescription',
       'FacilityUseFeeDescription', 'Keywords', 'LastUpdatedDate',
       'LegacyFacilityID', 'OrgFacilityID', 'StayLimit', 'merge_index',
       'CurrentConditions', 'OpenSeason', 'Reservations', 'Restroom', 'Water',
       'FacilityElevation', 'FacilityStatus', 'SitesAvailable'],
      dtype='object')

In [17]:
# Store the merged result.
# NOTE(review): presumably writes through the mysql_store passed to the
# constructor — confirm in DistanceMergeData.put.
merge_data.put()

In [18]:
# Show the name this merged data set was constructed with.
merge_data.name

'merge_res_usfs'