### Bringing together all of the objects we have created thus far
<img src="images/battle_for_helms_deep_thecloverlord.deviantart.com.png" width=70% style="display: inline-block">
<br>
image source: thecloverlord.deviantart.com


In [1]:
# import the data types
from classes.ScraperData import ScraperData
from classes.RidbData import RidbData
from classes.RidbDataLive import RidbDataLive

# import the scrapers
from classes.ReservationsScraper import ReservationsScraper
from classes.UsfsWebScraper import UsfsWebScraper
from classes.UsfsWebScraperLocal import UsfsWebScraperLocal

# import the storage
from classes.MysqlStorage import MysqlStorage
from classes.CsvStorage import CsvStorage

import config
import pandas as pd


Set up our storage objects

In [2]:
# Storage backends. `mysql_store` is the one handed to every data source and
# merge object below; `csv_store` is created but not used in the cells shown here.
mysql_store = MysqlStorage()
csv_store = CsvStorage()

Set up our camping plans!

In [3]:
# Search centre and radius handed to the RIDB data sources below.
# NOTE(review): the units of `radius` are defined by the RIDB classes - confirm.
destination_info = {
    "Latitude": 45.4977712,
    "Longitude": -121.8211673,
    "radius": 15,
}

# Both values are stamped onto every reservation URL row in the next cell.
start_date = "06/01/2016"
stay_length = 2

Set up scraper inputs

In [4]:
# Input table for the USFS scraper.
usfs_urls = pd.read_csv('data/usfs_sites.csv')

# Input table for the reservations scraper, with the trip's start date and
# stay length added as constant columns on every row.
reservation_urls = pd.read_csv('data/reservation_urls.csv').assign(
    start_date=start_date,
    stay_length=stay_length,
)

Set up objects for our data sources - RIDB API, USFS websites, recreation.gov reservations

In [5]:
# One data-source object per feed. Each takes a name, its inputs and the shared
# MySQL store; the scraper-backed sources also take a scraper instance.
ridb_data = RidbData('ridb_merge_lab', "camping", destination_info, mysql_store)
# Built here but only referenced by the commented-out "live" list further down.
ridb_data_live = RidbDataLive('ridb_live_merge_lab', "camping", destination_info, mysql_store)
usfs_data = ScraperData('usfs_merge_lab',usfs_urls,mysql_store,UsfsWebScraper())
res_data = ScraperData('res_merge_lab',reservation_urls,mysql_store,ReservationsScraper())

starting display


The order of this list is used by the merge function to determine merge priority,
i.e. one_data_list = [res_data, usfs_data, ridb_data] will result in
m1 = merge(usfs_data, res_data, how='left')
merge_final = merge(m1, ridb_data, how='left')

In [6]:
# Sources to merge; list order sets merge priority (see the note above this cell).
one_data_list = [res_data, usfs_data,ridb_data]
# live version
# NOTE(review): `ridb_hiking_live` is not defined in any cell shown here - define
# it (or drop it from the list) before enabling the line below.
# one_data_list = [res_data, usfs_data,ridb_hiking_live, ridb_data_live]

Extract all the data in one line with this one weird trick!

In [7]:
# Call extract() on every source, in list order. The displayed list holds only
# the return values of those calls.
[source.extract() for source in one_data_list]

getting reservation.html
setting up web browser
browsed to reservation.html
browsed to reservation.html
browsed to reservation.html
browsed to reservation.html
browsed to reservation.html
browsed to reservation.html
closing browser


[None, None, None]

In [8]:
# (rows, columns) of each source's DataFrame, in list order.
[source.df.shape for source in one_data_list]

[(6, 4), (9, 10), (16, 18)]

Another one-liner! Isn't object-oriented programming rad?

In [9]:
# Name of each source, in list order.
[source.name for source in one_data_list]

['res_merge_lab', 'usfs_merge_lab', 'ridb_merge_lab']

We are ready to merge!

In [10]:
# Column index of each source's DataFrame, in list order.
[source.df.columns for source in one_data_list]

[Index(['FacilityLatitude', 'FacilityLongitude', 'FacilityName',
        'SitesAvailable'],
       dtype='object'),
 Index(['CurrentConditions', 'OpenSeason', 'Reservations', 'Restroom', 'Water',
        'FacilityElevation', 'FacilityLatitude', 'FacilityLongitude',
        'FacilityName', 'FacilityStatus'],
       dtype='object'),
 Index(['FacilityAdaAccess', 'FacilityDescription', 'FacilityDirections',
        'FacilityEmail', 'FacilityID', 'FacilityLatitude', 'FacilityLongitude',
        'FacilityMapURL', 'FacilityName', 'FacilityPhone',
        'FacilityReservationURL', 'FacilityTypeDescription',
        'FacilityUseFeeDescription', 'Keywords', 'LastUpdatedDate',
        'LegacyFacilityID', 'OrgFacilityID', 'StayLimit'],
       dtype='object')]

In [11]:
from classes.DistanceMergeData import DistanceMergeData

In [12]:
%autoreload


ERROR: Line magic function `%autoreload` not found.


In [25]:
# Show the first three sources (the whole list as defined above).
one_data_list[:3]

[<classes.ScraperData.ScraperData at 0x7f890d019860>,
 <classes.ScraperData.ScraperData at 0x7f890d0194a8>,
 <classes.RidbData.RidbData at 0x7f890d019208>]

In [26]:
# Merge only the first two sources (res_data and usfs_data); the slice leaves
# ridb_data out of this merge.
merge_data = DistanceMergeData("merge_newcode", one_data_list[:2], mysql_store)

In [27]:
# Run the merge. Presumably this populates `merge_data.df`, which the next
# cells inspect - confirm against DistanceMergeData.extract.
merge_data.extract()

In [28]:
# (rows, columns) of the merged frame.
merge_data.df.shape

(9, 12)

In [17]:
# Column names present in the merged frame.
merge_data.df.columns

Index(['CurrentConditions', 'FacilityAdaAccess', 'FacilityDescription',
       'FacilityDirections', 'FacilityElevation', 'FacilityEmail',
       'FacilityID', 'FacilityLatitude', 'FacilityLongitude', 'FacilityMapURL',
       'FacilityName', 'FacilityPhone', 'FacilityReservationURL',
       'FacilityStatus', 'FacilityTypeDescription',
       'FacilityUseFeeDescription', 'Keywords', 'LastUpdatedDate',
       'LegacyFacilityID', 'OpenSeason', 'OrgFacilityID', 'Reservations',
       'Restroom', 'SitesAvailable', 'StayLimit', 'Water', 'merge_index'],
      dtype='object')

In [None]:
# NOTE(review): presumably writes the merged frame to the store passed at
# construction (mysql_store) - confirm against DistanceMergeData.put.
merge_data.put()

In [None]:
# Name given to the merge object at construction.
merge_data.name

In [None]:
from classes.Pipeline import Pipeline

In [None]:
# (rows, columns) of the RIDB source frame before it goes into the pipeline.
ridb_data.df.shape

In [None]:
# Build a pipeline over all three sources. The merge step is passed as the
# DistanceMergeData class itself, not as an instance.
pipe = Pipeline('pipeline', [res_data,usfs_data, ridb_data], DistanceMergeData, mysql_store)

In [None]:
# Run the pipeline. Presumably this populates `pipe.df`, which later cells
# read - confirm against Pipeline.extract.
pipe.extract()

In [None]:
# Column names of the pipeline's output frame.
pipe.df.columns

In [None]:
%autoreload
%aimport classes
%aimport

In [None]:
from classes import BokehPlot
from bokeh.io import output_notebook
from bokeh.plotting import show

In [None]:
# Configure Bokeh to render plots inline in the notebook.
output_notebook()

In [None]:
# Columns of the merged frame handed to the plot helper.
cols_to_display = [
    "FacilityName",
    "SitesAvailable",
    "Water",
    "Restroom",
]

In [None]:
# Keep only rows that have a SitesAvailable value. dropna returns a new frame,
# so pipe.df itself is left untouched.
sites_with_availability = pipe.df.dropna(subset=['SitesAvailable'])

In [None]:
# Build the plot from the filtered rows and the selected columns.
# NOTE(review): how the columns are used is defined in classes/BokehPlot - confirm.
p = BokehPlot.create_plot(sites_with_availability, cols_to_display)

In [None]:
# Display the plot in the notebook output.
show(p)