### Objects for storage and data
#### How can the steps from Lab 1 be consolidated?

In [1]:
from classes.Storage import Storage
from classes.MysqlStorage import MysqlStorage
from classes.CsvStorage import CsvStorage
import config
import inspect
import requests
import pandas as pd

### Encapsulating data storage processes

What are the processes typically used around storing data?

In [2]:
print(inspect.getsource(Storage))

class Storage :
	"""Base class for storage back ends that save and load dataframes.

	Subclasses are expected to override put() and get(); the versions
	defined here only raise.
	"""

	def __init__(self, name) :
		# label identifying this storage back end
		self.name = name

	# store passed dataframe object as table <name>
	def put(self, df, name) :
		# Raise explicitly instead of `assert False`: assert statements are
		# removed when Python runs with -O, which would turn this into a
		# silent no-op.
		raise NotImplementedError("Storage.put(): needs implementation")

	# return requested query from storage in a dataframe
	def get(self, query_string) :
		raise NotImplementedError('Storage.get(): needs implementation')



In [3]:
print(inspect.getsource(CsvStorage))

class CsvStorage(Storage) :
	"""Storage back end that keeps each dataframe in a "<name>.csv" file."""

	# nothing to connect to; only register the storage name
	def __init__(self) :
		super().__init__('csvstorage')


	# store passed dataframe object as file <name>.csv
	# NOTE - put overwrites by default
	def put(self, df, name) :
		try:
			# The keyword is lower-case `index`. The previous `Index=False`
			# was rejected by to_csv as an unexpected keyword, so every
			# call ended up in the except branch and nothing was written.
			df.to_csv(name + ".csv", index=False)
		except Exception as ex:
			print("Storage.put failed")
			print(ex)

	# return the contents of <query_string>.csv in a dataframe;
	# an empty dataframe is returned when the file cannot be read
	def get(self, query_string) :
		try:
			df = pd.read_csv(query_string + ".csv")
		except Exception as ex:
			print("Storage.get failed")
			print(ex)
			return pd.DataFrame()
		return df



In [4]:
print(inspect.getsource(MysqlStorage))

class MysqlStorage(Storage) :
	"""Storage back end that keeps each dataframe in a database table.

	The connection is built from config.DB_USER, config.DB_PASS,
	config.DB_HOST and config.DB_NAME and handed to create_engine().
	"""

	# setup database connection to mysql unless otherwise specified
	def __init__(self, connPre='mysql+pymysql://') :
		# Register a name the same way CsvStorage does, so that .name is
		# available on every Storage object.
		super().__init__('mysqlstorage')
		connectStr = connPre + config.DB_USER + ":" + config.DB_PASS + "@" + config.DB_HOST +  "/" + config.DB_NAME
		try:
			self.db_engine = create_engine(connectStr)
		except Exception as ex:
			print("Storage object failed to initialize")
			print(ex)
			# -1 stands for "no engine"; put()/get() will then fail and
			# report through their own except branches
			self.db_engine = -1

	# store passed dataframe object as table <name>
	# NOTE - put overwrites by default
	def put(self, df, name) :
		try:
			df.to_sql(name, self.db_engine, if_exists='replace')
		except Exception as ex:
			print("Storage.put failed")
			print(ex)

	# return requested query from storage in a dataframe;
	# an empty dataframe is returned when the read fails
	def get(self, query_string) :
		try:
			# NOTE(review): query_string is concatenated straight into the
			# SQL text, so it must only ever come from trusted code.
			# The 'index' column is read back as the dataframe index.
			df = pd.read_sql('select * from ' + query_string, self.db_engine, index_col='index')
		except Exception as ex:
			print("Storage.get failed")
			print(ex)
			return pd.DataFrame()
		return df



Notice that both Storage implementations:
    1. Overwrite existing data
    2. Get/put entire tables/dataframes

In [5]:
store_mysql = MysqlStorage()

In [6]:
sql_test = store_mysql.get('test')

In [7]:
sql_test.head()

Unnamed: 0_level_0,FacilityAdaAccess,FacilityDescription,FacilityDirections,FacilityEmail,FacilityID,FacilityLatitude,FacilityLongitude,FacilityMapURL,FacilityName,FacilityPhone,FacilityReservationURL,FacilityTypeDescription,FacilityUseFeeDescription,Keywords,LastUpdatedDate,LegacyFacilityID,OrgFacilityID,StayLimit
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,,<h2>Overview</h2>Wildwood Recreation Site i...,Wildwood Recreation Site is located 39 m...,,234075,45.356111,-121.986667,,WILDWOOD RECREATION SITE,503-622-3696,,Camping,,,2016-05-12,74082.0,AN374082,
1,,<p>This small rustic campground is locate...,"<p><u>From Prineville , Oregon</u>:</p><p>Tr...",,236929,44.483882,-120.336554,,Wildwood Campground,,,,,,2016-05-09,,38780,
2,,<h2>Overview</h2>Whispering Falls Campground...,"8.3 miles southeast of Detroit , Oregon: ...",,251470,44.687792,-122.009353,,WHISPERING FALLS CAMPGROUND,503-854-3366,,Camping,,,2016-05-12,127540.0,AN427540,
3,,<p>The Resort is situated on the shores ...,"<p><u>From Portland , OR</u> Lost Lake Re...",,235897,45.5008,-121.81641,,Lost Lake Resort,,,,,,2016-05-09,,53230,
4,,<p>Lost Lake Campground is adjacent to H...,"<p>From McKenzie Bridge , OR , follow Hig...",,244288,44.429277,-121.912475,,Lost Lake Campground,,,,,,2016-05-09,,13362,


In [8]:
store_csv = CsvStorage()

In [9]:
store_csv.name

'csvstorage'

In [10]:
csv_test = store_csv.get('test')
csv_test.head()

Unnamed: 0,FacilityAdaAccess,FacilityDescription,FacilityDirections,FacilityEmail,FacilityID,FacilityLatitude,FacilityLongitude,FacilityMapURL,FacilityName,FacilityPhone,FacilityReservationURL,FacilityTypeDescription,FacilityUseFeeDescription,Keywords,LastUpdatedDate,LegacyFacilityID,OrgFacilityID,StayLimit
0,,<h2>Overview</h2>Wildwood Recreation Site i...,Wildwood Recreation Site is located 39 m...,,234075,45.356111,-121.986667,,WILDWOOD RECREATION SITE,503-622-3696,,Camping,,,2016-05-12,74082.0,AN374082,
1,,<p>This small rustic campground is locate...,"<p><u>From Prineville , Oregon</u>:</p><p>Tr...",,236929,44.483882,-120.336554,,Wildwood Campground,,,,,,2016-05-09,,38780.0,
2,,<h2>Overview</h2>Whispering Falls Campground...,"8.3 miles southeast of Detroit , Oregon: ...",,251470,44.687792,-122.009353,,WHISPERING FALLS CAMPGROUND,503-854-3366,,Camping,,,2016-05-12,127540.0,AN427540,
3,,<p>The Resort is situated on the shores ...,"<p><u>From Portland , OR</u> Lost Lake Re...",,235897,45.5008,-121.81641,,Lost Lake Resort,,,,,,2016-05-09,,53230.0,
4,,<p>Lost Lake Campground is adjacent to H...,"<p>From McKenzie Bridge , OR , follow Hig...",,244288,44.429277,-121.912475,,Lost Lake Campground,,,,,,2016-05-09,,13362.0,


### Encapsulating the data extraction process

In [11]:
from classes.Data import Data
from classes.RidbData import RidbData
from classes.RidbDataLive import RidbDataLive

In [12]:
print(inspect.getsource(Data))

class Data():
	"""Base class for a named dataset held in a dataframe.

	Subclasses are expected to override extract() to fill self.df.
	"""

	def __init__(self, name):
		# label identifying this dataset
		self.name = name
		# starts empty; populated by extract() in subclasses
		self.df = pd.DataFrame()

	def extract(self):
		# Raise explicitly instead of `assert False`: assert statements are
		# removed when Python runs with -O, which would turn this into a
		# silent no-op.
		raise NotImplementedError("Data.extract must be defined")



Recall from Lab 1:

In [13]:
ridb_facilities_url = "https://ridb.recreation.gov/api/v1/facilities"
# Query parameters for the facilities request. The earlier
# `camping_params = params=dict(...)` was a chained assignment that also
# created an unused global called `params`; only camping_params is needed.
camping_params = dict(
    activity_id=9,
    apiKey=config.RIDB_API_KEY,
    latitude=45.4977712,
    longitude=-121.8211673,
    radius=15,
)
# Pass the query parameters by keyword so the call reads unambiguously.
response = requests.get(ridb_facilities_url, params=camping_params)

In [14]:
print(inspect.getsource(RidbData))

class RidbData(Data):

	# RIDB API specific information
	activity_dict = dict(camping=9, hiking=14)
	ridb_endpoint = 'https://ridb.recreation.gov/api/v1'
	endpoint = ridb_endpoint + "/facilities"
	url_params = dict(apiKey = config.RIDB_API_KEY)

	# storage param is expected to be of type classes.Storage
	def __init__(self, name, activity, dict_params, storage):
		"""Prepare a facilities query for one activity.

		name        -- label for this dataset
		activity    -- key of activity_dict (e.g. 'camping' or 'hiking')
		dict_params -- extra URL parameters merged into the request parameters
		storage     -- expected to be of type classes.Storage

		If activity is not a known key, the problem is printed and the
		object is left without an activity_id.
		"""
		self.df = pd.DataFrame()
		# set these first so they exist even when the activity lookup fails
		self.storage = storage
		self.name = name
		try:
			self.activity_id = self.activity_dict[activity]
		except Exception as ex:
			print("RidbData.__init__(): cannot find activity: " + str(activity))
			# List the valid keys. Concatenating the bound `keys` method to a
			# str raised TypeError here and hid the message entirely.
			print("Activity options are " + ", ".join(self.activity_dict))
			print(ex)
			return

		# Work on a per-instance copy of the class-level defaults. Updating
		# the class dict in place leaked one object's activity_id and search
		# parameters into every other RidbData object.
		self.url_params = dict(self.url_params)
		# add the activity_id and the input dict_params to the RIDB key
		self.url_params.update(dict(activity_id = self.activity_id))
		self.url_params.update(dict_params)

	def clean(self) :
		self.df = self.df.replace('', np.nan)
		self.df = self.df.dropna(subset=['FacilityLatitude','FacilityLongitude'])
		if 'G

In [15]:
print(inspect.getsource(RidbDataLive))

class RidbDataLive(RidbData) :
	"""RidbData variant whose extract() reads facilities from the live endpoint."""

	def extract(self):
		"""Request self.endpoint with self.url_params and load the result into self.df.

		The 'RECDATA' entry of the JSON response is normalized into a
		dataframe and then passed through clean(). If the request or the
		parsing fails, the problem is printed, self.df is reset to an empty
		dataframe and clean() is not run.
		"""
		try:
			response = requests.get(url=self.endpoint,params=self.url_params)

		except Exception as ex:
			print("RidbDataLive.extract(): unable to get request " + self.endpoint)
			print("with params: " + str(self.url_params))
			print(ex)
			self.df = pd.DataFrame()
			return

		try :
			data = json.loads(response.text)
			self.df = json_normalize(data['RECDATA'])

		except Exception as ex:
			print("RidbDataLive.extract(): unable to read response")
			print(ex)
			# Nothing usable was parsed: reset and stop, as the request
			# failure branch does, instead of running clean() on a frame
			# that lacks the facility columns it expects.
			self.df = pd.DataFrame()
			return

		self.clean()



In [16]:
# Search centre and radius passed to every facility query built below.
dest_params = dict(latitude=45.4977712, longitude=-121.8211673, radius=15)
# Both objects get the same name, activity, search parameters and MySQL-backed
# storage; they differ only in class (RidbData vs. RidbDataLive).
ridb_camping = RidbData("ridb_camping", "camping", dest_params, store_mysql)
ridb_camping_live = RidbDataLive("ridb_camping", "camping", dest_params, store_mysql)

In [17]:
ridb_camping.df

In [18]:
# Fill .df on each object. RidbDataLive.extract() requests the RIDB facilities
# endpoint; RidbData.extract() is not shown in the source printed above.
ridb_camping.extract()
ridb_camping_live.extract()

In [19]:
ridb_camping.df.head()

Unnamed: 0,FacilityAdaAccess,FacilityDescription,FacilityDirections,FacilityEmail,FacilityID,FacilityLatitude,FacilityLongitude,FacilityMapURL,FacilityName,FacilityPhone,FacilityReservationURL,FacilityTypeDescription,FacilityUseFeeDescription,Keywords,LastUpdatedDate,LegacyFacilityID,OrgFacilityID,StayLimit
0,,<h2>Overview</h2>Wildwood Recreation Site i...,Wildwood Recreation Site is located 39 m...,,234075,45.356111,-121.986667,,WILDWOOD RECREATION SITE,503-622-3696,,Camping,,,2016-05-12,74082.0,AN374082,
1,,<p>This small rustic campground is locate...,"<p><u>From Prineville , Oregon</u>:</p><p>Tr...",,236929,44.483882,-120.336554,,Wildwood Campground,,,,,,2016-05-09,,38780,
2,,<h2>Overview</h2>Whispering Falls Campground...,"8.3 miles southeast of Detroit , Oregon: ...",,251470,44.687792,-122.009353,,WHISPERING FALLS CAMPGROUND,503-854-3366,,Camping,,,2016-05-12,127540.0,AN427540,
3,,<p>The Resort is situated on the shores ...,"<p><u>From Portland , OR</u> Lost Lake Re...",,235897,45.5008,-121.81641,,Lost Lake Resort,,,,,,2016-05-09,,53230,
4,,<p>Lost Lake Campground is adjacent to H...,"<p>From McKenzie Bridge , OR , follow Hig...",,244288,44.429277,-121.912475,,Lost Lake Campground,,,,,,2016-05-09,,13362,


In [20]:
ridb_camping_live.df.head()

Unnamed: 0,FacilityAdaAccess,FacilityDescription,FacilityDirections,FacilityEmail,FacilityID,FacilityLatitude,FacilityLongitude,FacilityMapURL,FacilityName,FacilityPhone,FacilityReservationURL,FacilityTypeDescription,FacilityUseFeeDescription,Keywords,LastUpdatedDate,LegacyFacilityID,OrgFacilityID,StayLimit
0,,<h2>Overview</h2>Riley Campground is an equest...,"From Sandy, travel Highway 26 for 18 miles to ...",,232834,45.381389,-121.859444,,RILEY HORSE CAMPGROUND,541-328-0909,,Camping,,,2016-05-26,71617.0,AN371617,
1,,<h2>Overview</h2>Still Creek Campground lies i...,"From Portland, travel east on Highway 26 to Go...",,232835,45.295833,-121.735556,,STILL CREEK,541-328-0909,,Camping,,,2016-05-26,71618.0,AN371618,
2,,<h2>Overview</h2>Tollgate Campground is one of...,"From Portland, travel southeast on Highway 26 ...",,232836,45.321944,-121.905278,,TOLLGATE,541-328-0909,,Camping,,,2016-05-26,71619.0,AN371619,
3,,<h2>Overview</h2>The Camp Creek Campground sit...,"From Portland, travel east on Highway 26 for a...",,232837,45.303056,-121.864722,,CAMP CREEK,541-328-0909,,Camping,,,2016-05-26,71620.0,AN371620,
4,True,<h2>Overview</h2>Lost Creek Campground is a fu...,"From Portland, follow Highway 26 for 18 miles ...",,232838,45.381944,-121.834444,,LOST CREEK,541-328-0909,,Camping,,,2016-05-26,71621.0,AN371621,


You can use the RidbData object to retrieve other types of facility info, like hiking!

In [21]:
ridb_hiking_live = RidbDataLive("ridb_hiking", "hiking", dest_params, store_mysql)

In [22]:
ridb_hiking_live.extract()

In [23]:
ridb_hiking_live.df.head()

Unnamed: 0,FacilityAdaAccess,FacilityDescription,FacilityDirections,FacilityEmail,FacilityID,FacilityLatitude,FacilityLongitude,FacilityMapURL,FacilityName,FacilityPhone,FacilityReservationURL,FacilityTypeDescription,FacilityUseFeeDescription,Keywords,LastUpdatedDate,LegacyFacilityID,OrgFacilityID,StayLimit
0,,<h2>Overview</h2>Riley Campground is an equest...,"From Sandy, travel Highway 26 for 18 miles to ...",,232834,45.381389,-121.859444,,RILEY HORSE CAMPGROUND,541-328-0909,,Camping,,,2016-05-26,71617.0,AN371617,
1,,<h2>Overview</h2>Still Creek Campground lies i...,"From Portland, travel east on Highway 26 to Go...",,232835,45.295833,-121.735556,,STILL CREEK,541-328-0909,,Camping,,,2016-05-26,71618.0,AN371618,
2,,<h2>Overview</h2>Tollgate Campground is one of...,"From Portland, travel southeast on Highway 26 ...",,232836,45.321944,-121.905278,,TOLLGATE,541-328-0909,,Camping,,,2016-05-26,71619.0,AN371619,
3,,<h2>Overview</h2>The Camp Creek Campground sit...,"From Portland, travel east on Highway 26 for a...",,232837,45.303056,-121.864722,,CAMP CREEK,541-328-0909,,Camping,,,2016-05-26,71620.0,AN371620,
4,True,<h2>Overview</h2>Lost Creek Campground is a fu...,"From Portland, follow Highway 26 for 18 miles ...",,232838,45.381944,-121.834444,,LOST CREEK,541-328-0909,,Camping,,,2016-05-26,71621.0,AN371621,


In [24]:
ridb_camping.put()

In [25]:
ridb_hiking_live.put()

In [26]:
ridb_camping.name

'ridb_camping'