In [1]:
import pandas as pd
import requests 
import json
from config import gkey
import numpy as np
from sqlalchemy import create_engine

In [None]:
# Wikipedia page whose HTML tables are scraped for the list of park units.
url = (
    'https://en.wikipedia.org/wiki/'
    'List_of_the_United_States_National_Park_System_official_units'
)

In [None]:
# Parse every HTML <table> on the page into a list of DataFrames.
tables = pd.read_html(io=url)

In [None]:
# Take the second parsed table (presumably the list of units - confirm against the page)
# and, where a Name occurs more than once, keep only its last row.
raw_df = tables[1]
df = raw_df.drop_duplicates(subset=['Name'], keep='last')
print(df.shape[0])

In [None]:
# States whose parks are kept by the location filter below.
states_that_matter = [
    "Washington", "Oregon", "California", "Nevada",
    "Idaho", "Montana", "Wyoming", "Utah",
    "Arizona", "New Mexico", "Colorado",
    "North Dakota", "South Dakota", "Nebraska", "Kansas",
    "Oklahoma", "Texas",
]

In [None]:
# Filter out states east of the Mississippi: keep only rows whose Location is
# one of states_that_matter.
# A single boolean mask replaces the row-by-row in-place drops, so the cell is
# idempotent and does not mutate the frame while iterating over it. Rows with a
# missing Location are dropped as well, since they cannot match any listed state.
# .copy() gives later cells an independent frame to modify.
df = df[df["Location"].isin(states_that_matter)].copy()
print(len(df))


In [None]:
# Renumber the remaining rows from 0, then show how many non-null values each column holds.
us_parks = df.reset_index(drop=True)
us_parks.notna().sum()

In [None]:
##DO NOT RUN NOW
# Split us_parks into one frame per unit type, matching on the suffix of the
# park name; each subset gets a fresh 0-based index.
park_names = us_parks['Name']

raw_national_parks = us_parks[park_names.str.endswith('National Park')]
national_parks = raw_national_parks.reset_index(drop=True)

raw_national_monuments = us_parks[park_names.str.endswith('National Monument')]
national_monuments = raw_national_monuments.reset_index(drop=True)

raw_recreation_areas = us_parks[park_names.str.endswith('Recreation Area')]
recreation_areas = raw_recreation_areas.reset_index(drop=True)

In [None]:
# Start every row with an empty type label; the next cell fills in the known types.
us_parks['type'] = ''

In [None]:
# Label each row by the suffix of its name; the label stored is the suffix itself.
for suffix in ('National Park', 'National Monument', 'Recreation Area'):
    us_parks.loc[us_parks['Name'].str.endswith(suffix), 'type'] = suffix

In [None]:
## Drop everything that isn't a National Park, Monument or Recreation Area,
## i.e. every row whose type label is still the empty string.
untyped_rows = us_parks.index[us_parks['type'] == '']
us_parks.drop(untyped_rows, inplace=True)

In [None]:
# These parks are removed because they break the API call loop
# (original note: an island can't be driven to).
parks_to_skip = [
    "Channel Islands National Park",
    "Rainbow Bridge National Monument",
    "Lake Chelan National Recreation Area",
]
us_parks.drop(us_parks.index[us_parks['Name'].isin(parks_to_skip)], inplace=True)


In [None]:
# Lower-case the two column names used by the rest of the notebook.
# `inplace` must be the boolean True, not the string 'True': the string is not a
# valid value for this flag (it only works by truthiness on pandas versions that
# do not validate it, and is rejected on versions that do).
us_parks.rename(columns={"Name": "name", "Location": "location"}, inplace=True)

In [None]:
# Preview the first five rows after the rename.
us_parks.head(n=5)

In [None]:
# Geocode every park by name with the Google Places "Find Place from Text"
# endpoint and store the coordinates as new `lat` / `lng` columns.
base_url = "https://maps.googleapis.com/maps/api/place/findplacefromtext/json"

lat_list = []
lng_list = []

for park in us_parks['name']:
    params = {"input": park, "key": gkey, "inputtype": "textquery", "fields": "geometry"}
    # A timeout keeps one stalled request from hanging the whole notebook.
    response = requests.get(base_url, params=params, timeout=10)
    trip = response.json()

    candidates = trip.get("candidates") or []
    if candidates:
        location = candidates[0]["geometry"]["location"]
        lat, lng = location["lat"], location["lng"]
    else:
        # No candidate came back: record a missing value so the lists stay
        # aligned with the rows, instead of raising IndexError mid-loop.
        print(f"No coordinates found for {park!r} (status: {trip.get('status')})")
        lat, lng = np.nan, np.nan

    lat_list.append(lat)
    lng_list.append(lng)

# Insert the coordinates at fixed column positions 3 and 4.
us_parks.insert(3, 'lat', lat_list)
us_parks.insert(4, 'lng', lng_list)


In [None]:
# Load the whole Census_Data table.
# NOTE(review): `conn` is not defined in any cell visible in this notebook - confirm where it comes from.
data = pd.read_sql(sql="SELECT * FROM Census_Data", con=conn)

In [None]:
# Driving distance (miles, parsed from the API's text field) and duration
# (hours) from target_city to every park, via the Google Distance Matrix API.
target_city = "Los Angeles"

base_url = "https://maps.googleapis.com/maps/api/distancematrix/json?units=imperial"

# Make the cell re-runnable: remove the columns left by a previous run so the
# insert() calls at the bottom do not fail on an already-existing column.
us_parks.drop(columns=['distance', 'duration'], errors='ignore', inplace=True)

distances = []
durations = []
for park in us_parks['name']:
    params = {"origins": target_city, "destinations": park, "key": gkey}
    # A timeout keeps one stalled request from hanging the whole notebook.
    response = requests.get(base_url, params=params, timeout=10)
    try:
        trip = response.json()
        element = trip["rows"][0]["elements"][0]
        distance = element["distance"]["text"]
        duration = element["duration"]["value"]

        # The duration value is divided by 3600 to express it in hours.
        hours = round(duration / 3600, 2)

        # Strip the unit suffix and thousands separators, e.g. "1,234 mi" -> 1234.
        if "." in distance:
            new_distance = float(distance.strip('mi').replace(',', ''))
        else:
            new_distance = int(distance.strip('mi').replace(',', ''))
    except (KeyError, IndexError, TypeError, ValueError):
        # No usable route in the response. Record a missing value rather than 0:
        # a 0-hour drive would make an unreachable park look like the closest one.
        new_distance = np.nan
        hours = np.nan

    distances.append(new_distance)
    durations.append(hours)

us_parks.insert(5, 'distance', distances)
us_parks.insert(6, 'duration', durations)

# The results are written to the database in a later cell so Google does not
# have to be queried every time.

In [None]:
# Preview the first five rows, now including the distance and duration columns.
us_parks.head(n=5)

In [None]:
# SQLAlchemy engine for the Postgres database "Parks" on localhost:5432.
# NOTE(review): the credentials are embedded in the URL - consider loading them from config or the environment.
rds_connection_string = "postgres:postgres@localhost:5432/Parks"
engine = create_engine('postgresql://' + rds_connection_string)

In [None]:
# Persist the frame to a table named after the lower-cased city, replacing any
# existing table of that name; the DataFrame index is not written.
us_parks.to_sql(
    con=engine,
    name=target_city.lower(),
    index=False,
    if_exists='replace',
)

In [None]:
# Ask for drive-time thresholds in hours and bucket the national parks by them.
# float() accepts whole numbers as well as answers such as "2.5".
close_index = float(input("What is considered close to you in hours?"))
# NOTE(review): near_index is collected but not used by any filter in this cell.
near_index = float(input("What is considered near to you in hours?"))
far_index = float(input("What is considered far to you in hours?"))

# Build the national-park subset from us_parks, which carries the lower-case
# 'duration' column (hours) added by the Distance Matrix cell. The earlier
# per-type frames were built before that column existed, and there is no
# 'Duration' column.
national_parks = us_parks[us_parks['type'] == 'National Park'].reset_index(drop=True)
park_hours = national_parks['duration']

# "far" here means everything reachable within far_index hours.
far_national_parks = national_parks[park_hours <= far_index]

medium_national_parks = national_parks[(park_hours >= close_index) & (park_hours < far_index)]

close_national_parks = national_parks[park_hours < close_index]

print("Here is what you could reach within those criteria")
close_national_parks

In [None]:
# Display the national parks that fell into the "medium" drive-time bucket.
medium_national_parks

In [None]:
# Display the national parks that fell into the "far" drive-time bucket.
far_national_parks

In [None]:
# Variant of the threshold cell: "far" is everything at or beyond far_index
# hours, and "close" is everything under near_index hours.
# float() accepts whole numbers as well as answers such as "2.5".
close_index = float(input("What is considered close to you in hours?"))
near_index = float(input("What is considered near to you in hours?"))
far_index = float(input("What is considered far to you in hours?"))

# Build the national-park subset from us_parks, which carries the lower-case
# 'duration' column (hours) added by the Distance Matrix cell. The earlier
# per-type frames were built before that column existed, and there is no
# 'Duration' column.
national_parks = us_parks[us_parks['type'] == 'National Park'].reset_index(drop=True)
park_hours = national_parks['duration']

far_national_parks = national_parks[park_hours >= far_index]

medium_national_parks = national_parks[(park_hours >= close_index) & (park_hours < far_index)]

# NOTE(review): "close" is cut at near_index while "medium" starts at
# close_index, so the two buckets overlap whenever near_index > close_index -
# confirm which threshold is intended.
close_national_parks = national_parks[park_hours < near_index]

print("Here is what you could reach within those criteria")
close_national_parks

In [None]:
##for scalability: when a user inputs a new city, the Google API is used to find all distances. After a city has been queried once, the database is accessed instead, for speed

#when you click a park, it takes you to a wiki page