## Notebook with code for week 5 capstone project

**Sections below:**
1. Python Code
2. Sample Data

### Python Code

In [211]:
# Import all needed packages here
import sys
import numpy as np
import pandas as pd
import requests
import json
import csv
from pandas.io.json import json_normalize
from bs4 import BeautifulSoup
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
#!conda install -c conda-forge geocoder --yes
#!conda install -c conda-forge geopy --yes
#!conda install -c conda-forge folium=0.5.0 --yes
import geocoder
from geopy.geocoders import Nominatim
import folium
#from data_wrangling import cleanse_data
from constants import CLIENT_ID, CLIENT_SECRET, CATID, VERSION, LIMIT
#import lxml
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_colwidth', 200)
print('Libraries imported.')

Libraries imported.


#### Get base coordinates for each island
These coordinates will be used for retrieving the Foursquare data.

In [193]:
base_loc = [ #Island      Town             Radius
            ('Hawaii',   'Naalehu, HI',    161000),
            ('Lanai',    'Lanai City, HI',  16100),
            ('Maui',     'Pukalani, HI',    40250),
            ('Molokai',  'Kualapuu, HI',     4830),
            ('Oahu',     'Laie, HI',        64400),
            ('Kauai',    'Wailua, HI',      40250)]
d_island = {}
for island, town, radius in base_loc:
    lat = lng = None
    while lng is None:
        gc = geocoder.arcgis(town)
        lat, lng = gc.latlng
    d_island[island] = {'latitude':lat, 'longitude': lng, 'radius': radius,
                        'venue_list': None, 'venue_sum_df': None, 'venue_tips_df': None, 
                        'venue_loc_df': None, 'map': None}
    print(town, lat, lng)
print('Done getting coordinates.')

Naalehu, HI 19.061420000000055 -155.58232999999998
Lanai City, HI 20.82794000000007 -156.91951999999998
Pukalani, HI 20.839210000000037 -156.34107999999998
Kualapuu, HI 21.151910000000044 -157.03659999999996
Laie, HI 21.64867000000004 -157.92323999999996
Wailua, HI 22.055890000000034 -159.37107999999998
Done getting coordinates.


#### Below Are All the Python Functions Used To Get and Wrangle the Data

In [6]:
def get_geo_data(lat, lng, radius):
    burl = 'https://api.foursquare.com/v2/venues/search?'
    buri = '&client_id={}&client_secret={}&v={}&categoryId={}&limit={}&radius={}&ll={},{}'
    url = burl + buri
    url = url.format(CLIENT_ID, CLIENT_SECRET, VERSION, CATID, LIMIT, radius, lat, lng)
    return requests.get(url).json()

In [7]:
def create_venues_list(json_data):
    l_dict_data = json_data['response']['venues'][:]
    l_venues = []
    for dven in l_dict_data:
        ltmp = [dven['name'].title(), 
                float(dven['location']['lat']), 
                float(dven['location']['lng']), 
                round(dven['location']['distance']/1609.344, 1),  # convert to miles
                dven['id']]
        l_venues.append(ltmp)
    l_venues.sort()
    return l_venues

In [9]:
def write_csv(filename, data):
    with open(filename, 'w') as fobj:
        wr = csv.writer(fobj, quoting=csv.QUOTE_ALL)
        for row in data:
            wr.writerow(row)

In [70]:
def read_csv(filename):
    l_data = []
    with open(filename, 'r') as f:
        reader = csv.reader(f, quoting=csv.QUOTE_ALL)
        for row in reader:
            l_data.append(row)
    return l_data

In [None]:
# get venue locations and store in CSV files for data cleansing
def get_venue_locations():
    l_island = d_island.keys()
    for island in l_island:
        rad = d_island[island]['radius']
        lat = d_island[island]['latitude']
        lng = d_island[island]['longitude']
        dat = get_geo_data(lat, lng, rad)
        ven = create_venues_list(dat)
        filename = "./csv_source/%s.csv" % island
        write_csv(filename, ven)
        print('Created CSV file: %s' % filename)

In [132]:
def get_map(loc):
    gc = geocoder.arcgis(loc)
    lat, lng = gc.latlng
    return folium.Map(location=[lat, lng], zoom_start=11)

In [83]:
def get_venue_summary_data(venue_id):
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'
    url = url.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
    result = requests.get(url).json()
    ven = result['response']['venue']
    course = ven['name']
    course_likes = ven['likes']['count']
    course_tips = ven['stats']['tipCount']
    try: rating = result['response']['venue']['rating']
    except: rating = 'N/A'
    ven_dat = [course, course_likes, course_tips, rating]
    return ven_dat
    # ven_dat looks like this ['Kona Country Club', 1, 3, 'rating']

In [120]:
def get_venue_tips(course, venue_id):
    url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'
    url = url.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, LIMIT)
    results = requests.get(url).json()
    tips = []
    l_tip = []  # results['response']['tips']['items']
    try: l_tip = results['response']['tips']['items']
    except: return ['Quota exceeded']
    for dtip in l_tip:
        tip = dtip['text']
        user = dtip.get('user', None)
        if not user:
            name = 'N/A'
        else:
            fname = user.get('firstName', 'N/A')
            lname = user.get('lastName', 'N/A')
            name = fname + ' ' + lname
        tips.append([course, tip, name])
    return tips

#### Note: Using Beautiful Soup to Scrape Web Data for Course Rankings

In [251]:
def get_soup(url):
    # go get the web page
    web_page = requests.get(url)
    # make some tasty soup
    soup = BeautifulSoup(web_page.content, 'html.parser')
    return soup

In [249]:
url = 'https://www.top100golfcourses.com/news-item/us-pacific-division-best-in-state-rankings-2018'
soup = get_soup(url)
crs_ranking = []
# find data in the table
crs_list = soup.findAll('table')[2]
html_rows = crs_list.find_all('tr')
# loop thru the rows, populate data
for tr in html_rows:
    td = tr.find_all('td')
    row = [tr.text.strip() for tr in td if tr.text.strip()]
    crs_ranking.append(row)

filename = './csv_source/scraped_ratings.csv'
write_csv(filename, crs_ranking)
print('Done gathering course ratings.')

Done gathering course ratings.


In [68]:
#cleanse_data()
def get_island_venues(island):
    filename = './csv_cleansed/%s.csv' % island
    return read_csv(filename)

In [255]:
def final_venue_locs():
    filename = './csv_cleansed/Final_venue_locs.csv'
    return read_csv(filename)

In [92]:
def get_all_course_summaries():
    for island in d_island:
        print('\n%s' % island)
        vens = get_island_venues(island)
        island_venues = []  # ['Golf Course', 'Likes', 'Tips', 'Rating']]
        for row in vens:
            ven_sum = get_venue_summary_data(row[4])
            island_venues.append(ven_sum)
            print(ven_sum)
        filename = './csv_source/%s_venue_summary.csv' % island
        write_csv(filename, island_venues)

In [172]:
def get_all_course_tips():
    for island in d_island:
        print('\n%s' % island)
        vens = get_island_venues(island)
        island_venues = []
        for row in vens:
            crs = row[0]
            vid = row[4]
            ven_tips = get_venue_tips(crs, vid)
            print('Working on course: %s' % crs)
            for tip in ven_tips:
                island_venues.append(tip)
                print(tip)
        filename = './csv_source/%s_venue_tips.csv' % island
        write_csv(filename, island_venues)

In [200]:
def create_df_venue_locs(list_data):
    df = pd.DataFrame(columns=['Golf Course', 'Latitude', 'Longitude'])
    for crs, lat, lng, dst, vid in list_data:
        df = df.append({'Golf Course': crs, 'Latitude': lat, 'Longitude': lng},
                        ignore_index=True)
    return df

In [186]:
def create_map(lat, lng, points, zoom=9):
    the_map = folium.Map(location=[lat, lng], zoom_start=zoom)
    # add points for golf courses
    for course, lat, lng, dst, vid in points:
        label = '{}'.format(course)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker([float(lat), float(lng)], radius=3, popup=label, color='blue',
            fill=True, fill_color='#3186cc', fill_opacity=0.7, 
            parse_html=False).add_to(the_map)
    return the_map

In [221]:
def map_each_island():
    for island, town, radius in base_loc:
        lat = float(d_island[island]['latitude'])
        lng = float(d_island[island]['longitude'])
        points = get_island_venues(island)
        hi_map = create_map(lat, lng, points)
        d_island[island]['map'] = hi_map
        print('Mapped', island)

In [182]:
def map_island_chain():
    ven_list = []
    lat = float(d_island['Molokai']['latitude'])
    lng = float(d_island['Molokai']['longitude'])
    for island in d_island:
        ven_list += get_island_venues(island)
    return create_map(lat, lng, ven_list, zoom=8)

In [256]:
def create_final_map(lat, lng, points, zoom=8):
    the_map = folium.Map(location=[lat, lng], zoom_start=zoom)
    # add points for golf courses
    for course, lat, lng in points:
        label = '{}'.format(course)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker([float(lat), float(lng)], radius=3, popup=label, color='blue',
            fill=True, fill_color='#3186cc', fill_opacity=0.7, 
            parse_html=False).add_to(the_map)
    return the_map

In [263]:
def map_venue_list():
    ven_list = final_venue_locs()
    lat = float(d_island['Molokai']['latitude'])
    lng = float(d_island['Molokai']['longitude'])
    return create_final_map(lat, lng, ven_list, zoom=7)

In [None]:
# This should be a one time run
get_venue_locations()

In [None]:
# This should be a one time run
get_all_course_summaries()

In [None]:
# This should be a one time run - DO NOT RUN IT TWICE OR WILL EXCEED FOURSQUARE QUOTA !!!
get_all_course_tips()

In [224]:
# One time run
map_each_island()

Mapped Hawaii
Mapped Lanai
Mapped Maui
Mapped Molokai
Mapped Oahu
Mapped Kauai


In [264]:
# One time run
island_chain_map = map_island_chain()
final_venue_map = map_venue_list()

In [201]:
# create and save DataFrames for venue locations
for island, *rest in base_loc:
    vens = get_island_venues(island)
    df = create_df_venue_locs(vens)
    d_island[island]['venue_loc_df'] = df

In [204]:
# create and save DataFrames for venue summary
for island, *rest in base_loc:
    filename = './csv_source/%s_venue_summary.csv' % island
    df = pd.DataFrame(columns=['Golf Course', 'Likes', 'Tips', 'Rating'])
    l_sum = read_csv(filename)
    for crs, like, tip, rate in l_sum:
        df = df.append({'Golf Course': crs, 'Likes': like, 'Tips': tip, 'Rating': rate},
                       ignore_index=True)
        d_island[island]['venue_sum_df'] = df

In [206]:
# create and save DataFrames for venue tips
for island, *rest in base_loc:
    filename = './csv_source/%s_venue_tips.csv' % island
    df = pd.DataFrame(columns=['Golf Course', 'Tip', 'Golfer'])
    l_sum = read_csv(filename)
    for crs, tip, golfer in l_sum:
        df = df.append({'Golf Course': crs, 'Tip': tip, 'Golfer': golfer},
                       ignore_index=True)
        d_island[island]['venue_tips_df'] = df

### Sample Data
#### Interactive Map of the Entire Hawaiian Island Chain

In [215]:
island_chain_map

In [265]:
final_venue_map

#### All the Golf Venues on the Island of Oahu

In [213]:
d_island['Oahu']['venue_loc_df']

Unnamed: 0,Golf Course,Latitude,Longitude
0,Ala Wai Golf Course,21.2774249751136,-157.818884683944
1,Aloha Golf Center,21.3218466042598,-157.876110554008
2,Arnold Palmer Course At Turtle Bay Golf,21.7018676366352,-157.995931649699
3,Barbers Point Golf Course,21.3247097331889,-158.038900735511
4,Bay View Golf Park,21.406152,-157.7892125
5,Coral Creek,21.3297087883719,-158.031702284379
6,Ewa Beach Golf Club,21.3161649238159,-157.992840164485
7,Ewa Villages Golf Course,21.3436726666464,-158.040516920183
8,Fort Shafter Golf,21.348564,-157.884539
9,Hawaii Country Club,21.4335117034745,-158.056783884795


#### Foursquare Data Containing Recent User Review Data for All Golf Courses on Oahu

In [212]:
d_island['Oahu']['venue_sum_df']

Unnamed: 0,Golf Course,Likes,Tips,Rating
0,Golf Course,Likes,Tips,Rating
1,Ala Wai Golf Course,9,7,7.5
2,Aloha Golf Center,1,3,
3,Arnold Palmer Course at Turtle Bay Golf,6,4,7.0
4,Barbers Point Golf Course,3,1,
5,Bay View Golf Park,7,3,6.9
6,Coral Creek,11,5,7.8
7,Ewa Beach Golf Club,8,6,6.5
8,Ewa Villages Golf Course,3,1,
9,Fort Shafter Golf,0,0,


#### Individual Written Reviews from Foursquare Users 
While the value of this data varies by review, it can also be useful for data cleansing.

In [214]:
d_island['Oahu']['venue_tips_df']

Unnamed: 0,Golf Course,Tip,Golfer
0,Ala Wai Golf Course,"The food is great, although a third world location for people deciding upon a place to eat, I highly recommend this place for good food and and pricing. The fried noodles is awesome!",Chris Allen Constantino
1,Ala Wai Golf Course,I have golfed here more than most travelers in Waikiki & the green fees are the lowest in the south part of Honolulu.Non members need a good week in advance for tee time. Cost around $150.00 a round.,Daniel Burton
2,Aloha Golf Center,The first place with HydroFlask in Hawaii and still the cheapest you'll find!!!,pitbull808 N/A
3,Aloha Golf Center,"Best place in Hawaii to get fit. The only place is Hi w/ digital loft/lie machines. Bettinardi, Adams, Mizuno, Ping,Miura fitting center!",pitbull808 N/A
4,Arnold Palmer Course At Turtle Bay Golf,Pretty neat Titleist has a rep here all the time. 36 holes of golf. Good to spend two days playing the two courses and not in one day to take in scenery. The coastline beckons you to play here,Daniel Burton
5,Arnold Palmer Course At Turtle Bay Golf,Bathroom on the 14th hole is very convenient when running in the blue trail.,Donna Genco
6,Barbers Point Golf Course,"Cheap for military, nice staff and a perfect course for a relaxed game with some friends.",Daniel Robinson
7,Bay View Golf Park,Twilight rate (Kama'aina) $20 to ride - greens in great condition! Best 3 hour round secret in Kaneohe!,John Campos
8,Bay View Golf Park,The greens are in excellent condition and fairways rapidly improving . a few tee boxes under improvement as well,John Campos
9,Coral Creek,"Every last Friday of the month, MaNeL808 plays here! They are a contemporary band which plays hits then and now! They are awesome! Esp. The vocalist! Very entertaining!",Zenz Raq


#### Initial List of Golf Courses to Support

In [254]:
filename = './csv_cleansed/Final_list.csv'
l_final = read_csv(filename)
df_courses = pd.DataFrame(columns=['Rank', 'Golf Course', 'Likes', 'Tips', 'Rating'])
for rank, crs, likes, tips, rating in l_final:
    df_courses = df_courses.append({'Rank': rank, 'Golf Course': crs, 'Likes': likes, 
                                    'Tips': tips, 'Rating': rating},
                                    ignore_index=True)

df_courses

Unnamed: 0,Rank,Golf Course,Likes,Tips,Rating
0,1,Nanea,0,0,
1,2,Kapalua (Plantation),9,1,7.3
2,3,Princeville (Prince),0,0,
3,4,Mauna Kea,9,6,7.3
4,5,Manele,0,0,
5,6,Kuki'o,2,0,
6,7,Hokuli'a,3,1,
7,8,Kohanaiki,0,0,
8,9,Kukui'ula,6,5,7.9
9,10,Poipu Bay,19,9,8.3


#### Individual Island Maps

In [225]:
d_island['Hawaii']['map']

In [226]:
d_island['Lanai']['map']

In [227]:
d_island['Maui']['map']


In [228]:
d_island['Molokai']['map']

In [229]:
d_island['Oahu']['map']

In [230]:
d_island['Kauai']['map']