# Capstone Project - The Battle of Neighborhoods (Week 2) - Visit Hawaii

<div class="alert alert-block alert-info" style="margin-top: 20px">
    
**Applied Data Science Capstone by IBM/Coursera**

The purpose of this Capstone assignment is to showcase students' skills and tools in using location data to explore a geographical location. Students have the opportunity to be as *creative* as they want and come up with an idea to leverage the *Foursquare* location data to explore or compare neighborhoods or cities of their choice, or to come up with a problem that the *Foursquare* location data can be used to solve.

</div>

In [None]:
# global setting for verbose
# When True, the cells below print intermediate results (geocoded coordinates,
# dataframe shapes, K-Means cluster centers) as they run.
verbose = True

In [None]:
# The code was removed by Watson Studio for sharing.
# Read the Google Geocoding API key from the GOOGLE_API_KEY environment
# variable so the real key never has to be stored in the notebook. The
# placeholder is only a fallback and must be replaced (or the variable set)
# before any geocoding call can succeed.
import os

google_api_key = os.environ.get('GOOGLE_API_KEY', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')

In [None]:
# The code was removed by Watson Studio for sharing.
# Foursquare credentials are read from the environment (FOURSQUARE_CLIENT_ID /
# FOURSQUARE_CLIENT_SECRET) so they are not stored in the notebook; the
# placeholders are only fallbacks and must be replaced before calling the API.
import os

foursquare_client_id = os.environ.get('FOURSQUARE_CLIENT_ID', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ') # your Foursquare ID
foursquare_client_secret = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ') # your Foursquare Secret
foursquare_version = '20180724' # Foursquare API version

# Each category below is an (id, display name) pair: the id is sent to the
# Foursquare "explore" endpoint and the name is stored in the 'category'
# column of the resulting venue dataframe.

# Foursquare category - outdoors
outdoor_categoryId = '4d4b7105d754a06377d81259'
outdoor_categoryName = 'outdoors'

# Foursquare category - art
art_categoryId = '4d4b7104d754a06370d81259'
art_categoryName = 'art'

# Foursquare category - food
food_categoryId = '4d4b7105d754a06374d81259'
food_categoryName = 'food'

# Foursquare category - bars
bars_categoryId = '4d4b7105d754a06376d81259'
bars_categoryName = 'bars'

In [None]:
# let's unpack both the island_df.pkl and neighborhood_df.pkl files created during the data gathering step.

# useful for many scientific computing in Python
import numpy as np

# primary data structure library
import pandas as pd 

# pickle library to unpack the .pkl files
import pickle

# Unpack - island_df.pkl
# NOTE: pickle.load can execute arbitrary code while loading; only open .pkl
# files that were produced by this project's own data gathering step.
with open('island_df.pkl', 'rb') as f:
    island_df = pickle.load(f)
    
# Unpack - neighborhood_df.pkl
with open('neighborhood_df.pkl', 'rb') as f:
    neighborhood_df = pickle.load(f)  

In [None]:
%matplotlib inline

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches # needed for waffle Charts

# Apply the 'ggplot' style sheet to the matplotlib figures drawn from here on.
mpl.style.use('ggplot') # optional: for ggplot-like style

In [None]:
# import requests and sys libraries
import requests
import sys

# method: Use Google API to retrieve coordinates using the address information
def get_coordinates(api_key, address):
    """Geocode an address with the Google Geocoding API.

    Args:
        api_key: Google API key, sent as the ``key`` query parameter.
        address: Free-form address or place name to look up.

    Returns:
        ``[lat, lon]`` taken from the first geocoding result, or
        ``[None, None]`` when the request fails or has no usable result.
    """
    try:
        # Let requests build the query string so spaces, commas, '&' or '#'
        # in the address are percent-encoded instead of corrupting the URL.
        response = requests.get(
            'https://maps.googleapis.com/maps/api/geocode/json',
            params={'key': api_key, 'address': address},
            timeout=10,  # do not hang the notebook on a stalled connection
        ).json()
        location = response['results'][0]['geometry']['location']  # first match only
        return [location['lat'], location['lng']]
    except Exception:
        # Best-effort lookup: network errors, invalid JSON and empty or
        # malformed results all map to "no coordinates". Unlike a bare
        # except, KeyboardInterrupt and SystemExit still propagate.
        return [None, None]

# test: "get_coordinates"
# Smoke test: geocode Honolulu once; the printed value is [None, None] when
# the lookup failed (for example with the placeholder API key).
hawaii_address = 'Honolulu, Hawaii, USA'
hawaii_coordinates = get_coordinates(google_api_key, hawaii_address)

if verbose:
    print('Coordinate of {}: {}'.format(hawaii_address, hawaii_coordinates))

In [None]:
# method: Use Google API to retrieve neighborhood information using the address information
def get_neighborhood(api_key, address):
    """Look up the neighborhood name of an address with the Google Geocoding API.

    Args:
        api_key: Google API key, sent as the ``key`` query parameter.
        address: Free-form address, place name or zip code to look up.

    Returns:
        The ``long_name`` of the first address component of the first result
        that is tagged ``'neighborhood'``. When no such component exists or
        the request fails, the one-element list ``[None]`` is returned (kept
        as-is for backward compatibility with existing callers).
    """
    try:
        # Let requests percent-encode the query parameters.
        response = requests.get(
            'https://maps.googleapis.com/maps/api/geocode/json',
            params={'key': api_key, 'address': address},
            timeout=10,  # do not hang the notebook on a stalled connection
        ).json()
        components = response['results'][0]['address_components']
        for component in components:
            # A component may carry several types (or none); test membership
            # rather than only the first entry, so that an empty 'types'
            # list no longer aborts the whole scan.
            if 'neighborhood' in component.get('types', []):
                return component['long_name']
        return [None]
    except Exception:
        # Best-effort lookup: any request or parsing failure means "unknown".
        # KeyboardInterrupt and SystemExit still propagate.
        return [None]

# test: "get_neighborhood"
hawaii_address = '96830' # Zip code for Waikiki
hawaii_neighborhood = get_neighborhood(google_api_key, hawaii_address)

if verbose:
    # The value printed here is a neighborhood name, not a coordinate.
    print('Neighborhood of {}: {}'.format(hawaii_address, hawaii_neighborhood))

In [None]:
# import folium for map
#!conda install -c conda-forge folium=0.5.0 --yes
import folium
from folium import plugins
from folium.plugins import HeatMap
from folium.features import DivIcon

# import json
import json

# import numpy
import numpy

In [None]:
# install wordcloud
!conda install -c conda-forge wordcloud==1.4.1 --yes

# import package and its set of stopwords
from wordcloud import WordCloud, STOPWORDS

In [None]:
# Airbnb listings, read from the CSV file next to the notebook
airbnb_housing_df = pd.read_csv('airbnb_listing_hawaii.csv')

# module-level containers; data_latlons is reassigned by the analysis cells
latlons, data_latlons = {}, {}

# GeoJSON that is drawn as an overlay on every island map
with open('neighbourhoods.geojson') as f:
    hawaii_geojson = json.load(f)

# method: apply style for geojson
def geojson_style(feature):
    """Return the blue, unfilled style used for every GeoJSON feature.

    The ``feature`` argument is accepted because the function is passed as a
    ``style_function`` callback, but it is not inspected.
    """
    style = dict(color='blue', fill_opacity=0.5, fill=False)
    return style

# method: apply style for geojson
def geojson_light_style(feature):
    """Return the light-blue, unfilled variant of the GeoJSON style.

    The ``feature`` argument is accepted for use as a style callback but is
    not inspected.
    """
    style = dict(color='lightblue', fill_opacity=0.5, fill=False)
    return style

# method: get_zipcode_map with zip code information
def get_island_zipcode_map(title, island, neighborhood_df, heatmap_latlons, zoom_level=11):
    """Build a folium map of an island with markers, zones and a heatmap.

    Args:
        title: Heading rendered above the map.
        island: Address string that is geocoded to center the map.
        neighborhood_df: Dataframe read by column position: columns 4 and 5
            are used as marker latitude/longitude and columns 3 and 2 are
            shown in the popup (presumably name and zip code - confirm
            against the data gathering step).
        heatmap_latlons: List of ``[lat, lon]`` points for the heatmap layer.
        zoom_level: Initial zoom of the map.

    Returns:
        The populated ``folium.Map``.
    """
    island_address = island
    island_coordinates = get_coordinates(google_api_key, island_address)

    island_map = folium.Map(location=island_coordinates, zoom_start=zoom_level)
    folium.Marker(island_coordinates, popup=island_address).add_to(island_map)

    # add one circle marker per neighborhood row that has coordinates
    for _, row in neighborhood_df.iterrows():
        # .iloc keeps the access explicitly positional; integer keys on a
        # label-indexed Series are deprecated in recent pandas releases.
        lat, lon = row.iloc[4], row.iloc[5]
        if numpy.isnan(lat) or numpy.isnan(lon):
            continue
        folium.CircleMarker(
            [lat, lon],
            radius=5,
            color='yellow',
            fill=True,
            fill_color='blue',
            fill_opacity=0.8,
            popup='{} ({})'.format(row.iloc[3], row.iloc[2])
        ).add_to(island_map)

    # divide into several zones
    folium.GeoJson(hawaii_geojson, style_function=geojson_style, name='geojson').add_to(island_map)

    # apply heatmap to latitudes and longitudes
    HeatMap(heatmap_latlons).add_to(island_map)

    island_map.get_root().html.add_child(folium.Element('<h3 align="center" style="font-size:20px"><b>{}</b></h3>'.format(title)))

    return island_map

# method: get a specific island
def get_island_dataframes(island_df, neighborhood_df, airbnb_housing_df, island_name):
    """Return the rows of the three dataframes that belong to one island.

    ``island_df`` and ``neighborhood_df`` are filtered on their ``'island'``
    column. ``airbnb_housing_df`` is filtered on ``'neighbourhood_group'``,
    where the island name ``'Oahu'`` is looked up as ``'Honolulu'``.

    Returns:
        Tuple ``(island rows, neighborhood rows, Airbnb rows)``.
    """
    island_rows = island_df.loc[island_df['island'] == island_name]
    neighborhood_rows = neighborhood_df.loc[neighborhood_df['island'] == island_name]

    # the Airbnb data uses 'Honolulu' where the other frames use 'Oahu'
    airbnb_group = 'Honolulu' if island_name == 'Oahu' else island_name
    airbnb_rows = airbnb_housing_df.loc[airbnb_housing_df['neighbourhood_group'] == airbnb_group]

    return island_rows, neighborhood_rows, airbnb_rows

# method: generate crime incident data from dataframe
def generate_crime_incidents(neighborhood_df):
    """Collect one heatmap point per neighborhood that has reported crime.

    Rows are read by column position: column 2 is the de-duplication key,
    columns 4 and 5 are latitude/longitude and column 6 is the incident count
    (presumably the zip code is column 2 - confirm against the data
    gathering step).

    A row contributes a point when its count is greater than zero and both
    coordinates are present. Points are keyed by column 2, so the count only
    decides whether a row is included; it does not weight the point. A later
    row with the same key replaces the earlier coordinates.

    Returns:
        List of ``[lat, lon]`` pairs in first-seen key order.
    """
    latlons = {}
    for _, row in neighborhood_df.iterrows():
        lat, lon, incidents = row.iloc[4], row.iloc[5], row.iloc[6]
        # A single comparison replaces the former range(count) loop, which
        # rewrote the same dictionary entry on every pass and failed on a
        # non-integer count. A NaN count is skipped as well.
        if not incidents > 0:
            continue
        if numpy.isnan(lat) or numpy.isnan(lon):
            continue
        latlons[row.iloc[2]] = (lat, lon)

    return [[lat, lon] for lat, lon in latlons.values()]

# method: generate airbnb data from dataframe
def generate_airbnb_housing(airbnb_df):
    """Collect one heatmap point per Airbnb listing that has coordinates.

    Rows are read by column position: column 0 is the de-duplication key and
    columns 6 and 7 are latitude/longitude. Rows with a missing coordinate
    are skipped; a later row with the same key replaces the earlier one.

    Returns:
        List of ``[lat, lon]`` pairs in first-seen key order.
    """
    latlons = {}
    for _, row in airbnb_df.iterrows():
        # .iloc keeps the access explicitly positional; integer keys on a
        # label-indexed Series are deprecated in recent pandas releases.
        lat, lon = row.iloc[6], row.iloc[7]
        if numpy.isnan(lat) or numpy.isnan(lon):
            continue
        latlons[row.iloc[0]] = (lat, lon)

    return [[lat, lon] for lat, lon in latlons.values()]

# method: get_toppicks_map with venue information
def generate_toppicks(island, zoom_level=11):
    """Collect heatmap points for the Foursquare top picks of an island.

    Up to 50 venues are requested for each of the outdoors, art, food and
    bars categories. A venue that appears in several categories contributes
    a single point.

    Args:
        island: Place name passed to Foursquare as the ``near`` parameter.
        zoom_level: Unused; kept so existing calls keep working.

    Returns:
        List of ``[lat, lon]`` pairs.
    """
    categories = [
        (outdoor_categoryId, outdoor_categoryName),
        (art_categoryId, art_categoryName),
        (food_categoryId, food_categoryName),
        (bars_categoryId, bars_categoryName),
    ]

    latlons = {}
    for category_id, category_name in categories:
        venues_df = get_toppicks_venues(island, category_id, category_name, 50)
        for _, venue in venues_df.iterrows():
            lat, lon = venue['latitude'], venue['longitude']
            if numpy.isnan(lat) or numpy.isnan(lon):
                continue
            # Key by venue id (get_toppicks_venues stores it in the
            # 'categoryId' column). The previous key was column 0, the
            # category name, so every venue of a category overwrote the one
            # before it and at most four points reached the heatmap.
            latlons[venue['categoryId']] = (lat, lon)

    return [[lat, lon] for lat, lon in latlons.values()]

# method: toppicks
def get_toppicks_venues(island, categoryId, categoryName, limit=20):
    """Fetch the Foursquare top picks of one category near a place.

    Args:
        island: Place name sent as the ``near`` parameter.
        categoryId: Foursquare category id to restrict the search to.
        categoryName: Label stored in the ``category`` column of every row.
        limit: Maximum number of venues requested.

    Returns:
        DataFrame with the columns ``category``, ``categoryId``, ``name``,
        ``subcategory``, ``latitude``, ``longitude`` and ``address``. Note
        that the ``categoryId`` column holds the venue id, not the category
        id. The frame is empty when the response contains no venue group.
    """
    venues_columns = [
        'category',
        'categoryId',
        'name',
        'subcategory',
        'latitude',
        'longitude',
        'address']

    # Let requests percent-encode the parameters (the place name contains
    # spaces and commas).
    response = requests.get(
        'https://api.foursquare.com/v2/venues/explore',
        params={
            'client_id': foursquare_client_id,
            'client_secret': foursquare_client_secret,
            'v': foursquare_version,
            'near': island,
            'categoryId': categoryId,
            'limit': limit,
            'sortByPopularity': 1,
            'section': 'topPicks',
        },
    ).json()

    # A response without venue groups yields an empty frame instead of a
    # KeyError/IndexError.
    groups = response.get('response', {}).get('groups') or []
    items = groups[0].get('items', []) if groups else []

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    records = []
    for item in items:
        venue = item['venue']
        location = venue['location']
        categories = venue.get('categories') or []
        records.append({
            'category': categoryName,
            'categoryId': venue['id'],
            'name': venue['name'],
            'subcategory': categories[0]['name'] if categories else None,
            'latitude': location['lat'],
            'longitude': location['lng'],
            'address': ', '.join(location.get('formattedAddress', [])),
        })

    return pd.DataFrame(records, columns=venues_columns)

# method: generate top-picks in string
def generate_word_count_toppicks(island):
    """Build the weighted text that feeds the top-picks word cloud.

    Up to 25 outdoors venues and 25 art venues are fetched. Each venue name
    is repeated ``weight ** 2`` times, where the weight starts at 50 for the
    first venue and drops by one per venue, so earlier venues dominate the
    cloud.

    Args:
        island: Place name passed to Foursquare as the ``near`` parameter.

    Returns:
        One string of space-terminated, repeated venue names.
    """
    outdoor_venues_df = get_toppicks_venues(island, outdoor_categoryId, outdoor_categoryName, 25)
    art_venues_df = get_toppicks_venues(island, art_categoryId, art_categoryName, 25)
    results_df = pd.concat([outdoor_venues_df, art_venues_df])

    start_weight = 50

    # join() builds the (large) result in a single pass instead of growing a
    # string with +=; the island is no longer geocoded because the
    # coordinates were never used here.
    return ''.join(
        (name + ' ') * ((start_weight - position) ** 2)
        for position, name in enumerate(results_df['name'])
    )

In [None]:
# method: get_island_toppicks_outdoor_map with zip code information
def get_island_toppicks_outdoor_map(title, island, limit=20, zoom_level=11):
    """Build a folium map showing the top outdoor venues of an island.

    Args:
        title: Heading rendered above the map.
        island: Address string, geocoded to center the map and also sent to
            Foursquare as the ``near`` parameter.
        limit: Maximum number of venues requested.
        zoom_level: Initial zoom of the map.

    Returns:
        The populated ``folium.Map``: one circle marker plus one text label
        per venue that has coordinates.
    """
    island_address = island
    island_coordinates = get_coordinates(google_api_key, island_address)

    island_map = folium.Map(location=island_coordinates, zoom_start=zoom_level)
    folium.Marker(island_coordinates, popup=island_address).add_to(island_map)

    # divide into several zones
    folium.GeoJson(hawaii_geojson, style_function=geojson_style, name='geojson').add_to(island_map)

    outdoor_venues_df = get_toppicks_venues(island_address, outdoor_categoryId, outdoor_categoryName, limit)

    # add one marker and one label per venue; columns are read by label
    # because integer keys on a label-indexed Series are deprecated in
    # recent pandas releases
    for _, venue in outdoor_venues_df.iterrows():
        lat, lon = venue['latitude'], venue['longitude']
        if numpy.isnan(lat) or numpy.isnan(lon):
            continue

        # apostrophes are removed from the name before it is embedded
        venue_name = venue['name'].replace("'", "")

        folium.CircleMarker(
            [lat, lon],
            radius=5,
            color='yellow',
            fill=True,
            fill_color='blue',
            fill_opacity=0.8,
            popup='{}'.format(venue_name)
        ).add_to(island_map)

        # add label markers
        label = '<div style="font-size:8pt">{}</div>'.format(venue_name)
        folium.Marker(
            [lat, lon],
            icon=DivIcon(
                icon_anchor=(0,0),
                icon_size=(180,36),
                html=label)
        ).add_to(island_map)

    island_map.get_root().html.add_child(folium.Element('<h3 align="center" style="font-size:20px"><b>{}</b></h3>'.format(title)))

    return island_map

In [None]:
# verify airbnb_housing_df dataframe is unpacked successfully
if verbose:
    # Jupyter only renders the last top-level expression of a cell; a bare
    # .head() inside the if-block is discarded, so print it explicitly.
    print(airbnb_housing_df.head())

In [None]:
# verify neighborhood_df dataframe is unpacked successfully
if verbose:
    # Jupyter only renders the last top-level expression of a cell; a bare
    # .head() inside the if-block is discarded, so print it explicitly.
    print(neighborhood_df.head())

In [None]:
!pip install shapely
import shapely.geometry

!pip install pyproj
import pyproj

import math

def lonlat_to_xy(lon, lat, zone=33):
    """Project WGS84 longitude/latitude to UTM x/y coordinates.

    Args:
        lon: Longitude in degrees.
        lat: Latitude in degrees.
        zone: UTM zone used for the projection. The default of 33 is the
            value that used to be hard-coded and covers central Europe; the
            Hawaiian islands lie in UTM zones 4 and 5, so pass ``zone=4`` or
            ``zone=5`` when projecting the data of this notebook.

    Returns:
        Tuple ``(x, y)`` in the chosen UTM zone.
    """
    # NOTE(review): pyproj.transform is marked deprecated in newer pyproj
    # releases in favour of pyproj.Transformer - confirm against the
    # installed version before upgrading.
    proj_latlon = pyproj.Proj(proj='latlong', datum='WGS84')
    proj_xy = pyproj.Proj(proj='utm', zone=zone, datum='WGS84')
    xy = pyproj.transform(proj_latlon, proj_xy, lon, lat)
    return xy[0], xy[1]

def xy_to_lonlat(x, y, zone=33):
    """Convert UTM x/y coordinates back to WGS84 longitude/latitude.

    Args:
        x: UTM easting.
        y: UTM northing.
        zone: UTM zone the coordinates are expressed in. The default of 33
            is the value that used to be hard-coded and covers central
            Europe; the Hawaiian islands lie in UTM zones 4 and 5. Use the
            same zone that produced ``x`` and ``y``.

    Returns:
        Tuple ``(lon, lat)`` in degrees.
    """
    # NOTE(review): pyproj.transform is marked deprecated in newer pyproj
    # releases in favour of pyproj.Transformer - confirm against the
    # installed version before upgrading.
    proj_latlon = pyproj.Proj(proj='latlong', datum='WGS84')
    proj_xy = pyproj.Proj(proj='utm', zone=zone, datum='WGS84')
    lonlat = pyproj.transform(proj_xy, proj_latlon, x, y)
    return lonlat[0], lonlat[1]

def calc_xy_distance(x1, y1, x2, y2):
    """Return the straight-line distance between (x1, y1) and (x2, y2)."""
    delta_x, delta_y = x2 - x1, y2 - y1
    squared_length = delta_x * delta_x + delta_y * delta_y
    return math.sqrt(squared_length)

def coordinates_distance_miles(coordinate1, coordinate2):
    """Return the great-circle distance in miles between two coordinates.

    Args:
        coordinate1: Sequence ``(lat, lon)`` in degrees.
        coordinate2: Sequence ``(lat, lon)`` in degrees.

    Returns:
        Haversine distance in miles, computed with an Earth radius of
        6373.0 km and converted with 0.621371 miles per kilometre.
    """
    earth_radius_km = 6373.0
    km_to_miles = 0.621371

    lat1 = math.radians(coordinate1[0])
    lon1 = math.radians(coordinate1[1])
    lat2 = math.radians(coordinate2[0])
    lon2 = math.radians(coordinate2[1])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    # Haversine formula
    a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2
    # Rounding can push `a` marginally outside [0, 1] for (nearly) antipodal
    # points, which would make sqrt(1 - a) raise; clamp it first.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    distance = earth_radius_km * c

    return (distance * km_to_miles) # convert to miles


# Smoke test: geocode Lihue and Honolulu, then print both coordinates and the
# haversine distance between them.
# NOTE: get_coordinates returns [None, None] when a lookup fails, in which
# case the distance call on the last line raises TypeError.
lihue_address = 'Lihue, Hawaii, USA'
lihue_coordinates = get_coordinates(google_api_key, lihue_address)
honolulu_address = 'Honolulu, Hawaii, USA'
honolulu_coordinates = get_coordinates(google_api_key, honolulu_address)

print('Lihue: latitude={}, longitude={}'.format(lihue_coordinates[0], lihue_coordinates[1]))
print('Honolulu: latitude={}, longitude={}'.format(honolulu_coordinates[0], honolulu_coordinates[1]))
print('Distance (miles) from Lihue to Honolulu: {}miles'.format(coordinates_distance_miles(lihue_coordinates, honolulu_coordinates)))

In [None]:
# import sklearn - Kmeans
from sklearn.cluster import KMeans

# method: define clusters
def get_housing_kmeans_map(title, island, data_airbnb_housing_df, heatmap_latlons, clusters=15, zoom_level=11):
    """Build a folium map with K-Means clusters of Airbnb listings.

    Args:
        title: Heading rendered above the map.
        island: Address string that is geocoded to center the map.
        data_airbnb_housing_df: Dataframe with ``latitude`` and ``longitude``
            columns; rows missing either value are ignored for clustering.
        heatmap_latlons: List of ``[lat, lon]`` points for the heatmap layer.
        clusters: Requested number of clusters; reduced to the number of
            usable listings when there are fewer listings than clusters.
        zoom_level: Initial zoom of the map.

    Returns:
        The populated ``folium.Map`` with one green circle per cluster center.
    """
    # KMeans rejects NaN input and needs at least as many samples as
    # clusters, so clean the coordinates and cap the cluster count first.
    good_xys = data_airbnb_housing_df[['latitude', 'longitude']].dropna().values
    n_clusters = min(clusters, len(good_xys))
    if n_clusters > 0:
        kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(good_xys)
        cluster_centers = kmeans.cluster_centers_
    else:
        cluster_centers = []  # nothing to cluster: draw the map without circles

    if verbose:
        print(title)
        print(island)
        print(cluster_centers)

    island_address = island
    island_coordinates = get_coordinates(google_api_key, island_address)

    island_map = folium.Map(location=island_coordinates, zoom_start=zoom_level)
    folium.TileLayer('cartodbpositron').add_to(island_map)

    HeatMap(heatmap_latlons).add_to(island_map)

    # divide into several zones
    folium.GeoJson(hawaii_geojson, style_function=geojson_style, name='geojson').add_to(island_map)

    for lat, lon in cluster_centers:
        folium.Circle([lat, lon], radius=1500, color='green', fill=True, fill_opacity=0.25).add_to(island_map)

    island_map.get_root().html.add_child(folium.Element('<h3 align="center" style="font-size:20px"><b>{}</b></h3>'.format(title)))

    return island_map

In [None]:
# Start with every listing flagged as safe; update_airbnb_housing_df_for_safe_area
# flips the flag to False for listings close to a neighborhood with reported crime.
airbnb_housing_df['safe_area'] = True

# check which Airbnb posting is within 5 miles of crime being reported
def update_airbnb_housing_df_for_safe_area():
    """Flag Airbnb listings that lie within 5 miles of reported crime.

    Reads the module-level ``airbnb_housing_df`` and ``neighborhood_df`` and
    sets ``safe_area`` to False in ``airbnb_housing_df`` for every matching
    listing. Only listings whose ``neighbourhood_group`` is ``'Honolulu'``
    are examined. Progress is printed as ``*`` per listing and ``.`` per
    neighborhood that was compared.

    Rows are read by column position: listing columns 0, 5, 6 and 7 are the
    id, a name compared with 'Honolulu', latitude and longitude; neighborhood
    columns 4, 5 and 6 are latitude, longitude and the incident count.
    """
    # NOTE(review): listings are already filtered on neighbourhood_group and
    # column 5 is compared with 'Honolulu' again below - confirm that column 5
    # is the intended column.
    temp_df = airbnb_housing_df.loc[airbnb_housing_df['neighbourhood_group'] == 'Honolulu']
    for _, listing in temp_df.iterrows():
        print(' *', end='')
        listing_lat, listing_lon = listing.iloc[6], listing.iloc[7]
        # since only crime committed in Honolulu, let's restrict it
        if listing.iloc[5] != 'Honolulu' or numpy.isnan(listing_lat) or numpy.isnan(listing_lon):
            continue
        for _, area in neighborhood_df.iterrows():
            print(' .', end='')
            area_lat, area_lon = area.iloc[4], area.iloc[5]
            # Both coordinates must be present; the longitude used to go
            # unchecked because the latitude was tested twice.
            if not area.iloc[6] > 0 or numpy.isnan(area_lat) or numpy.isnan(area_lon):
                continue
            distance = coordinates_distance_miles([listing_lat, listing_lon], [area_lat, area_lon])
            if distance <= 5: # if crime committed in radius range of 5 miles
                airbnb_housing_df.loc[airbnb_housing_df['id'] == listing.iloc[0], 'safe_area'] = False
                break  # already flagged; further neighborhoods cannot change the result
                    

## Section 4: Analysis <a name="analysis"></a>

As mentioned in the previous section, the analysis is done separately for each Hawaiian island. Grouping the information this way makes it possible to present the findings one island at a time. For each Hawaiian island, the analysis covers the following areas of study:
- Crime Incidents
- Accommodation
- Foursquare Top-Picks
- COVID-19

### 1. Kauai (aka. The Garden Isle)

Let's begin by reviewing the data available for Kauai.

In [None]:
# restrict the data by island
# get_island_dataframes returns (island rows, neighborhood rows, Airbnb rows);
# the cells of this section read these three module-level frames.
data_island_df, data_neighborhood_df, data_airbnb_housing_df = get_island_dataframes(island_df, neighborhood_df, airbnb_housing_df, 'Kauai')

# verify the dataframes
if verbose:
    print('data_island_df.shape: {}'.format(data_island_df.shape))
    print('data_neighborhood_df.shape: {}'.format(data_neighborhood_df.shape))
    print('data_airbnb_housing_df.shape: {}'.format(data_airbnb_housing_df.shape)) 

**Crime Incidents on Kauai**

First, we take a look at Kauai's crime incidents. However, none of the 14 zip codes assigned to the island of Kauai has any crime incidents reported to the police, so the map below does not display any crime *heatmap*.

In [None]:
# generate crime incidents
data_latlons = generate_crime_incidents(data_neighborhood_df)

# crime incident heatmap
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Kauai - Crime Incident Heatmap', 'Kauai, Hawaii, USA', data_neighborhood_df, data_latlons)
island_map

**Airbnb Housing Data**

Use a *heatmap* to find where the Airbnb postings are.

In [None]:
# generate Airbnb housing
data_latlons = generate_airbnb_housing(data_airbnb_housing_df)

# Airbnb housing heatmap
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Kauai - Airbnb Housing Heatmap', 'Kauai, Hawaii, USA', data_neighborhood_df, data_latlons)
island_map

**Use K-Means to Define 15 Clusters of Housing Neighborhoods**

Next, use the K-Means machine learning technique to define 15 clusters using the Airbnb housing data. Two of the popular neighborhoods on Kauai are **Hanalei** in the north of the island and **Poipu** in the south of the island. These two neighborhoods tend to be more expensive compared to other neighborhoods on the island.

In [None]:
# display 15 clusters of neighborhoods base on Airbnb data
# data_latlons still holds the Airbnb points from the previous cell.
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_housing_kmeans_map('Kauai - Use K-Means to Define 15 Clusters of Housing Neighborhoods', 'Kauai, Hawaii', data_airbnb_housing_df, data_latlons, clusters=15, zoom_level=11)
island_map

**Top-Picks Heatmap**

After retrieving the top-picks from Foursquare, let's put the venues on the map using a *heatmap*. This helps to identify where the visitor destinations are.

In [None]:
# generate top-picks
data_latlons = generate_toppicks('Kauai, Hawaii, USA')

# top-picks heatmap
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Kauai - Top-Picks Heatmap', 'Kauai, Hawaii, USA', data_neighborhood_df, data_latlons)
island_map

**Kauai: Word Count**
    
Using the *word count* technique, we can easily see the popular venues.

In [None]:
# weighted text of the top-pick venue names
word_string = generate_word_count_toppicks('Kauai, Hawaii, USA')

# render the word cloud on a white background
wordcloud = WordCloud(background_color='white').generate(word_string)

# draw it on a 14 x 18 inch figure without axes
fig = plt.figure(figsize=(14, 18))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()

**Kauai: Top-Picks on the Map**

Here is a list of the top-picks on the map.

In [None]:
# top-picks on a map
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_toppicks_outdoor_map('Kauai - Top-Picks', 'Kauai, Hawaii, USA')
island_map

### 2. Oahu (aka. The Gathering Place)

In [None]:
# restrict the data by island
# get_island_dataframes returns (island rows, neighborhood rows, Airbnb rows);
# for 'Oahu' the Airbnb rows are those whose neighbourhood_group is 'Honolulu'.
data_island_df, data_neighborhood_df, data_airbnb_housing_df = get_island_dataframes(island_df, neighborhood_df, airbnb_housing_df, 'Oahu')

# verify the dataframes
if verbose:
    print('data_island_df.shape: {}'.format(data_island_df.shape))
    print('data_neighborhood_df.shape: {}'.format(data_neighborhood_df.shape))  
    print('data_airbnb_housing_df.shape: {}'.format(data_airbnb_housing_df.shape)) 

In [None]:
# generate crime incidents
data_latlons = generate_crime_incidents(data_neighborhood_df)

# crime incident heatmap
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Oahu - Crime Incident Heatmap', 'Oahu, Hawaii, USA', data_neighborhood_df, data_latlons)
island_map

In [None]:
# generate Airbnb housing
data_latlons = generate_airbnb_housing(data_airbnb_housing_df)

# Airbnb housing heatmap
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Oahu - Airbnb Housing Heatmap', 'Oahu, Hawaii, USA', data_neighborhood_df, data_latlons)
island_map

In [None]:
# display 15 clusters of neighborhoods base on Airbnb data
# data_latlons still holds the Airbnb points from the previous cell.
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_housing_kmeans_map('Oahu - Use K-Means to Define 15 Clusters of Housing Neighborhoods', 'Oahu, Hawaii', data_airbnb_housing_df, data_latlons, clusters=15, zoom_level=11)
island_map

In [None]:
# generate top-picks
data_latlons = generate_toppicks('Oahu, Hawaii, USA')

# top-picks heatmap
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Oahu - Top-Picks Heatmap', 'Oahu, Hawaii, USA', data_neighborhood_df, data_latlons)
island_map

In [None]:
# weighted text of the top-pick venue names
word_string = generate_word_count_toppicks('Oahu, Hawaii, USA')

# render the word cloud on a white background
wordcloud = WordCloud(background_color='white').generate(word_string)

# draw it on a 14 x 18 inch figure without axes
fig = plt.figure(figsize=(14, 18))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()

In [None]:
# top-picks on a map
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_toppicks_outdoor_map('Oahu - Top-Picks', 'Oahu, Hawaii, USA')
island_map

### 3. Molokai (aka. The Friendly Isle)

In [None]:
# restrict the data by island
# get_island_dataframes returns (island rows, neighborhood rows, Airbnb rows);
# the cells of this section read these three module-level frames.
data_island_df, data_neighborhood_df, data_airbnb_housing_df = get_island_dataframes(island_df, neighborhood_df, airbnb_housing_df, 'Molokai')

# verify the dataframes
if verbose:
    print('data_island_df.shape: {}'.format(data_island_df.shape))
    print('data_neighborhood_df.shape: {}'.format(data_neighborhood_df.shape))
    print('data_airbnb_housing_df.shape: {}'.format(data_airbnb_housing_df.shape)) 

In [None]:
# generate crime incidents
data_latlons = generate_crime_incidents(data_neighborhood_df)

# crime incident heatmap
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Molokai - Crime Incident Heatmap', 'Molokai, Hawaii, USA', data_neighborhood_df, data_latlons)
island_map

In [None]:
# The Airbnb listings for Molokai are taken from the 'Maui' group. Only the
# Airbnb frame is replaced: data_island_df and data_neighborhood_df keep the
# Molokai rows so the maps below still draw Molokai's own zip code markers.
data_airbnb_housing_df = get_island_dataframes(island_df, neighborhood_df, airbnb_housing_df, 'Maui')[2]

# generate Airbnb housing
data_latlons = generate_airbnb_housing(data_airbnb_housing_df)

# Airbnb housing heatmap
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Molokai - Airbnb Housing Heatmap', 'Molokai, Hawaii, USA', data_neighborhood_df, data_latlons)
island_map

In [None]:
# display 15 clusters of neighborhoods base on Airbnb data
# data_latlons still holds the Airbnb points from the previous cell.
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_housing_kmeans_map('Molokai - Use K-Means to Define 15 Clusters of Housing Neighborhoods', 'Molokai, Hawaii', data_airbnb_housing_df, data_latlons, clusters=15, zoom_level=11)
island_map

In [None]:
# generate top-picks
data_latlons = generate_toppicks('Molokai, Hawaii, USA')

# top-picks heatmap
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Molokai - Top-Picks Heatmap', 'Molokai, Hawaii, USA', data_neighborhood_df, data_latlons)
island_map

In [None]:
# weighted text of the top-pick venue names
word_string = generate_word_count_toppicks('Molokai, Hawaii, USA')

# render the word cloud on a white background
wordcloud = WordCloud(background_color='white').generate(word_string)

# draw it on a 14 x 18 inch figure without axes
fig = plt.figure(figsize=(14, 18))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()

In [None]:
# top-picks on a map
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_toppicks_outdoor_map('Molokai - Top-Picks', 'Molokai, Hawaii, USA')
island_map

### 4. Lanai (aka. The Pineapple Isle)

In [None]:
# restrict the data by island
# get_island_dataframes returns (island rows, neighborhood rows, Airbnb rows);
# the cells of this section read these three module-level frames.
data_island_df, data_neighborhood_df, data_airbnb_housing_df = get_island_dataframes(island_df, neighborhood_df, airbnb_housing_df, 'Lanai')

# verify the dataframes
if verbose:
    print('data_island_df.shape: {}'.format(data_island_df.shape))
    print('data_neighborhood_df.shape: {}'.format(data_neighborhood_df.shape))
    print('data_airbnb_housing_df.shape: {}'.format(data_airbnb_housing_df.shape)) 

In [None]:
# generate crime incidents
data_latlons = generate_crime_incidents(data_neighborhood_df)

# crime incident heatmap
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Lanai - Crime Incident Heatmap', 'Lanai, Hawaii, USA', data_neighborhood_df, data_latlons)
island_map

In [None]:
# The Airbnb listings for Lanai are taken from the 'Maui' group. Only the
# Airbnb frame is replaced: data_island_df and data_neighborhood_df keep the
# Lanai rows so the maps below still draw Lanai's own zip code markers.
data_airbnb_housing_df = get_island_dataframes(island_df, neighborhood_df, airbnb_housing_df, 'Maui')[2]

# generate Airbnb housing
data_latlons = generate_airbnb_housing(data_airbnb_housing_df)

# Airbnb housing heatmap
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Lanai - Airbnb Housing Heatmap', 'Lanai, Hawaii, USA', data_neighborhood_df, data_latlons)
island_map

In [None]:
# display 15 clusters of neighborhoods base on Airbnb data
# data_latlons still holds the Airbnb points from the previous cell.
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_housing_kmeans_map('Lanai - Use K-Means to Define 15 Clusters of Housing Neighborhoods', 'Lanai, Hawaii', data_airbnb_housing_df, data_latlons, clusters=15, zoom_level=11)
island_map

In [None]:
# generate top-picks
data_latlons = generate_toppicks('Lanai, Hawaii, USA')

# top-picks heatmap
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Lanai - Top-Picks Heatmap', 'Lanai, Hawaii, USA', data_neighborhood_df, data_latlons)
island_map

In [None]:
# weighted text of the top-pick venue names
word_string = generate_word_count_toppicks('Lanai, Hawaii, USA')

# render the word cloud on a white background
wordcloud = WordCloud(background_color='white').generate(word_string)

# draw it on a 14 x 18 inch figure without axes
fig = plt.figure(figsize=(14, 18))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()

In [None]:
# top-picks on a map
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_toppicks_outdoor_map('Lanai - Top-Picks', 'Lanai, Hawaii, USA')
island_map

### 5. Maui (aka. The Valley Isle)

In [None]:
# restrict the data by island
# get_island_dataframes returns (island rows, neighborhood rows, Airbnb rows);
# the cells of this section read these three module-level frames.
data_island_df, data_neighborhood_df, data_airbnb_housing_df = get_island_dataframes(island_df, neighborhood_df, airbnb_housing_df, 'Maui')

# verify the dataframes
if verbose:
    print('data_island_df.shape: {}'.format(data_island_df.shape))
    print('data_neighborhood_df.shape: {}'.format(data_neighborhood_df.shape)) 
    print('data_airbnb_housing_df.shape: {}'.format(data_airbnb_housing_df.shape)) 

In [None]:
# generate crime incidents
data_latlons = generate_crime_incidents(data_neighborhood_df)

# crime incident heatmap
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Maui - Crime Incident Heatmap', 'Maui, Hawaii, USA', data_neighborhood_df, data_latlons)
island_map

In [None]:
# generate Airbnb housing
data_latlons = generate_airbnb_housing(data_airbnb_housing_df)

# Airbnb housing heatmap
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Maui - Airbnb Housing Heatmap', 'Maui, Hawaii, USA', data_neighborhood_df, data_latlons)
island_map

In [None]:
# display 15 clusters of neighborhoods base on Airbnb data
# data_latlons still holds the Airbnb points from the previous cell.
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_housing_kmeans_map('Maui - Use K-Means to Define 15 Clusters of Housing Neighborhoods', 'Maui, Hawaii', data_airbnb_housing_df, data_latlons, clusters=15, zoom_level=11)
island_map

In [None]:
# generate top-picks
data_latlons = generate_toppicks('Maui, Hawaii, USA')

# top-picks heatmap
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Maui - Top-Picks Heatmap', 'Maui, Hawaii, USA', data_neighborhood_df, data_latlons)
island_map

In [None]:
# weighted text of the top-pick venue names
word_string = generate_word_count_toppicks('Maui, Hawaii, USA')

# render the word cloud on a white background
wordcloud = WordCloud(background_color='white').generate(word_string)

# draw it on a 14 x 18 inch figure without axes
fig = plt.figure(figsize=(14, 18))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()

In [None]:
# top-picks on a map
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_toppicks_outdoor_map('Maui - Top-Picks', 'Maui, Hawaii, USA')
island_map

### 6. Big Island (aka. The Island of Hawaii)

In [None]:
# restrict the data by island
# get_island_dataframes returns (island rows, neighborhood rows, Airbnb rows);
# the Big Island is selected with the name 'Hawaii' in all three frames.
data_island_df, data_neighborhood_df, data_airbnb_housing_df = get_island_dataframes(island_df, neighborhood_df, airbnb_housing_df, 'Hawaii')

# verify the dataframes
if verbose:
    print('data_island_df.shape: {}'.format(data_island_df.shape))
    print('data_neighborhood_df.shape: {}'.format(data_neighborhood_df.shape))
    print('data_airbnb_housing_df.shape: {}'.format(data_airbnb_housing_df.shape)) 

In [None]:
# generate crime incidents
data_latlons = generate_crime_incidents(data_neighborhood_df)

# crime incident heatmap (zoom level 10 instead of the default 11)
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Island of Hawaii - Crime Incident Heatmap', 'Island of Hawaii, Hawaii, USA', data_neighborhood_df, data_latlons, 10)
island_map

In [None]:
# generate Airbnb housing
data_latlons = generate_airbnb_housing(data_airbnb_housing_df)

# Airbnb housing heatmap (zoom level 10 instead of the default 11)
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Island of Hawaii - Airbnb Housing Heatmap', 'Island of Hawaii, Hawaii, USA', data_neighborhood_df, data_latlons, 10)
island_map

In [None]:
# display 15 clusters of neighborhoods base on Airbnb data
# data_latlons still holds the Airbnb points from the previous cell.
# NOTE(review): this map uses zoom level 11 while the other Big Island maps
# use 10 - confirm whether that is intended.
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_housing_kmeans_map('Big Island - Use K-Means to Define 15 Clusters of Housing Neighborhoods', 'Island of Hawaii, Hawaii', data_airbnb_housing_df, data_latlons, clusters=15, zoom_level=11)
island_map

In [None]:
# generate top-picks
data_latlons = generate_toppicks('Island of Hawaii, Hawaii, USA')

# top-picks heatmap (zoom level 10 instead of the default 11)
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_zipcode_map('Island of Hawaii - Top-Picks Heatmap', 'Island of Hawaii, Hawaii, USA', data_neighborhood_df, data_latlons, 10)
island_map

In [None]:
# weighted text of the top-pick venue names
word_string = generate_word_count_toppicks('Island of Hawaii, Hawaii, USA')

# render the word cloud on a white background
wordcloud = WordCloud(background_color='white').generate(word_string)

# draw it on a 14 x 18 inch figure without axes
fig = plt.figure(figsize=(14, 18))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()

In [None]:
# top-picks on a map
# (bound to island_map rather than map, which would shadow the builtin map())
island_map = get_island_toppicks_outdoor_map('Big Island - Top-Picks', 'Island of Hawaii, Hawaii, USA')
island_map