## APPLIED DATA SCIENCE CAPSTONE PROJECT - WHERE TO OPEN A COFFEE SHOP IN IOWA CITY, IA


### Import Packages

In [2]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import lxml
import numpy as np
import os

In [None]:
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1.21.0-py_0       conda-forge

The following packages will be UPDATED:

    ca-

### Import Dataframe

In [None]:
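# The code was removed by Watson Studio for sharing.
# NOTE: this cell must load the neighborhood table into `df` (three columns:
# neighborhood name, latitude, longitude) before any of the cells below can run.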

In [None]:
 # Give the three columns the names that the later cells index by.
 # NOTE(review): the one-space indent on the assignment below looks accidental;
 # IPython appears to tolerate it, but a plain-Python export of this cell would
 # raise an IndentationError -- confirm and remove.
 df.columns = ['Neighborhood', 'Latitude', 'Longitude']
df.head()

### Use geopy library to get the latitude and longitude values of Iowa City

In [None]:
# Look up the coordinates used to centre the map of Iowa City.
address = 'Iowa City, IA, USA'

geolocator = Nominatim(user_agent="to_explorer")
# An explicit timeout avoids spurious failures on a slow geocoding service.
location = geolocator.geocode(address, timeout=10)
if location is None:
    # geocode() returns None when the service finds no match; fail here with a
    # clear message instead of an AttributeError on the next line.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Iowa City are {}, {}.'.format(latitude, longitude))

### Make a map of Iowa City and put neighborhoods on top

In [None]:
# Base map centred on the geocoded Iowa City coordinates.
map_IC = folium.Map(location=[latitude, longitude], zoom_start=12)

# Appearance shared by every neighborhood marker.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle per neighborhood; clicking it shows the neighborhood name.
for lat, lng, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    popup = folium.Popup(str(neighborhood), parse_html=True)
    marker = folium.CircleMarker([lat, lng], popup=popup, **marker_style)
    marker.add_to(map_IC)

map_IC

### Define Foursquare credentials

In [None]:
# Foursquare credentials are read from the environment so that secrets are never
# stored in, or printed by, the notebook.
# NOTE(review): the key pair that used to be hard-coded here has been exposed and
# should be revoked/rotated in the Foursquare developer console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180604' # sent as the `v` query parameter of every API request
LIMIT = 30 # sent as the `limit` query parameter of every API request

# Report only whether the credentials are available -- never their values.
if CLIENT_ID and CLIENT_SECRET:
    print('Foursquare credentials loaded from the environment.')
else:
    print('WARNING: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET '
          'before calling the Foursquare API.')

### Create function to explore all neighborhoods

In [None]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, timeout=10):
    """Collect the venues Foursquare returns around each neighborhood.

    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT values
    and issues one `venues/explore` request per (name, latitude, longitude)
    triple.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences describing the neighborhoods; iterated with zip().
    radius : int, default 500
        Forwarded unchanged as the API's `radius` parameter.
    timeout : float, default 10
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but keeps these columns) when nothing is found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (e.g. invalid credentials).
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        response = requests.get(endpoint, params=params, timeout=timeout)
        # Surface API errors directly instead of a KeyError further down.
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups') or []
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # A venue without any category must not abort the whole run.
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Passing `columns` here keeps the schema even when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)

### Run function on all neighborhoods

In [None]:
# Fetch the nearby venues for every neighborhood row of `df`.
IowaCity_venues = getNearbyVenues(df['Neighborhood'], df['Latitude'], df['Longitude'])

### Create dataframe using onehot encoding

In [None]:
# one hot encoding
IowaCity_onehot = pd.get_dummies(IowaCity_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
IowaCity_onehot['Neighborhood'] = IowaCity_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [IowaCity_onehot.columns[-1]] + list(IowaCity_onehot.columns[:-1])
IowaCity_onehot = IowaCity_onehot[fixed_columns]

IowaCity_onehot.head()

In [None]:
# Lift pandas' display truncation so frames are rendered with every column and row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [None]:
# Show the complete one-hot table; the display limits were lifted in the preceding cell.
IowaCity_onehot

### Create dataframe grouped by neighborhood with mean for each venue


In [None]:
# Share of each venue category among a neighborhood's venues (mean of the 0/1 indicators).
onehot_by_neighborhood = IowaCity_onehot.groupby('Neighborhood', as_index=False)
IowaCity_grouped = onehot_by_neighborhood.mean()
IowaCity_grouped

### Create dataframe grouped by neighborhood with sum for each venue

In [None]:
# Number of venues per category in each neighborhood (sum of the 0/1 indicators).
IowaCity_grouped1 = IowaCity_onehot.groupby('Neighborhood', as_index=False).sum()
IowaCity_grouped1

### Create a dataframe with just neighborhoods and coffee shop venue totals

In [None]:
# Coffee-shop count next to each neighborhood name.
IowaCity_grouped1.loc[:, ['Neighborhood', 'Coffee Shop']]

### Create a dataframe of venues only serving food and beverages

In [None]:
# Venue categories that count as food/beverage service for this analysis.
food_beverage_categories = [
    'Bakery', 'Bar', 'Beer Garden', 'Breakfast Spot', 'Burrito Place',
    'Coffee Shop', 'Diner', 'Dive Bar', 'Falafel Restaurant',
    'Fast Food Restaurant', 'Fried Chicken Joint', 'Ice Cream Shop',
    'Italian Restaurant', 'Japanese Restaurant', 'Juice Bar',
    'Mexican Restaurant', 'Pizza Place', 'Pub', 'Restaurant', 'Sandwich Place',
]

# reindex() returns a new frame (safe to add columns to later) and fills a
# category that is absent from the fetched data with 0 instead of raising KeyError.
IowaCity1 = IowaCity_grouped1.reindex(
    columns=['Neighborhood'] + food_beverage_categories, fill_value=0)

In [None]:
# Display the food-and-beverage subset built in the previous cell.
IowaCity1

### Add a new column called 'Sum' that totals all food/beverage service venues in each neighborhood

In [None]:
# Total only the venue-count columns: the neighborhood label is not numeric, and
# excluding an existing 'Sum' keeps the cell idempotent when it is re-run.
venue_counts = IowaCity1.drop(columns=['Neighborhood', 'Sum'], errors='ignore')

# assign() builds a new frame, so nothing is written into a slice of IowaCity_grouped1.
IowaCity1 = IowaCity1.assign(Sum=venue_counts.sum(axis=1))
IowaCity1

### Create dataframe with just neighborhood and sum for report

In [None]:
# Neighborhood names with their food/beverage venue totals, for the report.
IowaCity1.loc[:, ['Neighborhood', 'Sum']]

### List top 5 venues in every neighborhood

In [None]:
num_top_venues = 5

# Print the most frequent venue categories of every neighborhood.
for hood, freqs in IowaCity_grouped.set_index('Neighborhood').iterrows():
    print("----"+hood+"----")
    # Build the two-column table directly; the earlier version assigned into an
    # .iloc slice, which triggers pandas' SettingWithCopyWarning.
    temp = pd.DataFrame(
        {'venue': freqs.index, 'freq': freqs.values.astype(float).round(2)},
        columns=['venue', 'freq'])
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

### Create dataframe with top venues for each neighborhood

In [None]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest entries of `row`.

    The first element of `row` is skipped (the caller passes rows whose first
    field is the neighborhood name); the remaining entries are ranked from
    highest to lowest and the index labels of the leading ones are returned
    as a NumPy array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index[:num_top_venues].values

In [None]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


def _ordinal_label(rank):
    """Return `rank` with its English ordinal suffix, e.g. 1 -> '1st', 11 -> '11th', 21 -> '21st'."""
    last_two, last_one = rank % 100, rank % 10
    # 11th-13th are irregular; every rank not ending in 1, 2 or 3 takes 'th'.
    if last_two in (11, 12, 13) or not 1 <= last_one <= 3:
        return '{}th'.format(rank)
    return '{}{}'.format(rank, indicators[last_one - 1])


# create columns according to number of top venues
columns = ['Neighborhood'] + [
    '{} Most Common Venue'.format(_ordinal_label(rank))
    for rank in range(1, num_top_venues + 1)
]

# create a new dataframe keyed by neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = IowaCity_grouped['Neighborhood']

# fill each row with that neighborhood's top venue categories
for ind in range(IowaCity_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(IowaCity_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted