# San Diego New Restaurant

### Business Problem
In San Diego, California if a person is looking to open a restaurant, where would they open it?

In [2]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
import json # library to handle JSON files
import requests # library to handle requests
import matplotlib.cm as cm # Matplotlib and associated plotting modules
import matplotlib.colors as colors
import folium # map rendering library

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
from sklearn.cluster import KMeans # import k-means from clustering stage
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

### Data
Using Zillow Open Data Soft Neighborhood data - retrieve the neighborhoods for San Diego

In [3]:
response = requests.get("https://public.opendatasoft.com/api/records/1.0/search/?dataset=zillow-neighborhoods&rows=200&facet=state&facet=county&facet=city&facet=name&refine.state=CA&refine.county=San+Diego")
sdj = response.json()

In [4]:
column_names = ['Neighborhood','City','County','State','Latitude','Longitude'] 
sdf = pd.DataFrame(columns=column_names)

for data in sdj['records']:
    neighborhood = data['fields']['name'] 
    city = data['fields']['city']
    county = data['fields']['county']
    state = data['fields']['state']
    lat = data['fields']['geo_point_2d'][0]
    lon = data['fields']['geo_point_2d'][1]
    sdf = sdf.append({'Neighborhood': neighborhood,'City': city, 'County': county, 'State': state, 'Latitude': lat, 'Longitude': lon}, ignore_index=True)
        

In [5]:
address = 'San Diego, CA'
geolocator = Nominatim(user_agent="sd_explorer")
location = geolocator.geocode(address)
sd_latitude = location.latitude
sd_longitude = location.longitude

In [6]:
map_sd = folium.Map(location=[sd_latitude, sd_longitude], zoom_start=10)

for index, row in sdf.iterrows():
    label = '{}'.format(row['Neighborhood'])
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [row['Latitude'], row['Longitude']],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_sd)  

In [16]:
CLIENT_ID = 'UUTW1SJMSRNAECWRGCB2YATUROPNRT3KZMBCDUEGMNRT2KAA' # your Foursquare ID
CLIENT_SECRET = 'QQFYX4G5WB4NUXCKXVV3MY3DLOVOO2IEYNNYIDPACJO514PF' # your Foursquare Secret
VERSION = '20200311' # Foursquare API version
# type your answer here
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

In [17]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):    
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [18]:
sd_venues = getNearbyVenues(names=sdf['Neighborhood'],
                                   latitudes=sdf['Latitude'],
                                   longitudes=sdf['Longitude']
                                  )

In [21]:
sd_onehot = pd.get_dummies(sd_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
sd_onehot['Neighborhood'] = sd_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [sd_onehot.columns[-1]] + list(sd_onehot.columns[:-1])
sd_onehot = sd_onehot[fixed_columns]

sd_grouped = sd_onehot.groupby('Neighborhood').mean().reset_index()

In [22]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [23]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = sd_grouped['Neighborhood']

for ind in np.arange(sd_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(sd_grouped.iloc[ind, :], num_top_venues)

In [24]:
kclusters = 10

sd_grouped_clustering = sd_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(sd_grouped_clustering)

In [25]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [55]:
sd_merged = sdf

sd_merged = sd_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

sd_merged['Cluster Labels'] = sd_merged['Cluster Labels'].replace(np.nan, 0)
sd_merged['Cluster Labels'] = sd_merged['Cluster Labels'].astype(int)

# create map
map_clusters = folium.Map(location=[sd_latitude, sd_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(sd_merged['Latitude'], sd_merged['Longitude'], sd_merged['Neighborhood'], sd_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [56]:
sd_merged.loc[sd_merged['Cluster Labels'] == 0, sd_merged.columns[[0] + list(range(5, sd_merged.shape[1]-5))]]

Unnamed: 0,Neighborhood,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
41,Granger,-117.076716,0,American Restaurant,Yoga Studio,Fish & Chips Shop,Exhibit,Eye Doctor
45,North Island Naval Air Station,-117.214267,0,,,,,
54,Tierrasanta,-117.066712,0,,,,,
59,Miramar,-117.145886,0,,,,,
69,WIN,-117.088431,0,American Restaurant,Yoga Studio,Fish & Chips Shop,Exhibit,Eye Doctor
70,Rolling Hills Ranch,-116.950277,0,,,,,
77,Old Escondido Historic District,-117.074452,0,,,,,
87,S. Elm/Hickory,-117.072731,0,American Restaurant,Cosmetics Shop,Yoga Studio,Fish & Chips Shop,Eye Doctor
88,Rancho Carlsbad/Sunny Creek,-117.274627,0,,,,,
95,Otay - Ranch,-116.938098,0,,,,,


In [57]:
sd_merged.loc[sd_merged['Cluster Labels'] == 1, sd_merged.columns[[0] + list(range(5, sd_merged.shape[1]-5))]]

Unnamed: 0,Neighborhood,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
37,Otay Ranch Village 5,-116.980478,1,Park,Pool,Playground,Yoga Studio,Filipino Restaurant
44,Bonita Long Canyon,-117.000284,1,Park,Yoga Studio,Food Service,Exhibit,Eye Doctor
75,Azalea-Hollywood Park,-117.104853,1,Restaurant,Park,Yoga Studio,Filipino Restaurant,Exhibit
79,Robertson Ranch,-117.29709,1,Park,Yoga Studio,Food Service,Exhibit,Eye Doctor
109,Adams North,-117.120127,1,Park,Garden,Yoga Studio,Financial or Legal Service,Eye Doctor
134,Rancho Carrillo,-117.23839,1,Park,Pool,Ethiopian Restaurant,Exhibit,Eye Doctor
157,Emerald Hills,-117.075619,1,Park,Yoga Studio,Food Service,Exhibit,Eye Doctor


In [58]:
sd_merged.loc[sd_merged['Cluster Labels'] == 2, sd_merged.columns[[0] + list(range(5, sd_merged.shape[1]-5))]]

Unnamed: 0,Neighborhood,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
12,San Pasqual,-116.992912,2,Intersection,Yoga Studio,Fish & Chips Shop,Exhibit,Eye Doctor
35,Paseo Ranchoero,-117.013449,2,Intersection,Yoga Studio,Fish & Chips Shop,Exhibit,Eye Doctor
67,Valencia Park,-117.07759,2,Intersection,Yoga Studio,Fish & Chips Shop,Exhibit,Eye Doctor


In [59]:
sd_merged.loc[sd_merged['Cluster Labels'] == 3, sd_merged.columns[[0] + list(range(5, sd_merged.shape[1]-5))]]

Unnamed: 0,Neighborhood,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
18,La Playa,-117.233924,3,Harbor / Marina,Beach,Yoga Studio,Filipino Restaurant,Exhibit
98,Hedionda Point,-117.319221,3,Beach,Yoga Studio,Event Space,Eye Doctor,Falafel Restaurant
102,Sunset Cliffs,-117.251854,3,Beach,Yoga Studio,Event Space,Eye Doctor,Falafel Restaurant


In [60]:
sd_merged.loc[sd_merged['Cluster Labels'] == 4, sd_merged.columns[[0] + list(range(5, sd_merged.shape[1]-5))]]

Unnamed: 0,Neighborhood,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
161,College East,-117.058426,4,Supermarket,Food Service,Exhibit,Eye Doctor,Falafel Restaurant


In [61]:
sd_merged.loc[sd_merged['Cluster Labels'] == 5, sd_merged.columns[[0] + list(range(5, sd_merged.shape[1]-5))]]

Unnamed: 0,Neighborhood,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
15,Palm City,-117.080339,5,Mexican Restaurant,Pizza Place,Miscellaneous Shop,Liquor Store,Light Rail Station
19,Torrey Preserve,-117.246484,5,Art Gallery,Fish & Chips Shop,Exhibit,Eye Doctor,Falafel Restaurant
23,Chollas Creek,-117.085324,5,Food Truck,Mexican Restaurant,Clothing Store,Eye Doctor,Falafel Restaurant
31,Bay Ho,-117.217024,5,Mexican Restaurant,Baseball Field,Park,Pizza Place,Liquor Store
32,Eastside,-117.07957,5,BBQ Joint,Mexican Restaurant,Fried Chicken Joint,Eye Doctor,Falafel Restaurant
52,Fairmount Park,-117.109538,5,Mexican Restaurant,Convenience Store,Market,Pizza Place,Yoga Studio
55,Old Town,-117.19624,5,Mexican Restaurant,History Museum,Hotel,Hobby Shop,Gift Shop
57,Bay Terraces,-117.040892,5,Mexican Restaurant,Sandwich Place,Taco Place,Laundromat,Liquor Store
65,Sherman Heights,-117.144041,5,Mexican Restaurant,Hotel,Historic Site,Restaurant,Fast Food Restaurant
71,Olivewood,-117.093466,5,Mexican Restaurant,Fried Chicken Joint,Pub,Cosmetics Shop,Convenience Store


In [62]:
sd_merged.loc[sd_merged['Cluster Labels'] == 6, sd_merged.columns[[0] + list(range(5, sd_merged.shape[1]-5))]]

Unnamed: 0,Neighborhood,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,University Heights,-117.149041,6,Spa,Coffee Shop,Bar,Brewery,Thai Restaurant
1,Sorrento Valley,-117.188819,6,Hotel,Chinese Restaurant,Deli / Bodega,Restaurant,Café
2,Midway District,-117.202478,6,BBQ Joint,Wine Bar,Harbor / Marina,Coffee Shop,Filipino Restaurant
3,Central,-117.094965,6,Fast Food Restaurant,Bakery,Chinese Restaurant,Sushi Restaurant,Tea Room
4,Torrey Highlands,-117.159007,6,Playground,Sushi Restaurant,Cosmetics Shop,Convenience Store,Italian Restaurant
5,South Port Bussiness Center,-117.103997,6,Mexican Restaurant,Fast Food Restaurant,Auto Workshop,Sushi Restaurant,Gym / Fitness Center
6,Village Center,-116.968667,6,Coffee Shop,Grocery Store,Fast Food Restaurant,Sandwich Place,Breakfast Spot
7,Mission Beach,-117.249605,6,Theme Park,Sandwich Place,Sushi Restaurant,Taco Place,Board Shop
8,Mira Mesa,-117.14193,6,Photography Studio,Fast Food Restaurant,Grocery Store,Kids Store,Pet Store
9,Tamarack Point,-117.315026,6,Gym / Fitness Center,Nature Preserve,Dog Run,Financial or Legal Service,Exhibit


In [63]:
sd_merged.loc[sd_merged['Cluster Labels'] == 7, sd_merged.columns[[0] + list(range(5, sd_merged.shape[1]-5))]]

Unnamed: 0,Neighborhood,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
47,Lake Murray,-117.028255,7,Trail,Yoga Studio,Financial or Legal Service,Exhibit,Eye Doctor
115,Oak Park,-117.07244,7,Trail,Baseball Field,Yoga Studio,Financial or Legal Service,Eye Doctor
117,Calavera Hills,-117.295809,7,Trail,Coffee Shop,Yoga Studio,Filipino Restaurant,Exhibit
145,Lynwood Hills,-117.031775,7,Trail,Park,Pet Store,Yoga Studio,Fast Food Restaurant
152,La Costa Ridge,-117.232431,7,Trail,Yoga Studio,Financial or Legal Service,Exhibit,Eye Doctor
185,Castle,-117.107308,7,Liquor Store,Trail,Park,Filipino Restaurant,Exhibit


In [64]:
sd_merged.loc[sd_merged['Cluster Labels'] == 8, sd_merged.columns[[0] + list(range(5, sd_merged.shape[1]-5))]]

Unnamed: 0,Neighborhood,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
25,La Costa Oaks,-117.226281,8,Gym,Food Service,Event Space,Exhibit,Eye Doctor
137,Otay Ranch,-116.973012,8,Football Stadium,Gym,Yoga Studio,Financial or Legal Service,Exhibit
162,Alta Vista,-117.066066,8,Gym,Food Service,Event Space,Exhibit,Eye Doctor


In [65]:
sd_merged.loc[sd_merged['Cluster Labels'] == 9, sd_merged.columns[[0] + list(range(5, sd_merged.shape[1]-5))]]

Unnamed: 0,Neighborhood,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
72,Rolando,-117.057413,9,Pharmacy,Shipping Store,Park,Sandwich Place,Farmers Market
133,Mt. Hope,-117.110089,9,Liquor Store,Shipping Store,Park,Fast Food Restaurant,Event Space
177,Paradise Hills,-117.058982,9,Pharmacy,Food Service,Event Space,Exhibit,Eye Doctor
