In [1]:
import os

import numpy as np
import pandas as pd
from openpyxl import load_workbook

In [108]:
# Read the neighborhood coordinates sheet into a DataFrame.  The column labels
# come from cells A1:D1; ws.values yields every worksheet row, so that header
# row is also still present as the first data row of df.
wb = load_workbook(filename='Boston Neighborhood Coordinates.xlsx', read_only=True)
ws = wb.active
col_names = [ws[cell_ref].value for cell_ref in ("A1", "B1", "C1", "D1")]
header = np.array(col_names)

df = pd.DataFrame(ws.values, columns=header)
wb.close()

In [109]:
# Remove the 'Zip Code' column and the row labelled 0, then key the table by
# neighborhood name.
df = (
    df.drop(columns=['Zip Code'])
      .drop(index=[0])
      .set_index('Neighborhood')
)

In [110]:
# Display the coordinates table, which is indexed by neighborhood at this point.
df

Unnamed: 0_level_0,Latitude,Longitude
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1
Chinatown/Leather District,42.3515,-71.0602
West End,42.3653,-71.0646
South End,42.3378,-71.0706
Roxbury,42.3218,-71.0884
Mission Hill,42.3319,-71.0988
Mattapan,42.2771,-71.0914
South Boston,42.3365,-71.0349
East Boston,42.3745,-71.032
Charlestown,42.3795,-71.0646
Jamaica Plain,42.3097,-71.1151


In [5]:
import folium # map rendering library

In [6]:
# create map of Boston using latitude and longitude values
map_Boston = folium.Map(location=[42.3261, -71.07], zoom_start=12)

# add one small marker and one large circle per neighborhood
for neighborhood, lat, lng in zip(df.index, df['Latitude'], df['Longitude']):
    # the popup shows the neighborhood name
    popup = folium.Popup(f'{neighborhood}', parse_html=True)

    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_Boston)

    # NOTE(review): 1100 equals the default `radius` of getNearbyVenues further
    # down; presumably this circle visualises that search area - confirm.
    folium.Circle([lat, lng], radius=1100).add_to(map_Boston)

map_Boston

In [7]:
import requests
import json # library to handle JSON files

In [8]:
# Foursquare credentials are read from the environment so that no secret is
# stored in the notebook.  Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.  Keys that were
# previously committed in this cell should be treated as leaked and revoked.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Fail loudly (but without raising) so the reader knows why later requests fail.
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests cannot be authenticated.')

In [48]:
# Parameters of a one-off "explore" request around a single neighborhood.
hood_name = 'West Cambridge'
radius = 1000
LIMIT = 1000

# coordinates of that neighborhood, looked up by index label
hood_lat = df.loc[hood_name, 'Latitude'] # neighborhood latitude value
hood_long = df.loc[hood_name, 'Longitude'] # neighborhood longitude value

url = f'https://api.foursquare.com/v2/venues/explore?client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&ll={hood_lat},{hood_long}&v={VERSION}&radius={radius}&limit={LIMIT}'

In [49]:
# requests never times out by default, so an unresponsive server would hang
# the kernel indefinitely; 30 seconds is a generous upper bound for one call.
r = requests.get(url, timeout=30)
# Parsed JSON body of the response, used by the following cells.
results = r.json()
# Show the HTTP status code as the cell output.
r.status_code

200

In [50]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category listed for a venue row.

    Parameters
    ----------
    row : mapping-like (dict or pandas Series)
        Must provide the category list under the key ``'categories'`` or,
        failing that, under ``'venue.categories'``.

    Returns
    -------
    The ``'name'`` of the first category, or ``None`` when the row has no
    categories (empty list, or a non-list placeholder such as None/NaN).

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates
        # instead of being silently swallowed.
        categories_list = row['venue.categories']

    # A missing value (None / NaN) has no len(); treat it like an empty list.
    if not isinstance(categories_list, (list, tuple)) or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [51]:
# venue entries of the first result group
venues = results['response']['groups'][0]['items']

# flatten JSON and keep only the columns of interest
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = pd.json_normalize(venues).loc[:, filtered_columns]

# replace each row's category list by the name of its first category
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the part after the last dot
nearby_venues.columns = [col.rsplit(".", 1)[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Formaggio Kitchen,Cheese Shop,42.381433,-71.133816
1,Formaggio Kitchen BBQ,BBQ Joint,42.381425,-71.133942
2,Hi-Rise Bread Company,Bakery,42.38252,-71.131325
3,Village Kitchen,Italian Restaurant,42.381205,-71.138507
4,Armando's Pizza & Subs,Pizza Place,42.382338,-71.131216


In [52]:
# Report how many rows (venues) the flattened result holds.
print('{} venues were returned by Foursquare.'.format(len(nearby_venues)))

32 venues were returned by Foursquare.


In [88]:
def getNearbyVenues(names, latitudes, longitudes, radius=1100, limit=1000, timeout=30):
    """Query the Foursquare explore endpoint once per location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location; they are consumed in parallel.
    radius : number, default 1100
        Value sent as the ``radius`` request parameter.
    limit : int, default 1000
        Value sent as the ``limit`` request parameter.
    timeout : number, default 30
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.  The frame is
        empty (but still has these columns) when nothing was returned.  A venue
        without any category gets ``None`` as its 'Venue Category'.

    Raises
    ------
    requests.HTTPError
        If a request comes back with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET and VERSION, and prints the
    location name and HTTP status code of every request as progress output.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # make the GET request; the query string is built by requests
        r = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': f'{lat},{lng}',
                'radius': radius,
                'limit': limit,
            },
            timeout=timeout,
        )
        print(r.status_code)
        # Surface HTTP errors directly instead of failing later on a missing key.
        r.raise_for_status()
        results = r.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # some venues carry no category at all
                categories[0]['name'] if categories else None,
            ))

    # Passing the column names here keeps an empty result well-formed.
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)

In [114]:
# Collect the venues around every neighborhood in df.
Boston_venues_try = getNearbyVenues(names=df.index, latitudes=df['Latitude'], longitudes=df['Longitude'])
# The cells below read `Boston_venues`, which no visible cell defines; bind it
# here so the notebook runs top to bottom on a fresh kernel.
Boston_venues = Boston_venues_try

Chinatown/Leather District
200
West End
200
South End
200
Roxbury
200
Mission Hill
200
Mattapan
200
South Boston
200
East Boston
200
Charlestown
200
Jamaica Plain
200
Roslindale
200
West Roxbury
200
Allston
200
Brighton
200
Hyde Park
200
North Cambridge 
200
Brookline Village
200
Chestnut Hill
200
Downtown
200
North End
200
Beacon Hill
200
Back Bay
200
Dorchester
200
Harvard Square 
200
East Cambridge
200
Fenway/Kenmore
200
Porter Square
200
West Cambridge
200
Inman Square
200
Central Square
200
Davis Square
200
Winter Hill
200
Assembly Square
200
Seaport
200


In [119]:
# Print the (rows, columns) shape of the venue table, then show the number of
# distinct values in each of its columns.
print(Boston_venues.shape)
    
Boston_venues.nunique()

(2671, 7)


Neighborhood                34
Neighborhood Latitude       34
Neighborhood Longitude      31
Venue                     1901
Venue Latitude            2159
Venue Longitude           2159
Venue Category             294
dtype: int64

In [121]:
# Preview the first rows of the venue table.
Boston_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Chinatown/Leather District,42.3515,-71.0602,Gracenote Coffee,42.351657,-71.058204,Coffee Shop
1,Chinatown/Leather District,42.3515,-71.0602,Shabu Zen,42.350922,-71.060942,Asian Restaurant
2,Chinatown/Leather District,42.3515,-71.0602,Barry's Bootcamp,42.35401,-71.059776,Gym / Fitness Center
3,Chinatown/Leather District,42.3515,-71.0602,Hostelling International Boston,42.351084,-71.063398,Hostel
4,Chinatown/Leather District,42.3515,-71.0602,Tatte Bakery & Cafe,42.353111,-71.057117,Bakery


In [122]:
# Count the non-null 'Venue' entries of each venue category and save them as a
# single-column ('Count') sheet.
Venue_cat_count = (
    Boston_venues.groupby('Venue Category')['Venue']
    .count()
    .to_frame(name='Count')
)
Venue_cat_count.to_excel('Venue_Category_Counts.xlsx')

In [124]:
# Count the non-null 'Venue' entries of each neighborhood and save them as a
# single-column ('Count') sheet.
Venue_per_hood = (
    Boston_venues.groupby('Neighborhood')['Venue']
    .count()
    .to_frame(name='Count')
)
Venue_per_hood.to_excel('Venue_Per_Hood.xlsx')

In [125]:
# one hot encoding
Boston_onehot = pd.get_dummies(Boston_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category can itself be called "Neighborhood".  Assigning the
# neighborhood labels under that name would then overwrite the category's
# indicator column in place (leaving the labels in the middle of the frame and
# moving an unrelated category to the front), so give the category column a
# distinct name before the labels are added.
if 'Neighborhood' in Boston_onehot.columns:
    Boston_onehot = Boston_onehot.rename(
        columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# add neighborhood column back to dataframe, directly as the first column
Boston_onehot.insert(0, 'Neighborhood', Boston_venues['Neighborhood'])
fixed_columns = list(Boston_onehot.columns)

# Number of venues per category.  The label column is excluded before summing
# so that only the indicator columns are added up.
Totals = Boston_onehot.drop(columns='Neighborhood').sum().to_frame('Totals')

In [126]:
# Per-neighborhood mean of every one-hot column, with 'Neighborhood' kept as a
# regular column.
Boston_grouped = Boston_onehot.groupby('Neighborhood', as_index=False).mean()
Boston_grouped

Unnamed: 0,Neighborhood,Zoo Exhibit,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,Airport Service,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Allston,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01
1,Assembly Square,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0
2,Back Bay,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.01,0.0,0.0,0.01,0.03,0.0,0.01,0.01
3,Beacon Hill,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01
4,Brighton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,0.011628,0.0,0.0
5,Brookline Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Square,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02
7,Charlestown,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739
8,Chestnut Hill,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Chinatown/Leather District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0


In [127]:
num_top_venues = 10

# Print, for every neighborhood, its most frequent venue categories.
for hood in Boston_grouped['Neighborhood']:
    print("----"+hood+"----")
    freq_table = (
        Boston_grouped[Boston_grouped['Neighborhood'] == hood]
        .T.reset_index()                       # one row per original column
        .set_axis(['venue', 'freq'], axis=1)
        .iloc[1:]                              # skip the 'Neighborhood' entry
        .astype({'freq': float})
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
        .head(num_top_venues)
    )
    print(freq_table)
    print('\n')

----Allston----
                 venue  freq
0      Thai Restaurant  0.04
1               Bakery  0.03
2     Sushi Restaurant  0.03
3          Coffee Shop  0.03
4  Fried Chicken Joint  0.03
5                  Bar  0.03
6    Korean Restaurant  0.03
7   Mexican Restaurant  0.03
8          Pizza Place  0.03
9   Chinese Restaurant  0.03


----Assembly Square----
                 venue  freq
0       Clothing Store  0.06
1           Donut Shop  0.06
2          Coffee Shop  0.05
3   Mexican Restaurant  0.05
4             Pharmacy  0.03
5  Sporting Goods Shop  0.03
6                 Café  0.03
7  American Restaurant  0.03
8          Pizza Place  0.03
9            Pet Store  0.02


----Back Bay----
                     venue  freq
0           Clothing Store  0.05
1                      Spa  0.04
2              Coffee Shop  0.03
3                      Gym  0.03
4                    Hotel  0.03
5                   Bakery  0.03
6  New American Restaurant  0.03
7                     Park  0.03
8   

In [128]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped (positional slice ``iloc[1:]``).  The
    remaining entries are sorted by value in descending order, and the index
    labels of the first ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

In [129]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for rank in range(1, num_top_venues + 1):
    # English ordinals: 11th-13th are irregular, otherwise the last digit
    # decides between st / nd / rd / th.  The suffix is chosen explicitly
    # rather than by catching an exception from an out-of-range lookup.
    if rank % 100 in (11, 12, 13) or rank % 10 not in (1, 2, 3):
        suffix = 'th'
    else:
        suffix = indicators[rank % 10 - 1]
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Boston_grouped['Neighborhood']

# fill each row with that neighborhood's top categories, most common first
for ind in np.arange(Boston_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Boston_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allston,Thai Restaurant,Coffee Shop,Pizza Place,Sushi Restaurant,Bakery,Bar,Mexican Restaurant,Fried Chicken Joint,Chinese Restaurant,Korean Restaurant
1,Assembly Square,Donut Shop,Clothing Store,Mexican Restaurant,Coffee Shop,Café,Sporting Goods Shop,Pizza Place,Pharmacy,American Restaurant,Ice Cream Shop
2,Back Bay,Clothing Store,Spa,Italian Restaurant,Park,New American Restaurant,Hotel,Gym,Coffee Shop,Seafood Restaurant,Bakery
3,Beacon Hill,Italian Restaurant,Park,Coffee Shop,Pizza Place,Hotel,French Restaurant,Historic Site,Bakery,Science Museum,Market
4,Brighton,Pizza Place,Grocery Store,Chinese Restaurant,Café,Bakery,Convenience Store,Coffee Shop,Pub,Donut Shop,Pet Store
5,Brookline Village,Coffee Shop,Mexican Restaurant,Park,Chinese Restaurant,Hotel,Liquor Store,Pizza Place,Bar,Sporting Goods Shop,Gastropub
6,Central Square,Sandwich Place,Italian Restaurant,Vegetarian / Vegan Restaurant,Bakery,New American Restaurant,Pizza Place,Sushi Restaurant,Bar,Coffee Shop,Gym / Fitness Center
7,Charlestown,Gastropub,Pizza Place,Park,Donut Shop,History Museum,Bus Stop,Grocery Store,Café,Coffee Shop,National Park
8,Chestnut Hill,Furniture / Home Store,Bank,Spa,Convenience Store,Tennis Court,Mobile Phone Shop,Clothing Store,Salon / Barbershop,Italian Restaurant,Seafood Restaurant
9,Chinatown/Leather District,Bakery,Coffee Shop,Chinese Restaurant,Sandwich Place,Park,French Restaurant,Sushi Restaurant,Asian Restaurant,Theater,Hotel


In [192]:
def df_style(val):
    """Return the CSS for one cell: bold when ``val`` equals an entry of ``match``.

    ``match`` is read from the module namespace at call time.  Any other value
    gets an empty style string.
    """
    is_match = any(val == item for item in match)
    return 'font-weight: bold' if is_match else ''

### Create df that includes Neighborhoods with MAIN criterion

In [206]:
criterion = ['Italian', 'Pizza', 'Ice Cream', 'Gym', 'Pub']
neighborhoods_venues_sorted.sort_values(by=['Neighborhood'], inplace=True)

# Only the ranked venue columns are searched, so the neighborhood name (or any
# score column added by another cell) can never produce a hit.
venue_columns = [col for col in neighborhoods_venues_sorted.columns
                 if str(col).endswith('Most Common Venue')]
venue_frame = neighborhoods_venues_sorted[venue_columns]

# True wherever a venue label contains the MAIN criterion (the first entry).
mask = venue_frame.applymap(lambda x: criterion[0] in str(x))

# Every distinct venue label that matched; df_style bolds exactly these.
# Taken from the hit cells directly rather than from a fixed position in each
# row's unique values.
match = list(dict.fromkeys(venue_frame.to_numpy()[mask.to_numpy()]))

# Select rows with a boolean mask: index labels are not positions, so passing
# them to iloc only works while the frame happens to be in label order.
# copy() makes the result an independent frame that later cells may extend.
search_results = neighborhoods_venues_sorted.loc[mask.any(axis=1)].copy()

print(f"{len(search_results)} Results Found!")

search_results.style.applymap(df_style)

16 Results Found!


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Number of Matches
2,Back Bay,Clothing Store,Spa,Italian Restaurant,Park,New American Restaurant,Hotel,Gym,Coffee Shop,Seafood Restaurant,Bakery,2
3,Beacon Hill,Italian Restaurant,Park,Coffee Shop,Pizza Place,Hotel,French Restaurant,Historic Site,Bakery,Science Museum,Market,2
6,Central Square,Sandwich Place,Italian Restaurant,Vegetarian / Vegan Restaurant,Bakery,New American Restaurant,Pizza Place,Sushi Restaurant,Bar,Coffee Shop,Gym / Fitness Center,3
8,Chestnut Hill,Furniture / Home Store,Bank,Spa,Convenience Store,Tennis Court,Mobile Phone Shop,Clothing Store,Salon / Barbershop,Italian Restaurant,Seafood Restaurant,1
12,Downtown,Park,Italian Restaurant,Hotel,Coffee Shop,Seafood Restaurant,Historic Site,Sandwich Place,Bakery,American Restaurant,Aquarium,1
13,East Boston,Mexican Restaurant,Italian Restaurant,Café,Pizza Place,Airport Lounge,Park,Airport Service,Liquor Store,Sandwich Place,Chinese Restaurant,2
18,Inman Square,New American Restaurant,Coffee Shop,Pub,Portuguese Restaurant,Bakery,Bar,Brewery,Italian Restaurant,Ice Cream Shop,Sandwich Place,2
21,Mission Hill,Sandwich Place,Donut Shop,Café,Art Museum,Pizza Place,Italian Restaurant,Falafel Restaurant,Sushi Restaurant,Caribbean Restaurant,Coffee Shop,2
23,North End,Italian Restaurant,Park,Seafood Restaurant,Pizza Place,Bakery,Hotel,Sandwich Place,Coffee Shop,Historic Site,Aquarium,2
25,Roslindale,Italian Restaurant,American Restaurant,Pizza Place,Plaza,Bakery,Bar,Chinese Restaurant,Coffee Shop,Grocery Store,Latin American Restaurant,2


### Score Neighborhoods based on MAIN criterion

In [None]:
# Score each result row: one point for every (cell, match entry) pair that is
# equal.  itertuples() rows start with the index label, which is compared too.
main_score = [
    sum(1 for cell in row for crit in match if cell == crit)
    for row in search_results.itertuples()
]
search_results['Main-Score'] = main_score
# highest score first
search_results.sort_values(by=['Main-Score'], ascending=False, inplace=True)

### Create df that includes Neighborhoods with ALL criteria

In [None]:
# NOTE(review): match is reset here, so df_style has nothing to bold when this
# cell renders; the cell after this one rebuilds the list.
match = []
search_results = neighborhoods_venues_sorted

# Narrow the rows down one criterion at a time: a row survives only if some
# cell contains the current criterion.
for item in criterion:
    mask = search_results.applymap(lambda x: item in str(x))
    # Filter with a boolean row mask.  Index labels are not positions, so
    # feeding them to iloc returns the wrong rows once the frame has been
    # re-sorted (for example by 'Number of Matches').
    search_results = search_results.loc[mask.any(axis=1)]

print(f"{len(search_results)} Results Found!")

search_results.style.applymap(df_style)

### Create match list separately

In [182]:
# Build `match`: every distinct venue label that contains at least one of the
# criteria.  The list is rebuilt from scratch, so this cell gives the same
# result on every run and does not need `match` to exist beforehand.  It also
# leaves `search_results` untouched.
venue_columns = [col for col in neighborhoods_venues_sorted.columns
                 if str(col).endswith('Most Common Venue')]

# distinct labels found anywhere in the ranked venue columns
venue_labels = pd.unique(neighborhoods_venues_sorted[venue_columns].to_numpy().ravel())

# All matching labels are kept, including a second match within the same row
# (e.g. both 'Gym' and 'Gym / Fitness Center').
match = [label for label in venue_labels
         if any(item in str(label) for item in criterion)]

### Get df with all neighborhoods organized by # of criteria met with criteria bolded

In [187]:
# For every neighborhood, count the (cell, match entry) pairs that are equal.
# NOTE(review): this counts matching cells, not distinct criteria - a row with
# both 'Gym' and 'Gym / Fitness Center' scores 2 for the single 'Gym' criterion.
num_match = [
    sum(1 for cell in row for crit in match if cell == crit)
    for row in neighborhoods_venues_sorted.itertuples()
]
neighborhoods_venues_sorted['Number of Matches'] = num_match
# most matches first
neighborhoods_venues_sorted.sort_values(by=['Number of Matches'], ascending=False, inplace=True)

neighborhoods_venues_sorted.style.applymap(df_style)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Number of Matches
6,Central Square,Sandwich Place,Italian Restaurant,Vegetarian / Vegan Restaurant,Bakery,New American Restaurant,Pizza Place,Sushi Restaurant,Bar,Coffee Shop,Gym / Fitness Center,3
17,Hyde Park,Pizza Place,Skating Rink,American Restaurant,Baseball Field,Gym / Fitness Center,Gym,Donut Shop,Grocery Store,Gas Station,Skate Park,3
29,South End,Italian Restaurant,Wine Shop,American Restaurant,Café,French Restaurant,Wine Bar,Pet Store,Park,Mexican Restaurant,Gym / Fitness Center,2
33,Winter Hill,Pizza Place,Donut Shop,Park,Café,Brazilian Restaurant,Harbor / Marina,General Entertainment,Mexican Restaurant,New American Restaurant,Ice Cream Shop,2
23,North End,Italian Restaurant,Park,Seafood Restaurant,Pizza Place,Bakery,Hotel,Sandwich Place,Coffee Shop,Historic Site,Aquarium,2
13,East Boston,Mexican Restaurant,Italian Restaurant,Café,Pizza Place,Airport Lounge,Park,Airport Service,Liquor Store,Sandwich Place,Chinese Restaurant,2
32,West Roxbury,Italian Restaurant,Grocery Store,Convenience Store,Pizza Place,Deli / Bodega,Discount Store,Pharmacy,Bank,Salon / Barbershop,Gas Station,2
18,Inman Square,New American Restaurant,Coffee Shop,Pub,Portuguese Restaurant,Bakery,Bar,Brewery,Italian Restaurant,Ice Cream Shop,Sandwich Place,2
19,Jamaica Plain,Park,Coffee Shop,American Restaurant,Bakery,Pizza Place,Ice Cream Shop,New American Restaurant,Pub,Donut Shop,Bookstore,2
21,Mission Hill,Sandwich Place,Donut Shop,Café,Art Museum,Pizza Place,Italian Restaurant,Falafel Restaurant,Sushi Restaurant,Caribbean Restaurant,Coffee Shop,2
