# Foursquare Features

Adapted from https://towardsdatascience.com/kickstart-your-first-clustering-project-in-san-francisco-neighborhoods-e258e659440c

In [2]:
import pandas as pd
import numpy as np
import requests

import json # library to handle JSON files
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

## Load Data

In [37]:
df = pd.read_csv('cleaned_data/lat_long.csv')
# Rename the columns by position; this raises if the file does not hold
# exactly three columns.
df.columns = [
    'Neighborhood',
    'Latitude',
    'Longitude',
]
df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,DOUGLAS,41.834717,-87.617142
1,OAKLAND,41.824144,-87.605107
2,FULLER PARK,41.809067,-87.632395
3,GRAND BOULEVARD,41.812936,-87.61783
4,KENWOOD,41.809592,-87.596556


## Get Data using Foursquare API

In [6]:
# Foursquare API settings
import os

# Credentials are read from the environment so that real keys never have to be
# saved in the notebook. When the variables are unset they fall back to the
# empty strings that were hard-coded here before.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20191207' # Foursquare API version
LIMIT = 1000  # sent as the `limit` query parameter of each request
sortByPopularity = 1  # sent as the `sortByPopularity` query parameter

In [7]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Fetch venues around each location from the Foursquare explore endpoint.

    One GET request is made per (name, latitude, longitude) triple, using the
    module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION``, ``LIMIT`` and
    ``sortByPopularity`` settings.

    Parameters
    ----------
    names : iterable
        Label written to the ``Neighborhood`` column for every venue found
        around the matching coordinate.
    latitudes, longitudes : iterable
        Coordinates to search around, in the same order as ``names``.
    radius : int, optional
        Accepted so existing calls keep working. NOTE(review): this value has
        never been included in the request, so it does not influence the
        results; sending it would change what existing calls return.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns ``Neighborhood``, ``Venue``,
        ``Venue Latitude``, ``Venue Longitude`` and ``Venue Category``.
        The frame is empty (but still has these columns) when no venues
        were returned. ``Venue Category`` is ``None`` for a venue that has
        no category.

    Raises
    ------
    KeyError, IndexError
        If a response does not contain ``response -> groups[0] -> items``.
    requests.exceptions.RequestException
        If the request fails or takes longer than the timeout.
    """
    columns = ['Neighborhood', 'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category']
    rows = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and escape the query string instead of
        # formatting the URL by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'limit': LIMIT,
            'sortByPopularity': sortByPopularity,
        }
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params=params,
            timeout=30,
        )
        items = response.json()['response']['groups'][0]['items']

        # Keep only the fields needed for each venue.
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # Some venues may carry no category; keep them with a
                # missing value rather than failing on categories[0].
                categories[0]['name'] if categories else None,
            ))

    # Passing the column names to the constructor also works when `rows` is
    # empty, where assigning `.columns` afterwards would raise.
    return pd.DataFrame(rows, columns=columns)

In [13]:
# Start from an empty frame that already has the venue column layout.
all_nearby = pd.DataFrame(
    columns=[
        'Neighborhood',
        'Venue',
        'Venue Latitude',
        'Venue Longitude',
        'Venue Category',
    ],
)

In [14]:
# Query every neighborhood in one call: getNearbyVenues() already loops over
# the locations it is given, so no per-row loop is needed here.
# DataFrame.append, used here previously, was removed in pandas 2.0.
# Assigning the result (instead of appending to the existing frame) also means
# re-running this cell no longer duplicates rows.
all_nearby = getNearbyVenues(
    names=df['Neighborhood'],
    latitudes=df['Latitude'],
    longitudes=df['Longitude'],
)
all_nearby

Unnamed: 0,Neighborhood,Venue,Venue Latitude,Venue Longitude,Venue Category
0,DOUGLAS,Guaranteed Rate Field,41.829873,-87.633723,Baseball Stadium
1,DOUGLAS,Hyatt Regency Mc Cormick Place,41.852121,-87.618501,Hotel
2,DOUGLAS,Mariano's Fresh Market,41.824253,-87.615890,Grocery Store
3,DOUGLAS,31st Street Beach,41.839458,-87.608028,Beach
4,DOUGLAS,LA Fitness,41.832001,-87.614865,Gym / Fitness Center
5,DOUGLAS,Cermak Produce,41.838986,-87.645927,Grocery Store
6,DOUGLAS,Ricobene's,41.845483,-87.633892,Pizza Place
7,DOUGLAS,Walgreens,41.832461,-87.616120,Pharmacy
8,DOUGLAS,Starbucks,41.830727,-87.626820,Coffee Shop
9,DOUGLAS,Armour Square Park,41.834432,-87.635211,Park


In [16]:
# Count the non-null entries of every column within each neighborhood.
all_nearby.groupby(by='Neighborhood').count()

Unnamed: 0_level_0,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ALBANY PARK,100,100,100,100
ARCHER HEIGHTS,100,100,100,100
ARMOUR SQUARE,100,100,100,100
ASHBURN,100,100,100,100
AUBURN GRESHAM,100,100,100,100
AUSTIN,94,94,94,94
AVALON PARK,100,100,100,100
AVONDALE,100,100,100,100
BELMONT CRAGIN,100,100,100,100
BEVERLY,100,100,100,100


## Convert venue categories to per-neighborhood proportions and save

In [17]:
# One-hot encode the venue categories: one indicator column per category.
onehot = pd.get_dummies(all_nearby[['Venue Category']], prefix = "", prefix_sep = "")

# A venue category that is itself called "Neighborhood" would be overwritten
# by the key column added below, so move it out of the way first.
# This is a no-op when no such category exists.
onehot = onehot.rename(columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# Add the neighborhood label of each venue. `.values` attaches it by position,
# which is safe because get_dummies keeps the rows in the order of all_nearby.
onehot['Neighborhood'] = all_nearby['Neighborhood'].values

# Move the neighborhood column to the front, selecting it by name rather than
# assuming it is the last column.
fixed_columns = ['Neighborhood'] + [col for col in onehot.columns if col != 'Neighborhood']
onehot = onehot[fixed_columns]

# The mean of the 0/1 indicators is the share of each category among the
# venues of a neighborhood.
grouped = onehot.groupby('Neighborhood').mean().reset_index()
grouped.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,...,Volleyball Court,Warehouse,Warehouse Store,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,ALBANY PARK,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0
1,ARCHER HEIGHTS,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0
2,ARMOUR SQUARE,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0
3,ASHBURN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,AUBURN GRESHAM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0


In [36]:
# Save the per-neighborhood table; the row index is not written to the file.
grouped.to_csv(path_or_buf='cleaned_data/foursquare.csv', index=False)

## Rewrite data to show top venues in each neighborhood

In [18]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in ``row``, highest first.

    The first entry of ``row`` is skipped (callers pass a row whose first
    entry is the neighborhood name). The remaining entries are sorted in
    descending order and the index labels of the first ``num_top_venues``
    of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.head(num_top_venues).index.values

In [19]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


def _ordinal(position):
    """Return ``position`` with its English ordinal suffix, e.g. 1 -> '1st'."""
    # 11, 12 and 13 take 'th' even though they end in 1, 2 and 3.
    if position % 100 in (11, 12, 13) or position % 10 not in (1, 2, 3):
        return '{}th'.format(position)
    return '{}{}'.format(position, indicators[position % 10 - 1])


# One label column per rank: '1st Most Common Venue', '2nd Most Common Venue', ...
# The suffix is computed explicitly instead of relying on a bare `except:` to
# detect ranks past the end of `indicators`.
columns = ['Neighborhood'] + [
    '{} Most Common Venue'.format(_ordinal(rank))
    for rank in range(1, num_top_venues + 1)
]

# create a new dataframe with one row per neighborhood in `grouped`
neighborhoods_venues_sorted = pd.DataFrame(columns = columns)
neighborhoods_venues_sorted['Neighborhood'] = grouped['Neighborhood']

# Fill every row with the category names ranked by proportion, highest first.
for row_pos in range(grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[row_pos, 1:] = return_most_common_venues(
        grouped.iloc[row_pos, :], num_top_venues
    )

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,ALBANY PARK,Park,Coffee Shop,Bar,Convenience Store,Pizza Place,Middle Eastern Restaurant,Grocery Store,Asian Restaurant,Diner,Supermarket
1,ARCHER HEIGHTS,Mexican Restaurant,Donut Shop,Taco Place,Fast Food Restaurant,Grocery Store,Pharmacy,Pizza Place,Rental Car Location,Bar,Park
2,ARMOUR SQUARE,Chinese Restaurant,Bar,Pizza Place,Mexican Restaurant,Park,Grocery Store,Coffee Shop,Fast Food Restaurant,Bank,Asian Restaurant
3,ASHBURN,Discount Store,Grocery Store,Pharmacy,Fast Food Restaurant,Park,Pizza Place,Bank,Department Store,Supermarket,Cosmetics Shop
4,AUBURN GRESHAM,Discount Store,Fast Food Restaurant,Grocery Store,Park,Sandwich Place,Seafood Restaurant,Pharmacy,Bar,Fried Chicken Joint,Lounge
