# Foursquare Features


adapted from https://towardsdatascience.com/kickstart-your-first-clustering-project-in-san-francisco-neighborhoods-e258e659440c

In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np
import requests

from matplotlib import pyplot as plt

import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

## Load Data

In [10]:
df = pd.read_csv('data/neighborhoods.csv')
df.head()

Unnamed: 0,Airbnb,Unique Identifier,NTA,Longitude,Latitude
0,"Allerton,Pelham Gardens",1,"BX31,BX07",-73.853921,40.865872
1,Arden Heights,2,SI48,-74.189938,40.550339
2,Astoria,3,"QN71,QN70",-73.920299,40.765092
3,Bath Beach,4,BK27,-74.010615,40.603978
4,"Battery Park City,Financial District",5,MN25,-74.009726,40.709304


## Get Data using Foursquare API

In [4]:
# foursquare info
CLIENT_ID = ''
CLIENT_SECRET = ''
VERSION = '20191207' # Foursquare API version
LIMIT = 1000
sortByPopularity = 1

In [5]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&limit={}&sortByPopularity={}'.format(
            CLIENT_ID, CLIENT_SECRET, 
            VERSION, lat, lng, LIMIT, sortByPopularity)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name,
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood','Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category']
    
    return(nearby_venues)

In [20]:
all_nearby = pd.DataFrame(columns = ['Neighborhood','Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category'])

In [21]:
## split up because api calls failing
for i in range(95):
    nearby = getNearbyVenues(names=[df['Unique Identifier'][i]],
                                   latitudes=[df['Latitude'][i]],
                                   longitudes=[df['Longitude'][i]]
                                  )
    all_nearby = all_nearby.append(nearby)

In [25]:
for i in range(95,146):
    nearby = getNearbyVenues(names=[df['Unique Identifier'][i]],
                                   latitudes=[df['Latitude'][i]],
                                   longitudes=[df['Longitude'][i]]
                                  )
    all_nearby = all_nearby.append(nearby)

In [40]:
all_nearby.groupby('Neighborhood').count()

Unnamed: 0_level_0,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,100,100,100,100
2,100,100,100,100
3,100,100,100,100
4,100,100,100,100
5,100,100,100,100
6,100,100,100,100
7,100,100,100,100
8,100,100,100,100
9,100,100,100,100
10,100,100,100,100


## Standardize data into proportion in neighborhood and save

In [42]:
# one hot encoding
onehot = pd.get_dummies(all_nearby[['Venue Category']], prefix = "", prefix_sep = "")

# add neighborhood column back to dataframe
onehot['Neighborhood'] = all_nearby['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [onehot.columns[-1]] + list(onehot.columns[:-1])
onehot = onehot[fixed_columns]

grouped = onehot.groupby('Neighborhood').mean().reset_index()
grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0
3,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,...,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0


In [43]:
grouped.to_csv('cleaned_data/foursquare.csv',index=False)

## Rewrite data to show top venues in each neighborhood

In [183]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending = False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [189]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        # append 'st', 'nd', 'rd' to the top 3 venues
        columns.append('{}{} Most Common Venue'.format(ind + 1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind + 1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns = columns)
neighborhoods_venues_sorted['Neighborhood'] = grouped['Neighborhood']

for ind in np.arange(grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,Donut Shop,Pharmacy,Pizza Place,Fast Food Restaurant,Bank,Supermarket,Caribbean Restaurant,Park,Gym / Fitness Center,Coffee Shop
1,2,Pharmacy,Donut Shop,Coffee Shop,Pizza Place,Train Station,Burger Joint,Bank,Ice Cream Shop,Park,Italian Restaurant
2,3,Bar,Bagel Shop,Pub,Coffee Shop,Greek Restaurant,Gym,Grocery Store,Pizza Place,Cocktail Bar,Donut Shop
3,4,Bakery,Pizza Place,Italian Restaurant,Bagel Shop,Sushi Restaurant,Chinese Restaurant,Coffee Shop,Diner,Liquor Store,Bank
4,5,Coffee Shop,Hotel,Gym,Park,Memorial Site,Pizza Place,Department Store,Café,Sandwich Place,Bar
