This notebook begins by pulling venue data for Auburn, AL from the Foursquare API.

In [1]:
import os
import warnings
from urllib.parse import urlencode

LIMIT = 50  # maximum number of venues requested from the Foursquare API

radius = 8047  # search radius passed to the API (roughly 5 miles)

# Credentials are read from the environment so they never appear in the
# notebook source or in saved cell output. Any key pair that was previously
# committed in this notebook should be treated as compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'the Foursquare request is expected to fail.'
    )
VERSION = '20200630'
neighborhood_latitude = 32.6099
neighborhood_longitude = -85.4808


def build_explore_url(client_id, client_secret):
    """Build the Foursquare ``venues/explore`` request URL.

    Parameters
    ----------
    client_id, client_secret : str
        API credentials placed in the query string.

    Returns
    -------
    str
        The endpoint followed by the query string built from the module-level
        VERSION, coordinates, radius and LIMIT settings.
    """
    query = urlencode(
        {
            'client_id': client_id,
            'client_secret': client_secret,
            'v': VERSION,
            'll': '{},{}'.format(neighborhood_latitude, neighborhood_longitude),
            'radius': radius,
            'limit': LIMIT,
        },
        safe=',',  # keep the "lat,lng" pair readable instead of "%2C"
    )
    return 'https://api.foursquare.com/v2/venues/explore?' + query


url = build_explore_url(CLIENT_ID, CLIENT_SECRET)

# Display the request with the credentials masked so the saved output is safe to share
build_explore_url('REDACTED', 'REDACTED')

'https://api.foursquare.com/v2/venues/explore?&client_id=<redacted>&client_secret=<redacted>&v=20200630&ll=32.6099,-85.4808&radius=8047&limit=50'

In [4]:
pip install folium

Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
[K     |████████████████████████████████| 93 kB 3.2 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0
Note: you may need to restart the kernel to use updated packages.


In [5]:
import requests
import pandas as pd
import json
from sklearn.cluster import KMeans
import folium

In [6]:
# An explicit timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() surfaces HTTP errors (for example rejected credentials)
# here instead of as a confusing KeyError in a later cell.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5fcfb2b36d31c0183b446fa3'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': '$-$$$$', 'key': 'price'},
    {'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Auburn',
  'headerFullLocation': 'Auburn',
  'headerLocationGranularity': 'city',
  'totalResults': 154,
  'suggestedBounds': {'ne': {'lat': 32.682323072423074,
    'lng': -85.39498406493853},
   'sw': {'lat': 32.53747692757693, 'lng': -85.56661593506148}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4f5ceeafe4b0c4b68a250bc7',
       'name': 'The Hound',
       'location': {'address': '124 Tichenor Ave',
        'lat': 32.60787386057047,
        'lng': -85.48083679130289,
        'labeledLatLngs': [{'label': 'display',
        

This function returns the name of a venue's first listed category from the JSON response (or `None` if the venue has no categories).

In [7]:
def get_category_type(row):
    """Return the name of the first category listed for a venue.

    Parameters
    ----------
    row : mapping
        A record indexable by key. The category list is looked up under
        'categories' and, if that key is absent, under 'venue.categories'.

    Returns
    -------
    The 'name' entry of the first category, or None when the list is empty.

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and is allowed to propagate instead of being swallowed.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

The next section creates a pandas DataFrame from the JSON response.

In [8]:
# venue records are read from the first group of the response
venues = results['response']['groups'][0]['items']

# flatten JSON; pd.json_normalize replaces the deprecated
# `from pandas.io.json import json_normalize` import
nearby_venues = pd.json_normalize(venues)

# filter columns; .copy() makes the selection an independent frame so the
# column assignment below cannot trigger SettingWithCopyWarning
filtered_columns = ['venue.id', 'venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns].copy()

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the last dotted component ('venue.location.lat' -> 'lat')
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()



Unnamed: 0,id,name,categories,lat,lng
0,4f5ceeafe4b0c4b68a250bc7,The Hound,Gastropub,32.607874,-85.480837
1,4b14b3cdf964a520cfa523e3,Pita Pit,Sandwich Place,32.608078,-85.481932
2,4b14b400f964a520d2a523e3,Mellow Mushroom,Pizza Place,32.607369,-85.481391
3,4b3263d6f964a520590a25e3,Toomer's Drugs,Pharmacy,32.60666,-85.481597
4,4b14b36cf964a520c8a523e3,Hamilton's,American Restaurant,32.60642,-85.480094


This next section is used to visualize the locations of the venues in order to determine the k needed for splitting Auburn into "neighborhoods" using K-means clustering. As you can see, there are 3 distinct regions - south Auburn, campus (downtown), and Opelika (northeast Auburn).

In [9]:
# Base map centred on the same Auburn coordinates used for the venue search
map_clusters = folium.Map(location=[32.6099, -85.4808], zoom_start=13)

# One circle marker per venue; the popup shows the venue name
venue_points = zip(nearby_venues['lat'], nearby_venues['lng'], nearby_venues['name'])
for venue_lat, venue_lng, venue_name in venue_points:
    marker = folium.CircleMarker(
        location=[venue_lat, venue_lng],
        radius=5,
        popup=venue_name,
        fill=True,
    )
    marker.add_to(map_clusters)

map_clusters

This next section creates "neighborhoods" within Auburn based on the coordinates of the venues.

In [10]:
kclusters = 3

# Cluster on the coordinates only. Selecting the two columns by name avoids the
# positional `axis` argument of DataFrame.drop (removed in pandas 2.0) and
# keeps an already inserted 'Cluster Labels' column out of the features when
# this cell is re-run.
coords = nearby_venues[['lat', 'lng']]

# fixed seed so the fit is reproducible from run to run
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(coords)

In [11]:
# cluster index assigned to each row of the coordinates the model was fitted on
print(kmeans.labels_)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 0 1 2 2 0 1
 2 2 2 0 0 0 0 2 2 2 2 0 2]


In [12]:
# DataFrame.insert raises if the column already exists, so drop any label
# column left over from a previous run to keep this cell re-runnable
nearby_venues = nearby_venues.drop(columns='Cluster Labels', errors='ignore')
nearby_venues.insert(0, 'Cluster Labels', kmeans.labels_)
nearby_venues.head()

Unnamed: 0,Cluster Labels,id,name,categories,lat,lng
0,1,4f5ceeafe4b0c4b68a250bc7,The Hound,Gastropub,32.607874,-85.480837
1,1,4b14b3cdf964a520cfa523e3,Pita Pit,Sandwich Place,32.608078,-85.481932
2,1,4b14b400f964a520d2a523e3,Mellow Mushroom,Pizza Place,32.607369,-85.481391
3,1,4b3263d6f964a520590a25e3,Toomer's Drugs,Pharmacy,32.60666,-85.481597
4,1,4b14b36cf964a520c8a523e3,Hamilton's,American Restaurant,32.60642,-85.480094


In [13]:
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np

# One colour per venue cluster. The count is derived from the labels stored in
# the frame rather than from `kclusters`, which a later cell rebinds to a
# different value; this keeps the palette stable if cells are re-run.
n_venue_clusters = int(nearby_venues['Cluster Labels'].max()) + 1
colors_array = cm.rainbow(np.linspace(0, 1, n_venue_clusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

map_clusters = folium.Map(location=[32.6099, -85.4808], zoom_start=13)
for lat, lon, name, cluster in zip(nearby_venues['lat'], nearby_venues['lng'],
                                   nearby_venues['name'], nearby_venues['Cluster Labels']):
    label = folium.Popup(name + ' ' + str(cluster))
    # Palette is shifted by one: label 0 takes the last colour, label 1 the
    # first, and so on. The modulo spells out the wrap-around that negative
    # indexing previously did implicitly.
    marker_color = rainbow[(cluster - 1) % n_venue_clusters]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill_color=marker_color,
        fill_opacity=0.7,
        fill=True).add_to(map_clusters)
map_clusters

The area with cluster label 0 is South Auburn (red dots), the area with cluster label 1 is campus (purple dots), and the area with cluster label 2 is Opelika (cyan dots).

The next step is to figure out whether South Auburn or Opelika is more similar to campus. To do this we will compare the types of venues in each neighborhood to the types of venues on campus using k-means clustering. First, you must assign dummy variables to the different categories and then group them together to find the relative frequency of each type.

In [14]:
def return_most_common_venues(row, num_top_venues):
    """Return the index labels of the largest entries of ``row``.

    The first entry of ``row`` is skipped by position. The remaining values
    are sorted in descending order and the labels of the first
    ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [15]:
# one hot encoding: one indicator column per venue category
auburn_onehot = pd.get_dummies(nearby_venues[['categories']], prefix="", prefix_sep="")

# A venue category that is itself called 'Neighborhood' would collide with the
# label column added below (previously it was silently overwritten and the
# column reordering then moved the wrong column). Rename it out of the way.
auburn_onehot = auburn_onehot.rename(columns={'Neighborhood': 'Neighborhood (category)'})

# add the neighborhood (cluster label) as the first column
auburn_onehot.insert(0, 'Neighborhood', nearby_venues['Cluster Labels'])

auburn_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Auto Garage,BBQ Joint,Bagel Shop,Burger Joint,Café,Cocktail Bar,Coffee Shop,College Basketball Court,...,Pharmacy,Pizza Place,Restaurant,Sandwich Place,Seafood Restaurant,Smoothie Shop,Spa,Sporting Goods Shop,Supermarket,Taco Place
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
# Average the one-hot rows within each neighborhood, so every category column
# holds the share of that neighborhood's venues belonging to the category
auburn_grouped = auburn_onehot.groupby('Neighborhood', as_index=False).mean()
auburn_grouped

Unnamed: 0,Neighborhood,American Restaurant,Auto Garage,BBQ Joint,Bagel Shop,Burger Joint,Café,Cocktail Bar,Coffee Shop,College Basketball Court,...,Pharmacy,Pizza Place,Restaurant,Sandwich Place,Seafood Restaurant,Smoothie Shop,Spa,Sporting Goods Shop,Supermarket,Taco Place
0,0,0.0,0.142857,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.142857,0.142857,0.142857,0.0,0.0
1,1,0.129032,0.0,0.096774,0.032258,0.064516,0.032258,0.032258,0.032258,0.032258,...,0.032258,0.064516,0.0,0.064516,0.032258,0.0,0.0,0.0,0.0,0.032258
2,2,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.083333,0.166667,0.083333,0.083333,0.0,0.0,0.0,0.0,0.083333,0.0


In [17]:
num_top_venues = 10


def _ordinal(rank):
    """Return ``rank`` with its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st, ...)."""
    if 11 <= rank % 100 <= 13:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(rank % 10, 'th')
    return '{}{}'.format(rank, suffix)


# create columns according to number of top venues
columns = ['Neighborhood'] + [
    '{} Most Common Venue'.format(_ordinal(rank))
    for rank in range(1, num_top_venues + 1)
]

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = auburn_grouped['Neighborhood']

# plain range(): no reliance on numpy having been imported by an earlier cell
for ind in range(auburn_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(auburn_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,Hotel,Sporting Goods Shop,Spa,Smoothie Shop,Auto Garage,Burger Joint,Park,Coffee Shop,Donut Shop,Deli / Bodega
1,1,American Restaurant,BBQ Joint,Burger Joint,Sandwich Place,Pizza Place,Grocery Store,Bagel Shop,Café,Cocktail Bar,Coffee Shop
2,2,Pizza Place,American Restaurant,Deli / Bodega,Grocery Store,Supermarket,Japanese Restaurant,Donut Shop,Pharmacy,Frozen Yogurt Shop,Restaurant


This next piece clusters the neighborhoods based on the relative frequency of each venue category in each area.

In [18]:
# set number of clusters
kclusters = 2

# features are the per-category frequencies only; the keyword form replaces the
# positional `axis` argument of DataFrame.drop, which pandas 2.0 removed
auburn_grouped_clustering = auburn_grouped.drop(columns='Neighborhood')

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(auburn_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 1, 1], dtype=int32)

In [19]:
# DataFrame.insert raises if the column already exists, so drop any label
# column left over from a previous run to keep this cell re-runnable
neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels 2', errors='ignore')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels 2', kmeans.labels_)
neighborhoods_venues_sorted.head()

Unnamed: 0,Cluster Labels 2,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,0,Hotel,Sporting Goods Shop,Spa,Smoothie Shop,Auto Garage,Burger Joint,Park,Coffee Shop,Donut Shop,Deli / Bodega
1,1,1,American Restaurant,BBQ Joint,Burger Joint,Sandwich Place,Pizza Place,Grocery Store,Bagel Shop,Café,Cocktail Bar,Coffee Shop
2,1,2,Pizza Place,American Restaurant,Deli / Bodega,Grocery Store,Supermarket,Japanese Restaurant,Donut Shop,Pharmacy,Frozen Yogurt Shop,Restaurant


Neighborhoods 1 and 2 (campus and Opelika) were grouped together in the clustering, thereby suggesting that Opelika is more similar to campus than South Auburn.