In [1]:
import pandas as pd
import numpy as np

import seaborn as sns

import folium
from folium.features import DivIcon
import requests
from tqdm.auto import tqdm

Rexburg's four corners:
- NW: 43.840199, -111.807453
- NE: 43.840199, -111.757579
- SW: 43.804226, -111.807453
- SE: 43.804226, -111.757579

4km square

In [2]:
def grid_circle_centers(corners: np.array, grid_dims: np.array) -> np.array:
    num_vert_segments = grid_dims[0] * 2
    num_horiz_segments = grid_dims[1] * 2
    vert_segment_len = abs(corners[0][0] - corners[3][0]) / num_vert_segments
    horiz_segment_len = abs(corners[2][1] - corners[3][1]) / num_horiz_segments
    
    centers = []
    
    for v in range(1, num_vert_segments, 2):
        v_coord = v_coord = corners[-1][0] - v * vert_segment_len        
        for h in range(1, num_horiz_segments, 2):
            centers.append((v_coord, corners[1][1] + h * horiz_segment_len))
    
    return np.array(centers)

In [3]:
def grid_square_polygons(corners: np.array, grid_dims: np.array) -> np.array:
    num_vert_segments = grid_dims[0]
    num_horiz_segments = grid_dims[1]
    vert_segment_len = abs(corners[0][0] - corners[3][0]) / grid_dims[0]
    horiz_segment_len = abs(corners[2][1] - corners[3][1]) / grid_dims[1]

    polygons = np.zeros((num_vert_segments, num_horiz_segments, 2, 2))
    
    northing_start = corners[2][0]
    easting_start = corners[2][1]
    
    for v in range(num_vert_segments):
        for h in range(num_horiz_segments):
            # Define NW & SE corner coodinates
            polygons[v, h, 0] = [northing_start - vert_segment_len*v, 
                                 easting_start + horiz_segment_len*h]
            polygons[v, h, 1] = [northing_start - (vert_segment_len * (v + 1)), 
                                 easting_start + (horiz_segment_len * (h + 1))]
    
    return polygons

In [4]:
SE = np.array([43.804226, -111.757579])
SW = np.array([43.804226, -111.807453])
NW = np.array([43.840199, -111.807453])
NE = np.array([43.840199, -111.757579])
CORNERS = np.array([SE, SW, NW, NE])

grid_dimensions = np.array([4, 4])
grid_circles = grid_circle_centers(CORNERS, grid_dimensions)
grid_squares = grid_square_polygons(CORNERS, grid_dimensions)

map_center = (round(np.mean([lat for lat, long in CORNERS]), 6), 
              round(np.mean([long for lat, long in CORNERS]), 6))

In [5]:
map_rexburg = folium.Map(location=map_center, zoom_start=13)
folium.Rectangle([each for each in CORNERS[::2]], color='gray', weight=2).add_to(map_rexburg)

counter = 1
for row in grid_squares:
#     print(f' PASS #{counter}'.center(60, '*'))
#     print(' ROW '.center(60, '*'))
#     print(row)
    for square in row:
#         print(' SQUARE '.center(60, '*'))
#         print(square)
        upper_bound, lower_bound = square
#         print(' BOUNDS '.center(60, '*'))
#         print(f'u: {upper_bound} l: {lower_bound}')            
        folium.Rectangle(
            [upper_bound, lower_bound],
            fill=False,
            weight=0.5,
            color='gray'
            ).add_to(map_rexburg)

        folium.map.Marker(
            [grid_circles[counter-1][0], grid_circles[counter-1][1]],
            icon=DivIcon(icon_size=(10,10),
                         icon_anchor=(10,10),
                         html=f'<div style="text-align:center;font-size: 10pt">{counter}</div>')
        ).add_to(map_rexburg)

        counter += 1
        
for lat, long in grid_circles:
    folium.Circle(
        [lat, long],
        radius=710,
        fill=True,
        fill_opacity=0.2,
        weight=0.3
        ).add_to(map_rexburg)

map_rexburg

In [6]:
map_rexburg = folium.Map(location=map_center, zoom_start=13)
folium.Rectangle([each for each in CORNERS[::2]], color='gray', weight=2).add_to(map_rexburg)

for lat, long in grid_circles:
    folium.Circle(
        [lat, long],
        radius=710,
        fill=True,
        fill_opacity=0.2,
        weight=0.3
        ).add_to(map_rexburg)

map_rexburg

In [7]:
CLIENT_ID = 'VXYAGD1UCDJXEPUG3JUKI24MZMLYQUOPMYMEDB0MBXZQFMU3' # your Foursquare ID
CLIENT_SECRET = '1QR1DGV15DE0QEZM5VVYSDFQ1TZJMSP1G4TYVP2ITYYLMWMF' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100

def get_nearby_venues(grid_section_names, latitudes, longitudes, radius=1000):
    venues_list=[]
    for grid_section_name, lat, lng in zip(grid_section_names, latitudes, longitudes):
        print(grid_section_name, end=' ')
            
        # create the API request URL
        req_params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'll': f'{lat},{lng}',
            'v': VERSION,
            'radius': radius,
            'limit': LIMIT
        }
        url = 'https://api.foursquare.com/v2/venues/search'
            
        # make the GET request
        results = requests.get(url, req_params).json(
                    )['response']['venues']

        # return only relevant information for each nearby venue
        for v in results:
            try:
                category = v['categories'][0]['name']
            except IndexError:
                category = 'None'
                
            venues_list.append({
                'grid_section': grid_section_name,
                'grid_section_lat': lat,
                'grid_section_long': lng,
                'venue': v['name'],
                'v_lat': v['location']['lat'], 
                'v_long': v['location']['lng'], 
                'category': category
            })

    nearby_venues = pd.DataFrame(venues_list)
    
    return(nearby_venues)

In [8]:
rexburg_venues = get_nearby_venues(range(1, np.prod(grid_dimensions) + 1), 
                                   grid_circles[:, 0], grid_circles[:, 1], 
                                   radius=710)

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 

In [9]:
rexburg_venues.drop_duplicates(['venue', 'v_lat', 'v_long'], inplace=True)
rexburg_venues.shape

(711, 7)

In [10]:
rexburg_venues['grid_section'].value_counts()

1     100
10     84
9      72
7      72
2      69
11     47
3      47
6      45
13     41
14     32
5      27
4      26
12     25
8      12
16      8
15      4
Name: grid_section, dtype: int64

In [11]:
rexburg_venues['category'].value_counts()

Residential Building (Apartment / Condo)    63
None                                        33
Church                                      33
Automotive Shop                             20
Office                                      19
                                            ..
Costume Shop                                 1
Light Rail Station                           1
Golf Course                                  1
Dry Cleaner                                  1
Animal Shelter                               1
Name: category, Length: 188, dtype: int64

In [50]:
rexburg_venues[rexburg_venues['category'].str.contains('None')].sort_values(by='venue').iloc[:, -4:]

Unnamed: 0,venue,v_lat,v_long,category
1055,Aalita,43.822069,-111.78067,
916,Alterra Pest Control,43.821856,-111.788608,
529,BYU-Idaho Alumni Center,43.824978,-111.783766,
151,CALl,43.833954,-111.777592,
427,Century 21,43.820972,-111.792791,
25,Comfort Inn,43.824699,-111.809267,
932,Days Inn,43.820122,-111.78879,
635,Dr. Gates,43.825727,-111.777256,
724,Greenbrier,43.82252,-111.77937,
33,Izcalli Mexican Food,43.826316,-111.788662,


In [52]:
# [Delete]
idxs = [916, 151, 635, 909, 1005, 807, 998, 918, 818, 817, 113, 31]
rexburg_venues.drop(idxs, inplace=True)

In [53]:
# Automotive Shop
idxs = [788, 332]
rexburg_venues.loc[idxs, 'category'] = 'Automotive Shop'

# Church
idxs = [346]
rexburg_venues.loc[idxs, 'category'] = 'Church'

# College Administrative Building
idxs = [529, 994]
rexburg_venues.loc[idxs, 'category'] = 'College Administrative Building'

# High School
idxs = [312]
rexburg_venues.loc[idxs, 'category'] = 'High School'

# Hotel
idxs = [25, 932]
rexburg_venues.loc[idxs, 'category'] = 'Hotel'

# Lighting Store
idxs = [14]
rexburg_venues.loc[idxs, 'category'] = 'Lighting Store'

# Mexican Restaurant
idxs = [33, 266, 26]
rexburg_venues.loc[idxs, 'category'] = 'Mexican Restaurant'

# Movie Theater
idxs = [560]
rexburg_venues.loc[idxs, 'category'] = 'Movie Theater'

# Park
idxs = [1044]
rexburg_venues.loc[idxs, 'category'] = 'Park'

# Post Office
idxs = [548]
rexburg_venues.loc[idxs, 'category'] = 'Post Office'

# Real Estate Office
idxs = [427]
rexburg_venues.loc[idxs, 'category'] = 'Real Estate Office'

# Residential Building (Apartment / Condo)
idxs = [1055, 724, 32, 1232]
rexburg_venues.loc[idxs, 'category'] = 'Residential Building (Apartment / Condo)'

# Tanning Salon
idxs = [819]
rexburg_venues.loc[idxs, 'category'] = 'Tanning Salon'

In [54]:
rexburg_venues[rexburg_venues['category'].str.contains('None')].sort_values(by='venue').iloc[:, -4:]

Unnamed: 0,venue,v_lat,v_long,category


In [67]:
rexburg_venues['category'].value_counts()[:25]

Residential Building (Apartment / Condo)    66
Church                                      34
Automotive Shop                             22
Office                                      19
Building                                    19
Doctor's Office                             17
College Administrative Building             15
College Residence Hall                      15
Gas Station                                 14
Fast Food Restaurant                        13
Mobile Phone Shop                           11
Hardware Store                              10
Mexican Restaurant                          10
Dentist's Office                             9
College Academic Building                    9
Bank                                         9
Medical Center                               8
Salon / Barbershop                           8
Auto Dealership                              8
College Classroom                            8
Tech Startup                                 7
Rental Car Lo

In [78]:
rexburg_venues[rexburg_venues['category'].str.contains('Resid')]

Unnamed: 0,grid_section,grid_section_lat,grid_section_long,venue,v_lat,v_long,category
11,1,43.835702,-111.801219,Cambridge Court,43.825789,-111.796100,Residential Building (Apartment / Condo)
32,1,43.835702,-111.801219,Twin Pines Manor,43.829955,-111.786234,Residential Building (Apartment / Condo)
102,2,43.835702,-111.788750,Mountain Shores Apartments,43.832708,-111.788956,Residential Building (Apartment / Condo)
176,2,43.835702,-111.788750,Middletown Apartments,43.828618,-111.783350,Residential Building (Apartment / Condo)
376,4,43.835702,-111.763813,Brentwood,43.831909,-111.774349,Residential Building (Apartment / Condo)
448,5,43.826709,-111.801219,Peterson Point,43.819105,-111.809036,Residential Building (Apartment / Condo)
449,5,43.826709,-111.801219,Madison Park Apartments,43.830009,-111.797274,Residential Building (Apartment / Condo)
450,5,43.826709,-111.801219,Parkside,43.821268,-111.808790,Residential Building (Apartment / Condo)
476,5,43.826709,-111.801219,Steiner Ave,43.820244,-111.794189,Residential Building (Apartment / Condo)
478,5,43.826709,-111.801219,Campus Courtyard,43.820128,-111.796271,Residential Building (Apartment / Condo)


In [71]:
req_params = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'v': VERSION,
    }
url = 'https://api.foursquare.com/v2/venues/categories'

# make the GET request
category_tree = requests.get(url, req_params).json()

In [72]:
def show_category_tree(tree, level=0, parent=None, categories=[]):
    if not tree:
        return categories
    else:
        for category in tree:
            categories.append({
                'category': category['name'],
                'level': level,
                'parent': parent
            })
            show_category_tree(category['categories'], level=level+1, 
                               parent=category['name'], categories=categories)
        return categories

In [73]:
categories = pd.DataFrame(show_category_tree(category_tree['response']['categories']))
categories.shape

(941, 3)

In [74]:
def map_category_group(category, max_depth=0):
    current_depth = categories[categories['category'] == category]['level'].values[0]
    current_group = categories[categories['category'] == category]['category'].values[0]

    while current_depth > max_depth:
        current_depth -= 1
        current_group = categories[categories['category'] == current_group]['parent'].values[0]
    
    return current_group

In [None]:
rexburg_venues['group'] = rexburg_venues['category'].apply(lambda x: map_category_group(x, 1))
rexburg_venues['type'] = rexburg_venues['category'].apply(lambda x: map_category_group(x, 0))
rexburg_venues.head()

In [107]:
rexburg_apartments = rexburg_venues[rexburg_venues['category'].str.contains('Resid')]
rexburg_food = rexburg_venues[rexburg_venues['type'].str.contains('Food')]
rexburg_recreation = rexburg_venues[rexburg_venues['type'].str.contains('Outd')]

In [336]:
map_venues = folium.Map(location=map_center, zoom_start=14)
folium.Polygon([each for each in CORNERS], color='gray', weight=2).add_to(map_venues)

for lat, long, label in zip(rexburg_apartments.v_lat, rexburg_apartments.v_long, rexburg_apartments.venue):
    folium.CircleMarker(
        [lat, long],
        radius=3,
        popup=label,
        fill=True,
        color='blue',
        fill_color='blue',
        fill_opacity=0.5,
        weight=1
        ).add_to(map_venues)

for lat, long, label in zip(rexburg_food.v_lat, rexburg_food.v_long, rexburg_food.venue):
    folium.CircleMarker(
        [lat, long],
        radius=3,
        popup=label,
        fill=True,
        color='red',
        fill_color='red',
        fill_opacity=0.5,
        weight=1
        ).add_to(map_venues)

for lat, long, label in zip(rexburg_recreation.v_lat, rexburg_recreation.v_long, rexburg_recreation.venue):
    folium.CircleMarker(
        [lat, long],
        radius=3,
        popup=label,
        fill=True,
        color='green',
        fill_color='green',
        fill_opacity=0.5,
        weight=1
        ).add_to(map_venues)

counter = 1
for square, center in zip(grid_squares.reshape(-1, 2, 2), grid_circles):
    folium.Rectangle(
        square,
        color='gray',
        fill=False,
        weight=0.5,
        ).add_to(map_venues)

    folium.map.Marker(
        center,
        icon=DivIcon(icon_size=(1,1),
                     icon_anchor=(10,10),
                     html=f'<div style="text-align:center;font-size:12pt;color:gray;'
                          f'font-weight:bold">{counter}</div>')
        ).add_to(map_venues)
    counter += 1
    
map_venues

In [128]:
from sklearn.cluster import KMeans, DBSCAN
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import silhouette_score

import matplotlib.cm as cm
import matplotlib.colors as colors

In [112]:
grid_categories = pd.crosstab(rexburg_venues.grid_section, rexburg_venues.group)

In [114]:
grid_categories_scaled = grid_categories / grid_categories.max().max()
grid_categories_scaled

group,ATM,Airport,American Restaurant,Animal Shelter,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,Automotive Shop,...,Storage Facility,Strip Club,Student Center,Tanning Salon,Tea Room,Thrift / Vintage Store,Trail,University,Video Game Store,Video Store
grid_section,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.058824,0.058824,0.058824,0.058824,0.0,0.0,0.294118,0.058824,0.117647,...,0.117647,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.058824
2,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.294118,...,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.117647
3,0.058824,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.058824,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,...,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.058824
6,0.0,0.0,0.0,0.0,0.117647,0.0,0.058824,0.0,0.0,0.0,...,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.0
7,0.058824,0.0,0.0,0.0,0.117647,0.0,0.058824,0.117647,0.0,0.294118,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.235294,0.235294,0.176471,...,0.117647,0.058824,0.058824,0.058824,0.0,0.058824,0.0,0.0,0.0,0.0
10,0.0,0.0,0.0,0.0,0.0,0.058824,0.117647,0.117647,0.0,0.058824,...,0.0,0.0,0.058824,0.058824,0.0,0.0,0.0,0.117647,0.0,0.0


In [116]:
gs_params = {
    'n_clusters': range(2, 11)
}

gs = GridSearchCV(KMeans(),
                  gs_params,
                  n_jobs=-1,
                  cv=3)

gs.fit(grid_categories_scaled)

GridSearchCV(cv=3, error_score='raise-deprecating',
             estimator=KMeans(algorithm='auto', copy_x=True, init='k-means++',
                              max_iter=300, n_clusters=8, n_init=10,
                              n_jobs=None, precompute_distances='auto',
                              random_state=None, tol=0.0001, verbose=0),
             iid='warn', n_jobs=-1, param_grid={'n_clusters': range(2, 11)},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [117]:
gs.best_estimator_.labels_

array([6, 7, 2, 1, 0, 2, 4, 0, 3, 5, 5, 1, 0, 1, 1, 1], dtype=int32)

In [122]:
kclusters = gs.best_params_['n_clusters']
gs.best_params_

{'n_clusters': 8}

In [120]:
silhouette_score(grid_categories_scaled, gs.best_estimator_.labels_)

0.08714315863731642

In [131]:
grid_categories['cluster'] = gs.best_estimator_.labels_
grid_categories = grid_categories.join(rexburg_venues.groupby('grid_section').first().iloc[:, :2])

In [325]:
# create map
map_clusters = folium.Map(location=map_center, zoom_start=13)

folium.Polygon([each for each in CORNERS], color='gray', weight=2).add_to(map_clusters)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.afmhot(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster, square, center in zip(grid_categories['grid_section_lat'], 
                                                            grid_categories['grid_section_long'], 
                                                            grid_categories.index, 
                                                            grid_categories['cluster'],
                                                            grid_squares.reshape(-1, 2, 2),
                                                            grid_circles):
    
    label = folium.Popup(f'Grid {poi}' + ' Cluster ' + str(cluster + 1))

    folium.Rectangle(
        square,
        popup=label,
        color='gray',
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.25,
        weight=0.5,
        ).add_to(map_clusters)

    folium.map.Marker(
        center,
        icon=DivIcon(icon_size=(1,1),
                     icon_anchor=(10,10),
                     html=f'<div style="text-align:center;font-size: 10pt">{poi}<br>{label}</div>')
        ).add_to(map_clusters)
       
map_clusters

----