In [None]:
import numpy as np 
import pandas as pd 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import math
import requests
import json 
import folium 
import geocoder 

import io
from PIL import Image
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans
from pandas.io.json import json_normalize 
from geopy.geocoders import Nominatim 

print('Libraries imported.')

In [None]:
# Read the neighborhood label table into `dc`.
# NOTE(review): this is an absolute path on the author's machine, so the cell
# fails on any other checkout until it points at a local copy of the CSV.
path = "/Users/isalau/Documents/GitHub/Coursera_Capstone/Neighborhood_Labels.csv"
dc = pd.read_csv(path)  

In [None]:
# Give the raw columns the names the rest of the notebook works with:
# X / Y become the coordinates and NAME becomes the neighborhood name.
dc = dc.rename(columns={'X': 'Longitude', 'Y': 'Latitude', 'NAME': 'Neighborhood'})

In [None]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100, timeout=10):
    """Query the Foursquare "explore" endpoint around each neighborhood.

    Uses the notebook-level globals CLIENT_ID, CLIENT_SECRET and VERSION for
    authentication; they must be defined before any request is made.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Neighborhood name and coordinates; iterated in lockstep.
    radius : number, default 500
        Passed to the API as the `radius` query parameter.
    LIMIT : int, default 100
        Passed to the API as the `limit` query parameter.
    timeout : number, default 10
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but still has these columns) when no venue is
        found. 'Venue Category' is None for venues without any category.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; fail loudly on HTTP errors instead of with a
        # KeyError while digging through an error payload
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without categories must not abort the whole download
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor keeps the schema intact even
    # when there are no rows at all.
    return pd.DataFrame(venue_rows, columns=venue_columns)

In [None]:
# Load the venue table from CSV instead of querying the API again
# (presumably a saved result of getNearbyVenues -- confirm).
# NOTE(review): this is an absolute path on the author's machine, so the cell
# fails on any other checkout until it points at a local copy of the CSV.
path = "/Users/isalau/Documents/GitHub/Coursera_Capstone/dc_venues.csv"
dc_venues_from_csv = pd.read_csv(path)
dc_venues = dc_venues_from_csv

# Preview as the last expression so the notebook actually renders it.
dc_venues.head()

In [None]:
#DC Venues
# Per neighborhood, count the non-null entries of every other column,
# i.e. how many venue rows each neighborhood has.
dc_venues.groupby('Neighborhood').count()
# Disabled: would print the number of distinct venue categories.
# print('There are {} uniques categories.'.format(len(dc_venues['Venue Category'].unique())))

In [None]:
# One-hot encode the venue categories: one indicator column per category.
dc_onehot = pd.get_dummies(dc_venues[['Venue Category']], prefix="", prefix_sep="")

# Should a venue category itself be called "Neighborhood", its indicator
# column would be silently overwritten by the key column added below.
# Keep it under a distinct name instead (a no-op when there is no such category).
dc_onehot = dc_onehot.rename(columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# Add the neighborhood name as the first column; it is the grouping key.
dc_onehot.insert(0, 'Neighborhood', dc_venues['Neighborhood'])

# The mean of the indicators is the share of a neighborhood's venues that
# falls into each category.
dc_grouped = dc_onehot.groupby('Neighborhood').mean().reset_index()

In [None]:
def return_most_common_venues(row, num_top_venues):
    """Return the index labels of the largest values in `row`.

    The first entry of `row` is skipped (callers pass rows whose leading
    entry is the neighborhood name). The remaining entries are ordered from
    largest to smallest and the labels of the first `num_top_venues` of them
    are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [None]:
# Number of most common venue categories to keep per neighborhood.
num_top_venues = 30

indicators = ['st', 'nd', 'rd']

# Create one column per rank: '1st Most Common Venue', '2nd ...', and so on.
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    rank = int(ind) + 1
    # Ranks ending in 1, 2 or 3 take 'st', 'nd', 'rd' -- except 11, 12 and 13,
    # which (like every other rank) take 'th'.
    if rank % 10 in (1, 2, 3) and rank % 100 not in (11, 12, 13):
        suffix = indicators[rank % 10 - 1]
    else:
        suffix = 'th'
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# Create the result frame with one row per neighborhood.
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = dc_grouped['Neighborhood']

# Fill each row with that neighborhood's categories, most frequent first.
for ind in np.arange(dc_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(dc_grouped.iloc[ind, :], num_top_venues)

In [None]:
# Feature matrix for clustering: the per-neighborhood category frequencies
# without the name column. The keyword form is required because the
# positional `axis` argument of drop() no longer exists in pandas 2.x.
dc_grouped_clustering = dc_grouped.drop(columns='Neighborhood')

# Placeholder label column, filled in by the clustering cells. Guarded so
# that re-running this cell does not raise "already exists".
if 'Cluster Labels' not in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', 0)

# Visualizations 

In [None]:
def makeMap(df, numclusters):
    """Plot every neighborhood in `df` on a folium map, coloured by cluster.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'Latitude', 'Longitude', 'Neighborhood' and
        'Cluster Labels'. A NaN cluster label marks a neighborhood without a
        cluster.
    numclusters : int
        Number of clusters; labels are expected to lie in 0..numclusters-1.

    Returns
    -------
    folium.Map
        The map, which is also written to 'map<numclusters>.html' in the
        current working directory.
    """
    address = 'Washington,DC'

    geolocator = Nominatim(user_agent="dc_explorer")
    location = geolocator.geocode(address)
    if location is None:
        # geocode() yields None when the address cannot be resolved; centre
        # the map on the approximate coordinates of Washington, DC instead
        # of failing with an AttributeError.
        latitude, longitude = 38.9072, -77.0369
    else:
        latitude = location.latitude
        longitude = location.longitude

    # create map
    map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

    # One colour per cluster plus one extra (the last) for neighborhoods
    # without a label, so that group no longer shares a colour with cluster 0.
    palette = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, numclusters + 1))]

    # add markers to the map
    for lat, lon, poi, cluster in zip(df['Latitude'], df['Longitude'], df['Neighborhood'], df['Cluster Labels']):
        if math.isnan(cluster):
            # unlabelled neighborhoods are shown as pseudo-cluster `numclusters`
            clus = numclusters
        else:
            clus = int(cluster)
        label = folium.Popup('Neighborhood: '+ str(poi) + ' Cluster: ' + str(clus), parse_html=True)

        folium.CircleMarker(
            [lat, lon],
            radius=5,
            popup=label,
            color=palette[clus],
            fill=True,
            fill_color=palette[clus],
            fill_opacity=0.7).add_to(map_clusters)

    map_clusters.save('map%s.html' % numclusters)
    return map_clusters

# KMeans

In [None]:
# 3 clusters would ideally represent residential, tourist and industrial areas.
# Run from 3 to 10 clusters to see where neighborhoods start to differentiate.
for kclusters in range(3, 11):
    # Run k-means clustering. n_init is pinned because its default changed in
    # scikit-learn 1.4; without it the labels depend on the installed version.
    kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=200).fit(dc_grouped_clustering)

    # store the labels of this run next to the top venues
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_

    # Attach labels and top venues to the coordinates in `dc`. join() keeps
    # every row of `dc`, so neighborhoods without venue data get NaN labels.
    dc_merged = dc.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

    # Build the map for this k; makeMap also writes it to map<k>.html.
    makeMap(dc_merged, kclusters)

# Manually Cluster Neighborhoods

In [None]:
def rename_labels(cat):
    """Collapse venue category names into coarse land-use labels.

    Every value of `cat` is mapped in three steps:

    1. category name -> fine-grained group (Grocery, Bar, Lodging, ...).
       Names listed verbatim below are matched exactly first; anything else
       falls back to the substring/regex rules.
    2. leftover names are caught by keyword patterns ('.*School.*', ...).
    3. group -> 'Residential', 'RTBoth' (residential and tourism),
       'Tourism' or 'Industrial'.

    Values that no rule matches are returned unchanged.

    Parameters
    ----------
    cat : pandas.Series
        Venue category names (non-string values pass through untouched).

    Returns
    -------
    pandas.Series
        Same shape as `cat`, with the relabelled values.
    """
    # (group, category names) in the order the groups are applied.
    category_groups = [
        #Residential
        ('Grocery', ['Tea','Bakery','Bagel Shop','Candy Store','Chocolate Shop','Convenience Store','Cupcake Shop','Deli / Bodega','Dessert Shop','Donut Shop','Drugstore','Farmers Market','Food','Food & Drink Shop','Gourmet Shop','Grocery Store','Liquor Store','Market','Organic Grocery','Pharmacy','Smoothie Shop','Snack Place','Supermarket','Supplement Shop','Wine Shop','Beer Store']),
        ('Health', ['Chiropractor','Health & Beauty Service','Hospital','Massage Studio','Nail Salon','Spa','Salon / Barbershop']),
        ('Home', ['Dog Run','Dry Cleaner','Home Service','Laundromat','Locksmith','Moving Target','Other Repair Shop','Pet Service','Shoe Repair','Storage Facility','Tailor Shop','Residential Building (Apartment / Condo)']),
        ('Sports', ['Athletics & Sports','Baseball Field','Basketball Court','Basketball Stadium','Boxing Gym','Cycle Studio','Dance Studio','Field','Golf Course','Gym','Gym / Fitness Center','Gym Pool','Gymnastics Gym','Harbor / Marina','Heliport','Lake','Martial Arts School','Pilates Studio','Playground','Skating Rink','Soccer Field','Soccer Stadium','Sports Club','State / Provincial Park','Volleyball Court','Yoga Studio','Recreation Center','Track','Tennis Court']),

        #Residential/Tourism
        ('Shopping', ['Accessories Store','Antique Shop','Arts & Crafts Store','Automotive Shop','Big Box Store','Bookstore','Boutique','Camera Store','Christmas Market','Clothing Store','Comic Shop','Cosmetics Shop','Department Store','Discount Store','Electronics Store','Flea Market','Flower Shop','Furniture / Home Store','Gift Shop','Hardware Store','Herbs & Spices Store','Jewelry Store','Kids Store','Kitchen Supply Store','Leather Goods Store','Lingerie Store','Mattress Store',"Men's Store",'Miscellaneous Shop','Mobile Phone Shop','Music Store','Optical Shop','Paper / Office Supplies Store','Pet Store','Photography Lab','Record Shop','Shipping Store','Shoe Store','Shop & Service','Shopping Mall','Shopping Plaza','Smoke Shop','Souvenir Shop','Sporting Goods Shop','Thrift / Vintage Store','Toy / Game Store','Video Store','Warehouse Store',"Women's Store"]),
        ('Restaurant', ['Afghan Restaurant','American Restaurant','Arepa Restaurant','Asian Restaurant','BBQ Joint','Belgian Restaurant','Bistro','Brazilian Restaurant','Breakfast Spot','Burger Joint','Burrito Place','Café','Cafeteria','Cajun / Creole Restaurant','Cantonese Restaurant','Caribbean Restaurant','Cheese Shop','Chinese Restaurant','Coffee Shop','Comfort Food Restaurant','Cuban Restaurant','Diner','Dumpling Restaurant','Eastern European Restaurant','Empanada Restaurant','Ethiopian Restaurant','Falafel Restaurant','Fast Food Restaurant','Filipino Restaurant','Fish & Chips Shop','Food Court','Food Service','Food Truck','French Restaurant','Fried Chicken Joint','Frozen Yogurt Shop','German Restaurant','Gluten-free Restaurant','Greek Restaurant','Hot Dog Joint','Ice Cream Shop','Indian Restaurant','Israeli Restaurant','Italian Restaurant','Japanese Restaurant','Juice Bar','Korean Restaurant','Latin American Restaurant','Mediterranean Restaurant','Mexican Restaurant','Middle Eastern Restaurant','New American Restaurant','Noodle House','Peruvian Restaurant','Pizza Place','Poke Place','Portuguese Restaurant','Puerto Rican Restaurant','Ramen Restaurant','Restaurant','Salad Place','Salvadoran Restaurant','Sandwich Place','Scandinavian Restaurant','Seafood Restaurant','South American Restaurant','Southern / Soul Food Restaurant','Spanish Restaurant','Steakhouse','Street Food Gathering','Sushi Restaurant','Taco Place','Tapas Restaurant','Tea Room','Tex-Mex Restaurant','Thai Restaurant','Turkish Restaurant','Vegetarian / Vegan Restaurant','Vietnamese Restaurant','Wings Joint','Xinjiang Restaurant']),
        ('Bar', ['Bar','Beer Bar','Beer Garden','Brewery','Cocktail Bar','Dive Bar','Gastropub','Gay Bar','Irish Pub','Karaoke Bar','Pub','Speakeasy','Sports Bar','Tiki Bar','Whisky Bar','Wine Bar']),
        ('Transportation', ['Bike Rental / Bike Share','Boat or Ferry','Border Crossing','Bridge','Bus Line','Bus Station','Bus Stop','Gas Station','Metro Station','Rental Car Location','Trail','Train Station','Tunnel']),
        ('Entertainment', ['Bowling','Hookah Bar','Lounge','Nightclub','Nightlife Spot','Roof Deck','Event Space']),
        ('Banking', ['ATM','Bank','Check Cashing Service','Credit Union','Banking']),
        ('Culture', ['Art Gallery','Art Museum','Botanical Garden','Comedy Club','Escape Room','Exhibit','Fountain','Garden','General Entertainment','Historic Site','History Museum','Indie Movie Theater','Memorial Site','Monument / Landmark','Movie Theater','Museum','Music Venue','Opera House','Outdoor Sculpture','Park','Pedestrian Plaza','Performing Arts Venue','Planetarium','Plaza','Public Art','Rock Club','Scenic Lookout','Science Museum','Sculpture Garden','Synagogue','Theater','Winery','Zoo Exhibit']),

        #Tourism
        ('Lodging', ['Bed & Breakfast','Hostel','Hotel','Hotel Bar','Hotel Pool']),

        #Industrial
        ('Infrastructure', ['Intersection','Reservoir','River','Road']),
        ('Business', ['Building','Business Service','Construction & Landscaping','Government Building','Insurance Office','Lawyer','Light Rail Station','Office','Post Office']),
    ]

    # Step 1a: exact, whole-value match. This must come first: the substring
    # rules below would otherwise turn e.g. 'Tea Room' into Grocery (via 'Tea')
    # or 'Hotel Bar' into Bar, contradicting the lists above.
    exact_lookup = {name: group for group, names in category_groups for name in names}
    cat = cat.replace(exact_lookup)

    # Step 1b: the listed names used as regular expressions, which replaces
    # matching *substrings*. Kept so that categories not listed verbatim
    # (e.g. one merely containing 'Gym') are classified as before.
    for group, names in category_groups:
        cat = cat.replace(to_replace=names, value=group, regex=True)

    # Step 2: keyword fallbacks, applied in order; each replaces the whole value.
    keyword_fallbacks = [
        #Residential
        (r'.*Food.*', "Grocery"),
        (r'.*Grocery.*', "Grocery"),

        (r'.*Auto.*', "Home"),
        (r'.*School.*', "Home"),
        (r'.*Residential.*', "Home"),
        (r'.*Child.*', "Home"),
        (r'.*Care.*', "Home"),
        (r'.*Home.*', "Home"),
        (r'.*Neighborhood.*', "Home"),

        (r'.*Sports.*', "Sports"),
        (r'.*Studio.*', "Sports"),
        (r'.*Stables.*', "Sports"),
        # NOTE(review): unlisted '... Store' categories end up as Sports (and
        # therefore Residential); "Shopping" looks more plausible -- confirm.
        (r'.*Store.*', "Sports"),

        #Residential/Tourism
        (r'.*Bar.*', "Bar"),
        (r'.*Restaurant.*', "Restaurant"),

        (r'.*Men.*', "Shopping"),
        (r'.*Women.*', "Shopping"),

        (r'.*Culture.*', "Culture"),
        (r'.*Event.*', "Culture"),
        (r'.*Concert.*', "Culture"),
        (r'.*Art.*', "Culture"),

        (r'.*Pool.*', "Entertainment"),
        (r'.*Entertainment.*', "Entertainment"),

        #Tourism
        (r'.*Tour.*', "Tourism"),
        (r'.*Hotel.*', "Lodging"),

        #Industrial
        (r'.*College.*', "College"),
        (r'.*Business.*', "Business"),
        (r'.*Bank.*', "Banking"),
    ]
    for pattern, group in keyword_fallbacks:
        cat = cat.replace(regex=pattern, value=group)

    # Step 3: groups -> coarse labels. Each result feeds the next replacement,
    # so the Industrial relabelling is part of the returned value.
    cat = cat.replace(regex=r'Grocery|Health|Home|Sports', value="Residential")
    cat = cat.replace(regex=r'Shopping|Bar|Restaurant|Culture|Banking|Entertainment|Transportation', value="RTBoth")
    cat = cat.replace(regex=r'Lodging|Tourism', value="Tourism")
    cat = cat.replace(regex=r'Business|Infrastructure|College|Factory', value="Industrial")

    return cat

In [None]:
def countLabels(row):
    """Pick a cluster label for one neighborhood from its coarse venue labels.

    Each "Residential" entry scores one residential point, each "Tourism"
    entry one tourist point and each "Industrial" entry one industrial point.
    An "RTBoth" entry scores a point for residential and for tourist alike.
    Any other value is ignored.

    Returns 0 (residential), 1 (industrial) or 2 (tourist) for the kind with
    the highest score. Ties go to tourist first, then industrial, then
    residential; a row without any scored entry therefore yields 2.
    """
    residential = 0
    industrial = 0
    tourist = 0

    for place in row:
        if place == "RTBoth":
            # counts for both residential and tourist
            residential += 1
            tourist += 1
        elif place == "Residential":
            residential += 1
        elif place == "Tourism":
            tourist += 1
        elif place == "Industrial":
            industrial += 1
        # anything else is not a scored label and is skipped

    top_score = max(residential, industrial, tourist)

    # Checked in tie-breaking order: tourist, industrial, residential.
    if tourist == top_score:
        return 2
    if industrial == top_score:
        return 1
    return 0

In [None]:
# Relabel the table column by column: every column except 'Neighborhood' is
# passed through rename_labels, the 'Neighborhood' column is kept as it is.
neighborhoods_manual = neighborhoods_venues_sorted.apply(lambda x: rename_labels(x) if x.name != 'Neighborhood' else x)

In [None]:
# For each neighborhood, count how many residential, industrial and tourist
# establishments it has and derive its manual cluster label from the counts.
manual_labels = []
for index, row in neighborhoods_manual.iterrows():
    newLabel = countLabels(row)
    print(f'Index: {index}, newLabel: {newLabel}, row: {row.values}\n')
    manual_labels.append(newLabel)

# Assign one label per row. Writing a scalar to the column inside the loop
# would overwrite every row and leave all of them with the last row's label.
neighborhoods_manual['Cluster Labels'] = manual_labels

In [None]:
# Coordinates of every neighborhood from the label table.
man = dc[['Neighborhood', 'Longitude', 'Latitude']]
# Manual cluster label of every neighborhood that has venue data.
mann = neighborhoods_manual[['Cluster Labels','Neighborhood']]
# Combine both on the neighborhood name. merge() defaults to an inner join,
# so only neighborhoods present in both tables end up on the map.
man = man.merge(mann.set_index('Neighborhood'), on='Neighborhood')
# Draw the manual clustering; 3 = number of manual cluster labels (0, 1, 2).
makeMap(man, 3)