In [1]:
import numpy as np
import pandas as pd
import json
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

In [86]:
# Neighborhoods in Mexico City
cdmx_cols = pd.read_csv('/Users/rogeliomj/Documents/Coursera_Capstone/Colonias/coloniascdmx.csv', sep=';')

In [87]:
neighborhoods = cdmx_cols[cdmx_cols['ALCALDIA'] == 'ALVARO OBREGON']
neighborhoods = neighborhoods[['ALCALDIA','COLONIA','Geo Point']]
neighborhoods.reset_index(drop=True, inplace=True)

In [88]:
geops = neighborhoods['Geo Point'].str.split(pat=',', expand=True)
neighborhoods = pd.concat([neighborhoods,geops], axis=1)
neighborhoods.drop('Geo Point', axis=1, inplace=True)
neighborhoods.rename(columns={'ALCALDIA':'District','COLONIA':'Neighborhood',0:'Latitude',1:'Longitude'}, inplace=True)
neighborhoods['Latitude'] = neighborhoods['Latitude'].astype('float')
neighborhoods['Longitude'] = neighborhoods['Longitude'].astype('float')

In [90]:
neighborhoods.head()

Unnamed: 0,District,Neighborhood,Latitude,Longitude
0,ALVARO OBREGON,2DA JALALPA TEPITO (AMPL),19.375655,-99.233207
1,ALVARO OBREGON,ESTADO DE HIDALGO_,19.364444,-99.241681
2,ALVARO OBREGON,SAN CLEMENTE,19.349309,-99.226218
3,ALVARO OBREGON,VILLA SOLIDARIDAD,19.383402,-99.219292
4,ALVARO OBREGON,TEPOPOTLA,19.362128,-99.251763


In [92]:
neighborhoods.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 249 entries, 0 to 248
Data columns (total 4 columns):
District        249 non-null object
Neighborhood    249 non-null object
Latitude        249 non-null float64
Longitude       249 non-null float64
dtypes: float64(2), object(2)
memory usage: 7.9+ KB


In [101]:
# create map of Mexico City using latitude and longitude values

latitude = 19.340995
longitude = -99.249742

map_cdmx = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, district, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['District'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, district)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=4,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_cdmx)  
    
# map_cdmx

In [102]:
CLIENT_ID = 'X2BOT3ZFC3IHF0QVIQ4ZDRZC2BQBGYXOUI05KCTTS2QVUNDG' # your Foursquare ID
CLIENT_SECRET = '5IQ2I3LIBVSMU5FDBJZYZJWFVKVENIDHUGUBUD20W3H5FMXW' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: X2BOT3ZFC3IHF0QVIQ4ZDRZC2BQBGYXOUI05KCTTS2QVUNDG
CLIENT_SECRET:5IQ2I3LIBVSMU5FDBJZYZJWFVKVENIDHUGUBUD20W3H5FMXW


In [103]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [106]:

LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius


In [107]:
ao_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )

2DA  JALALPA TEPITO (AMPL)
ESTADO DE HIDALGO_
SAN CLEMENTE
VILLA SOLIDARIDAD
TEPOPOTLA
PASEO DE LAS LOMAS-SAN GABRIEL
PRESIDENTES 2DA (AMPL)
LOS GAMITOS
LOMAS DE SANTO DOMINGO
LOMAS DE CENTENARIO (U HAB)
CANUTILLO(AGUASCALIENTES)
AGUILAS PILARES
JALALPA TEPITO
EL BATAN
BELEN DE LAS FLORES
LOMAS DE AXOMIATLA
LA PERA XOCHINAHUAC (U HAB)
MOLINO DE ROSAS
LOMAS DE LOS CEDROS
19 DE MAYO
SAN PEDRO DE LOS PINOS
SANTA LUCIA (PBLO)
PARAJE EL CABALLITO
ACUEDUCTO
SANTA MARIA NONOALCO
PRADOS LA PROVIDENCIA
SEARS ROEBUCK (U HAB)
LOMAS DE GUADALUPE
LOMAS DE CHAMONTOYA
MINAS DE CRISTO
LOMAS DE CAPULIN
MARIA G DE GARCIA RUIZ
LOMAS DE LOS ANGELES TETELPAN
MOLINO DE ROSAS (AMPL)
BARRIO NORTE
CANUTILLO 3RA SECCION
TETELPAN (PBLO)
TEPEACA
JALALPA EL GRANDE
LA CONCHITA
GALEANA
LA LOMA
LIBERACION PROLETARIA
CALZADA JALALPA
EL ARBOL
CAADA DEL OLIVAR (U HAB)
OLIVAR DE LOS PADRES
MILPA DEL CEDRO
GOLONDRINAS
EMANCIPACION DEL PUEBLO
PALMAS
SAN ANGEL INN
PUENTE COLORADO
TARANGO (U HAB)
1RA VICTORIA SECCION BOSQUES

In [111]:
print(ao_venues.shape)
ao_venues.head()

(4313, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,2DA JALALPA TEPITO (AMPL),19.375655,-99.233207,Total Body fitness center,19.377245,-99.231135,Gym
1,2DA JALALPA TEPITO (AMPL),19.375655,-99.233207,"Tlacoyos ""polo""",19.377724,-99.231144,Fast Food Restaurant
2,2DA JALALPA TEPITO (AMPL),19.375655,-99.233207,Campo De Futbol Jalalpa,19.374624,-99.237177,Soccer Field
3,2DA JALALPA TEPITO (AMPL),19.375655,-99.233207,Carnitas Nacho,19.371821,-99.230726,Taco Place
4,ESTADO DE HIDALGO_,19.364444,-99.241681,Starbucks,19.365002,-99.243963,Coffee Shop


In [113]:
print('There are {} uniques categories.'.format(len(ao_venues['Venue Category'].unique())))

There are 247 uniques categories.


# Analyze each neighboorhood

In [138]:
# one hot encoding
ao_onehot = pd.get_dummies(ao_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
ao_onehot['Neighborhood'] = ao_venues['Neighborhood'] 

In [139]:
ao_onehot.head()

Unnamed: 0,ATM,American Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auto Garage,...,Video Store,Warehouse Store,Whisky Bar,Wine Bar,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [140]:
ao_grouped = ao_onehot.groupby('Neighborhood').mean().reset_index()
ao_grouped

Unnamed: 0,Neighborhood,ATM,American Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,...,Video Store,Warehouse Store,Whisky Bar,Wine Bar,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,19 DE MAYO,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.000000,0.000000,0.0,0.0,0.00,0.0,0.000000,0.0,0.0
1,1RA VICTORIA,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.000000,0.000000,0.0,0.0,0.00,0.0,0.000000,0.0,0.0
2,1RA VICTORIA SECCION BOSQUES,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.000000,0.000000,0.0,0.0,0.00,0.0,0.000000,0.0,0.0
3,26 DE JULIO,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.000000,0.000000,0.0,0.0,0.00,0.0,0.000000,0.0,0.0
4,2DA JALALPA TEPITO (AMPL),0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.000000,0.000000,0.0,0.0,0.00,0.0,0.000000,0.0,0.0
5,2DA EL PIRUL (AMPL),0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.000000,0.000000,0.0,0.0,0.00,0.0,0.000000,0.0,0.0
6,ABRAHAM GONZALEZ,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.000000,0.052632,0.0,0.0,0.00,0.0,0.000000,0.0,0.0
7,ACUEDUCTO,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.000000,0.000000,0.0,0.0,0.05,0.0,0.000000,0.0,0.0
8,ACUILOTLA,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.000000,0.000000,0.0,0.0,0.00,0.0,0.000000,0.0,0.0
9,AGUILAS 3ER PARQUE,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.045455,...,0.0,0.000000,0.000000,0.0,0.0,0.00,0.0,0.000000,0.0,0.0


In [142]:
ao_grouped.shape

(247, 247)

#### Let's print each neighborhood along with the top 5 most common venues

In [143]:
num_top_venues = 5

for hood in ao_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = ao_grouped[ao_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----19 DE MAYO----
                venue  freq
0          Restaurant  0.12
1          Taco Place  0.12
2  Mexican Restaurant  0.12
3         Coffee Shop  0.12
4          Donut Shop  0.12


----1RA VICTORIA----
                venue  freq
0          Taco Place  0.17
1  Mexican Restaurant  0.14
2                Park  0.07
3        Burger Joint  0.03
4   Convenience Store  0.03


----1RA VICTORIA SECCION BOSQUES----
                venue  freq
0          Taco Place  0.25
1  Mexican Restaurant  0.10
2                Park  0.10
3          Restaurant  0.05
4               Diner  0.05


----26 DE JULIO----
               venue  freq
0  Convenience Store   0.2
1        Coffee Shop   0.2
2         Restaurant   0.1
3         Donut Shop   0.1
4         Taco Place   0.1


----2DA  JALALPA TEPITO (AMPL)----
                  venue  freq
0            Taco Place  0.25
1  Fast Food Restaurant  0.25
2                   Gym  0.25
3          Soccer Field  0.25
4                   ATM  0.00


----2DA EL P

4          Playground  0.00


----CAROLA----
              venue  freq
0              Park  0.15
1        Taco Place  0.15
2        Restaurant  0.08
3  Sushi Restaurant  0.08
4     Historic Site  0.08


----CEDRO CHICO----
                venue  freq
0          Taco Place  0.25
1  Mexican Restaurant  0.25
2          Restaurant  0.12
3        Cocktail Bar  0.12
4               Trail  0.12


----CHIMALISTAC----
                venue  freq
0          Restaurant  0.07
1                 Bar  0.05
2  Mexican Restaurant  0.05
3  Spanish Restaurant  0.05
4   French Restaurant  0.05


----COLINAS DE TARANGO----
                venue  freq
0      Breakfast Spot  0.13
1  Mexican Restaurant  0.13
2   Electronics Store  0.07
3        Burger Joint  0.07
4               Diner  0.07


----COLINAS DEL SUR----
                    venue  freq
0             Pizza Place  0.18
1      Mexican Restaurant  0.18
2            Burger Joint  0.12
3                Gym Pool  0.06
4  Argentinian Restaurant  0.06


--

4         Supermarket  0.07


----LA HUERTA----
                venue  freq
0  Mexican Restaurant  0.29
1        Burger Joint  0.12
2          Restaurant  0.12
3         Record Shop  0.06
4      Farmers Market  0.06


----LA JOYA----
                  venue  freq
0           Coffee Shop  0.18
1           Flea Market  0.18
2  Fast Food Restaurant  0.09
3    Mexican Restaurant  0.09
4                  Food  0.09


----LA LOMA----
                   venue  freq
0    Japanese Restaurant  0.25
1      Convenience Store  0.25
2               Pie Shop  0.25
3       Asian Restaurant  0.25
4  Performing Arts Venue  0.00


----LA MARTINICA----
              venue  freq
0           Dog Run  0.25
1      Liquor Store  0.12
2  Stationery Store  0.12
3   Nature Preserve  0.12
4        Street Art  0.12


----LA MEXICANA----
                venue  freq
0            Pharmacy  0.10
1          Taco Place  0.10
2         Pizza Place  0.05
3  Mexican Restaurant  0.05
4          Steakhouse  0.05


----LA MEXI

        venue  freq
0     Dog Run  0.50
1        Food  0.25
2    Mountain  0.25
3         ATM  0.00
4  Playground  0.00


----MIGUEL GAONA ARMENTA----
        venue  freq
0  Taco Place  0.50
1     Dog Run  0.25
2    Tea Room  0.25
3         ATM  0.00
4    Pie Shop  0.00


----MIGUEL HIDALGO----
                  venue  freq
0  Gym / Fitness Center  0.12
1            Taco Place  0.12
2     Convenience Store  0.08
3         Shopping Mall  0.08
4    Mexican Restaurant  0.08


----MILPA DEL CEDRO----
                venue  freq
0          Restaurant  0.25
1          Taco Place  0.25
2        Cocktail Bar  0.12
3  Mexican Restaurant  0.12
4               Trail  0.12


----MINAS DE CRISTO----
                venue  freq
0                Park  0.10
1          Taco Place  0.10
2       Auto Workshop  0.10
3              Garden  0.05
4  Mexican Restaurant  0.05


----MOLINO DE ROSAS----
                venue  freq
0  Mexican Restaurant  0.18
1          Taco Place  0.11
2         Pizza Place  0.1

                venue  freq
0  Mexican Restaurant  0.25
1          Taco Place  0.15
2          Restaurant  0.15
3      Sandwich Place  0.05
4      Ice Cream Shop  0.05


----SANTA LUCIA REACOMODO----
                venue  freq
0  Mexican Restaurant  0.31
1          Restaurant  0.12
2          Taco Place  0.12
3         College Gym  0.06
4                Café  0.06


----SANTA MARIA NONOALCO----
                venue  freq
0  Seafood Restaurant  0.17
1              Market  0.11
2  Mexican Restaurant  0.11
3          Taco Place  0.11
4         Flea Market  0.11


----SANTA ROSA XOCHIAC (PBLO)----
                    venue  freq
0            Burger Joint  0.17
1           Historic Site  0.17
2             Wings Joint  0.17
3                   Diner  0.17
4  Furniture / Home Store  0.17


----SEARS ROEBUCK (U HAB)----
                venue  freq
0          Taco Place  0.22
1  Mexican Restaurant  0.17
2          Restaurant  0.11
3              Market  0.06
4   Convenience Store  0.06


---

#### Let's create a function to sort venues in descending order

In [146]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [150]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = ao_grouped['Neighborhood']

for ind in np.arange(ao_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ao_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,19 DE MAYO,Restaurant,Taco Place,Mexican Restaurant,Garden,Donut Shop,Convenience Store,Coffee Shop,Ice Cream Shop,Asian Restaurant,Event Space
1,1RA VICTORIA,Taco Place,Mexican Restaurant,Park,Pharmacy,Bar,Farmers Market,Burger Joint,Food,Seafood Restaurant,Lounge
2,1RA VICTORIA SECCION BOSQUES,Taco Place,Park,Mexican Restaurant,Food,Gym Pool,Garden,Burger Joint,Bar,Seafood Restaurant,Sushi Restaurant
3,26 DE JULIO,Coffee Shop,Convenience Store,Restaurant,Mexican Restaurant,Donut Shop,Taco Place,Pharmacy,Department Store,Fast Food Restaurant,Event Service
4,2DA JALALPA TEPITO (AMPL),Fast Food Restaurant,Taco Place,Soccer Field,Gym,Zoo Exhibit,Electronics Store,Flower Shop,Flea Market,Fish Market,Farmers Market


In [152]:
ao_venues.to_csv('/Users/rogeliomj/Documents/Coursera_Capstone/ao_venues/ao_venues.csv', index=False)