### Data Section

In [1]:
import os

import lxml.html
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from sklearn.cluster import KMeans

We will get the ZIP codes of all neighborhoods in Guadalajara, México.

In [2]:
# Download the page that lists Guadalajara's neighborhoods and ZIP codes.
# Note: despite its name, `website_url` holds the HTTP response, not a URL.
website_url = requests.get('https://codigo-postal.co/en-us/mexico/jalisco/guadalajara/')
# Stop here on an HTTP error instead of parsing an error page further down.
website_url.raise_for_status()

In [3]:
doc = lxml.html.fromstring(website_url.content)

In [4]:
tree = lxml.html.fromstring(website_url.text)

In [5]:
tr_elements = doc.xpath('//tr')

In [6]:
[len(T) for T in tr_elements[:12]]

[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]

In [7]:
# Build the column store from the header row: one (header text, empty list)
# pair per header cell. The lists are filled by the row-parsing cell below.
col=[]
for i, t in enumerate(tr_elements[0], 1):
    name=t.text_content()
    # print() with a single argument behaves the same on Python 2 and Python 3
    print('%d:"%s"'%(i,name))
    col.append((name,[]))

1:"ZIP Code"
2:"Asentamiento"
3:"Municipality"
4:"State"
5:"Type"


In [29]:
#Start from empty columns so that re-running this cell does not append every
#row a second time
for _, values in col:
    del values[:]

#Since our first row is the header, data is stored on the second row onwards
for j in range(1,len(tr_elements)):
    #T is our j'th row
    T=tr_elements[j]
    
    #If row is not of size 5, the //tr data is not from our table 
    if len(T)!=5:
        break
    
    #i is the index of our column
    i=0
    
    #Iterate through each element of the row
    for t in T.iterchildren():
        data=t.text_content() 
        #Column 0 (the ZIP code) is kept as text; the other columns are
        #converted to integers when their text is numeric
        if i>0:
            try:
                data=int(data)
            except ValueError:
                #Not a number: keep the cell text unchanged
                pass
        #Append the data to the list of the i'th column
        col[i][1].append(data)
        #Increment i for the next column
        i+=1

In [13]:
[len(C) for (title,C) in col]

[50, 50, 50, 50, 50]

In [144]:
# `col` is a list of (header, values) pairs, so it converts directly into the
# header -> values mapping that the DataFrame is built from.
Dict=dict(col)
df=pd.DataFrame(data=Dict)

In [145]:
df.head()

Unnamed: 0,Asentamiento,Municipality,State,Type,ZIP Code
0,Colonia 1 de Mayo,Guadalajara,Jalisco,Urbano,44970
1,Unidad habitacional 18 de Marzo,Guadalajara,Jalisco,Urbano,44960
2,Unidad habitacional 2001,Guadalajara,Jalisco,Urbano,44820
3,Colonia 5 de Mayo,Guadalajara,Jalisco,Urbano,44970
4,Colonia 5 de Mayo 2a Secc,Guadalajara,Jalisco,Urbano,44970


Since all neighborhoods are in the same municipality and the same state, we will drop those columns from our dataframe and keep only the neighborhood name and ZIP code.

In [146]:
df.drop(['State'],axis=1, inplace=True)

In [147]:
df.drop(['Municipality'],axis=1,inplace=True)

In [148]:
df.drop(['Type'],axis=1, inplace=True)

In [149]:
df.head()

Unnamed: 0,Asentamiento,ZIP Code
0,Colonia 1 de Mayo,44970
1,Unidad habitacional 18 de Marzo,44960
2,Unidad habitacional 2001,44820
3,Colonia 5 de Mayo,44970
4,Colonia 5 de Mayo 2a Secc,44970


In [150]:
df = df[['ZIP Code', 'Asentamiento']]

In [151]:
df.head()

Unnamed: 0,ZIP Code,Asentamiento
0,44970,Colonia 1 de Mayo
1,44960,Unidad habitacional 18 de Marzo
2,44820,Unidad habitacional 2001
3,44970,Colonia 5 de Mayo
4,44970,Colonia 5 de Mayo 2a Secc


We will merge the neighborhoods that share a ZIP code into a single row.

In [156]:
# Collapse the rows that share a ZIP code into one row whose 'Asentamiento'
# is the comma-separated list of that ZIP code's neighborhood names.
df = (df.groupby('ZIP Code')['Asentamiento']
        .agg(', '.join)
        .reset_index())

In [159]:
df.head(10)

Unnamed: 0,ZIP Code,Asentamiento
0,44130,"Fraccionamiento Arcos, Colonia Arcos Vallarta,..."
1,44150,"Colonia Barrera, Colonia Barrera"
2,44160,"Colonia Americana, Colonia Americana"
3,44200,"Colonia Artesanos, Colonia Artesanos"
4,44230,"Fraccionamiento Autocinema, Fraccionamiento Au..."
5,44250,"Colonia Balcones de Huentitán, Colonia Balcone..."
6,44260,"Colonia Barrio Mezquitan, Colonia Barrio Mezqu..."
7,44270,"Colonia Alcalde Barranquitas, Colonia Alcalde ..."
8,44300,"Fraccionamiento Batallón de San Patricio, Frac..."
9,44306,"Colonia Bosques de La Cantera, Colonia Bosques..."


In [160]:
df.shape

(38, 2)

In [161]:
df.dtypes

ZIP Code        object
Asentamiento    object
dtype: object

In [162]:
df['ZIP Code'] = df['ZIP Code'].astype(int)

In [113]:
# Use the exact 'ZIP Code' label: assigning to a label that does not exist
# adds a new column instead of updating this one.
# NOTE(review): row 9 is addressed by its position in the grouped frame -
# confirm it is still the intended row if the scraped data changes.
df.at[9 , 'ZIP Code'] = 44300

In [136]:
df_guad = pd.read_csv('Guad_zip.csv')

In [137]:
df_guad.head()

Unnamed: 0,ZIP Code,Latitude,Longitude
0,44100,20.6743,-103.3501
1,44110,20.6731,-103.3927
2,44130,20.6733,-103.3804
3,44140,20.6689,-103.3724
4,44150,20.6696,-103.3774


In [138]:
df_guad.dtypes

ZIP Code       int64
Latitude     float64
Longitude    float64
dtype: object

In [163]:
# Look up each row's ZIP code in the coordinates table (indexed by ZIP code)
# and attach its columns; ZIP codes without a match get NaN.
coords_by_zip = df_guad.set_index('ZIP Code')
df = df.join(coords_by_zip, on='ZIP Code')

In [248]:
df.head()

Unnamed: 0,ZIP Code,Asentamiento,Latitude,Longitude
0,44130,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804
1,44150,"Colonia Barrera, Colonia Barrera",20.6696,-103.3774
2,44160,"Colonia Americana, Colonia Americana",20.6718,-103.3631
3,44200,"Colonia Artesanos, Colonia Artesanos",20.6868,-103.3573
4,44230,"Fraccionamiento Autocinema, Fraccionamiento Au...",20.7118,-103.3371


In [80]:
df.shape

(38, 4)

In [236]:
#df.at[30, 'Asentamiento'] = 'Colonia Atlas, Unidad habitacional Atlas, Fraccionamiento Atlas 2a. Seccion, Fraccionamiento Atlas Poniente'

In [244]:
df.at[37, 'Asentamiento']

'Colonia Balcones Del 4, Colonia Balcones Del 4'

In [179]:
from geopy.geocoders import Nominatim
import folium
from pandas.io.json import json_normalize

In [180]:
address = 'Guadalajara, Jal, Mexico'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
# geocode() returns None when no match is found; stop with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Guadalajara, Jal, Mexico are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Guadalajara, Jal, Mexico are 20.6720375, -103.3383962.


In [314]:
# create map of Guadalajara using latitude and longitude values
map_guad = folium.Map(location=[latitude, longitude], zoom_start=12)

# add one marker per row of df, labelled with the row's neighborhood names
for lat, lng, neighborhood in zip(df['Latitude'], df['Longitude'], df['Asentamiento']):
    # Hand the label text itself to the popup: `label.encode` without a call
    # is a method object, not a string. The u'' template keeps accented names
    # intact on both Python 2 and Python 3.
    label = folium.Popup(u'{}'.format(neighborhood), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_guad)  
    
map_guad

It is hard to find a single document with all Guadalajara neighborhoods and their latitude and longitude, so I had to look them up and collect them one by one, and then create my own CSV file to display the map above. One thing to take into account here is that some ZIP codes are very close together and share the same latitude and longitude, which is why the map does not show a separate blue marker for each of the 37 neighborhoods.

In [259]:
# Foursquare API credentials are read from the environment so that the secret
# is not stored in the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): credentials that were previously committed in this cell
# should be considered exposed and be regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')
if not CLIENT_ID or not CLIENT_SECRET:
    print('Warning: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; Foursquare requests will fail.')
VERSION = 20180605  # sent as the `v` parameter of the request URL
LIMIT = 100  # sent as the `limit` parameter of the request URL

In [361]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the Foursquare venues reported around each given location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Walked in parallel; each (name, latitude, longitude) triple
        triggers one request to the venues/explore endpoint.
    radius : int, optional
        Value sent as the `radius` parameter of the request (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The
        frame is empty, but still has these columns, when no venue is
        returned. 'Venue Category' is None for a venue without categories.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    columns = ['Neighborhood', 
               'Neighborhood Latitude', 
               'Neighborhood Longitude', 
               'Venue', 
               'Venue Latitude', 
               'Venue Longitude', 
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request; surface HTTP errors here rather than as a
        # KeyError while digging into the JSON payload
        response = requests.get(url)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']
        
        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue['categories']
            rows.append((
                name, 
                lat, 
                lng, 
                venue['name'], 
                venue['location']['lat'], 
                venue['location']['lng'],
                # a venue may carry an empty category list
                categories[0]['name'] if categories else None))

    # passing the column names to the constructor also works for zero rows
    nearby_venues = pd.DataFrame(rows, columns=columns)
    
    return(nearby_venues)

In [362]:
Guadalajara_venues = getNearbyVenues(names=df['Asentamiento'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

In [363]:
print(Guadalajara_venues.shape)
Guadalajara_venues.head()

(557, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,Palreal,20.672972,-103.379974,Coffee Shop
1,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,AirePAZ Chocolatería,20.673444,-103.378929,Dessert Shop
2,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,Little Tokyo,20.673194,-103.378875,Japanese Restaurant
3,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,Casa Tomás,20.673444,-103.378535,Spanish Restaurant
4,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,PuercoEspada,20.672953,-103.378643,Seafood Restaurant


In [364]:
Guadalajara_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Barrio Antigua Penal de Oblatos, Colonia Blanco y Cuellar 1ra., Colonia Blanco y Cuellar 2da., Colonia Blanco y Cuellar 3ra., Barrio Antigua Penal de Oblatos, Colonia Blanco y Cuellar 1ra., Colonia Blanco y Cuellar 2da., Colonia Blanco y Cuellar 3ra.",12,12,12,12,12,12
"Colonia 1 de Mayo, Colonia 5 de Mayo, Colonia 5 de Mayo 2a Secc, Colonia 1 de Mayo, Colonia 5 de Mayo, Colonia 5 de Mayo 2a Secc",4,4,4,4,4,4
"Colonia 8 de Julio, Colonia 8 de Julio",18,18,18,18,18,18
"Colonia Agraria, Colonia Agraria",1,1,1,1,1,1
Colonia Agustin Yanez,1,1,1,1,1,1
"Colonia Alcalde Barranquitas, Colonia Alcalde Barranquitas",28,28,28,28,28,28
"Colonia Aldrete, Colonia Aldrete",1,1,1,1,1,1
"Colonia Americana, Colonia Americana",82,82,82,82,82,82
Colonia Ampliacion Talpita,2,2,2,2,2,2
"Colonia Arboledas Del Sur, Colonia Arboledas Del Sur",5,5,5,5,5,5


In [365]:
print('There are {} uniques categories.'.format(len(Guadalajara_venues['Venue Category'].unique())))

There are 147 uniques categories.


In [368]:
# one hot encoding: one indicator column per venue category
guad_onehot = pd.get_dummies(Guadalajara_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category that is itself called 'Neighborhood' would collide with the
# label column added below, so give any such indicator column a distinct name
# (this is a no-op when no category has that name).
guad_onehot = guad_onehot.rename(columns={'Neighborhood': 'Neighborhood (venue category)'})

# add the neighborhood label directly as the first column
guad_onehot.insert(0, 'Neighborhood', Guadalajara_venues['Neighborhood'])

guad_onehot.head()

Unnamed: 0,Neighborhood,Adult Boutique,Advertising Agency,American Restaurant,Argentinian Restaurant,Asian Restaurant,Athletics & Sports,Auto Garage,BBQ Joint,Bakery,...,Tailor Shop,Tapas Restaurant,Tea Room,Theater,Tourist Information Center,Vegetarian / Vegan Restaurant,Wine Bar,Wings Joint,Yoga Studio,Yucatecan Restaurant
0,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [369]:
# Average the one-hot rows of each neighborhood, keeping 'Neighborhood' as a
# regular column: every value is the mean of that category's indicator over
# the neighborhood's venue rows.
guad_grouped = guad_onehot.groupby('Neighborhood', as_index=False).mean()
guad_grouped

Unnamed: 0,Neighborhood,Adult Boutique,Advertising Agency,American Restaurant,Argentinian Restaurant,Asian Restaurant,Athletics & Sports,Auto Garage,BBQ Joint,Bakery,...,Tailor Shop,Tapas Restaurant,Tea Room,Theater,Tourist Information Center,Vegetarian / Vegan Restaurant,Wine Bar,Wings Joint,Yoga Studio,Yucatecan Restaurant
0,"Barrio Antigua Penal de Oblatos, Colonia Blanc...",0.0,0.0,0.083333,0.0,0.083333,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Colonia 1 de Mayo, Colonia 5 de Mayo, Colonia ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Colonia 8 de Julio, Colonia 8 de Julio",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0
3,"Colonia Agraria, Colonia Agraria",0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Colonia Agustin Yanez,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Colonia Alcalde Barranquitas, Colonia Alcalde ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,...,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0
6,"Colonia Aldrete, Colonia Aldrete",0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Colonia Americana, Colonia Americana",0.0,0.012195,0.012195,0.0,0.0,0.0,0.0,0.0,0.036585,...,0.0,0.012195,0.02439,0.0,0.0,0.02439,0.012195,0.0,0.012195,0.0
8,Colonia Ampliacion Talpita,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Colonia Arboledas Del Sur, Colonia Arboledas D...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0


In [371]:
# Print, for every neighborhood, its venue categories ordered from the most
# to the least frequent.
for hood in guad_grouped['Neighborhood']:
    print("----"+hood+"----")
    hood_rows = guad_grouped[guad_grouped['Neighborhood'] == hood]
    # transpose so that each category becomes a row: (category, frequency)
    venue_freq = hood_rows.T.reset_index()
    venue_freq.columns = ['venue','freq']
    # the first row holds the neighborhood name, not a category
    venue_freq = venue_freq.iloc[1:]
    venue_freq['freq'] = venue_freq['freq'].astype(float)
    venue_freq = venue_freq.round({'freq': 2})
    ranked = venue_freq.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked)
    print('\n')

----Barrio Antigua Penal de Oblatos, Colonia Blanco y Cuellar 1ra., Colonia Blanco y Cuellar 2da., Colonia Blanco y Cuellar 3ra., Barrio Antigua Penal de Oblatos, Colonia Blanco y Cuellar 1ra., Colonia Blanco y Cuellar 2da., Colonia Blanco y Cuellar 3ra.----
                             venue  freq
0                      Flea Market  0.17
1                              Gym  0.08
2              American Restaurant  0.08
3                 Asian Restaurant  0.08
4        Cajun / Creole Restaurant  0.08
5                      Pizza Place  0.08
6                              Bar  0.08
7          Health & Beauty Service  0.08
8                        Pet Store  0.08
9                           Market  0.08
10                      Taco Place  0.08
11                  Adult Boutique  0.00
12                       Nightclub  0.00
13                 Organic Grocery  0.00
14   Paper / Office Supplies Store  0.00
15                            Park  0.00
16                        Pharmacy  0.00
17 

                             venue  freq
0                             Farm   1.0
1                            Motel   0.0
2                    Movie Theater   0.0
3                       Nail Salon   0.0
4                        Nightclub   0.0
5                  Organic Grocery   0.0
6    Paper / Office Supplies Store   0.0
7                             Park   0.0
8                        Pet Store   0.0
9                   Adult Boutique   0.0
10                        Pharmacy   0.0
11              Photography Studio   0.0
12                        Pie Shop   0.0
13                     Pizza Place   0.0
14                      Playground   0.0
15                           Plaza   0.0
16                            Pool   0.0
17                 Motorcycle Shop   0.0
18             Moroccan Restaurant   0.0
19                           Hotel   0.0
20             Monument / Landmark   0.0
21               Indian Restaurant   0.0
22              Italian Restaurant   0.0
23             J

                             venue  freq
0                          Dog Run   1.0
1                   Adult Boutique   0.0
2                        Pet Store   0.0
3                    Movie Theater   0.0
4                       Nail Salon   0.0
5                        Nightclub   0.0
6                  Organic Grocery   0.0
7    Paper / Office Supplies Store   0.0
8                             Park   0.0
9                         Pharmacy   0.0
10                           Motel   0.0
11              Photography Studio   0.0
12                        Pie Shop   0.0
13                     Pizza Place   0.0
14                      Playground   0.0
15                           Plaza   0.0
16                            Pool   0.0
17                 Motorcycle Shop   0.0
18             Moroccan Restaurant   0.0
19                           Hotel   0.0
20             Monument / Landmark   0.0
21               Indian Restaurant   0.0
22              Italian Restaurant   0.0
23             J

                             venue  freq
0                    Shopping Mall  0.15
1                Indian Restaurant  0.08
2                 Sushi Restaurant  0.08
3                  Bubble Tea Shop  0.08
4               Seafood Restaurant  0.08
5                        Gift Shop  0.08
6               Mexican Restaurant  0.08
7                    Grocery Store  0.08
8                      Coffee Shop  0.08
9                        BBQ Joint  0.08
10                      Laundromat  0.08
11                            Café  0.08
12                        Pharmacy  0.00
13                       Pet Store  0.00
14                            Park  0.00
15                      Nail Salon  0.00
16   Paper / Office Supplies Store  0.00
17              Photography Studio  0.00
18                        Pie Shop  0.00
19                 Organic Grocery  0.00
20                     Pizza Place  0.00
21                       Nightclub  0.00
22                      Playground  0.00
23              

                             venue  freq
0               Mexican Restaurant  0.32
1                       Taco Place  0.14
2                           Bakery  0.07
3                       Restaurant  0.04
4                      Pizza Place  0.04
5                      Flea Market  0.04
6                             Food  0.04
7                       Food Court  0.04
8                         Building  0.04
9                       Steakhouse  0.04
10            Gym / Fitness Center  0.04
11               Electronics Store  0.04
12                    Liquor Store  0.04
13                       BBQ Joint  0.04
14                     Wings Joint  0.04
15                     Auto Garage  0.04
16                          Market  0.00
17   Paper / Office Supplies Store  0.00
18                           Plaza  0.00
19                      Playground  0.00
20               Korean Restaurant  0.00
21                        Pie Shop  0.00
22              Photography Studio  0.00
23              

                             venue  freq
0                   Breakfast Spot  0.17
1                             Park  0.17
2                       Steakhouse  0.17
3                           Lounge  0.17
4                       Taco Place  0.17
5                      Tailor Shop  0.17
6               Photography Studio  0.00
7                         Pharmacy  0.00
8                        Pet Store  0.00
9                         Pie Shop  0.00
10             Moroccan Restaurant  0.00
11                     Pizza Place  0.00
12   Paper / Office Supplies Store  0.00
13                      Playground  0.00
14                 Organic Grocery  0.00
15                           Plaza  0.00
16                       Nightclub  0.00
17                      Nail Salon  0.00
18                   Movie Theater  0.00
19                 Motorcycle Shop  0.00
20                           Motel  0.00
21                  Adult Boutique  0.00
22             Monument / Landmark  0.00
23              

                             venue  freq
0                           Market   0.4
1                       Playground   0.2
2                     Cocktail Bar   0.2
3    Paper / Office Supplies Store   0.2
4                   Adult Boutique   0.0
5                             Park   0.0
6                    Movie Theater   0.0
7                       Nail Salon   0.0
8                        Nightclub   0.0
9                  Organic Grocery   0.0
10                        Pharmacy   0.0
11                       Pet Store   0.0
12                           Motel   0.0
13              Photography Studio   0.0
14                        Pie Shop   0.0
15                     Pizza Place   0.0
16                           Plaza   0.0
17                            Pool   0.0
18                 Motorcycle Shop   0.0
19             Moroccan Restaurant   0.0
20                             Pub   0.0
21                      Laundromat   0.0
22               Indian Restaurant   0.0
23              

In [372]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest entries of `row`.

    The first entry of `row` is skipped before ranking; the remaining
    entries are sorted in descending order and the index labels of the
    leading ones are returned as an array.
    """
    frequencies = row.iloc[1:]
    ranked_labels = frequencies.sort_values(ascending=False).index.values
    return ranked_labels[:num_top_venues]

In [374]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # the first three ranks take 'st'/'nd'/'rd'; every later rank takes 'th'
    if ind < len(indicators):
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    else:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe with one row per neighborhood of guad_grouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = guad_grouped['Neighborhood']

# fill the ranking columns of each row with that neighborhood's top categories
for ind in np.arange(guad_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(guad_grouped.iloc[ind, :], num_top_venues)

In [414]:
neighborhoods_venues_sorted.head(28)

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,0,"Barrio Antigua Penal de Oblatos, Colonia Blanc...",Flea Market,Health & Beauty Service,Taco Place,Pet Store,Cajun / Creole Restaurant
1,0,"Colonia 1 de Mayo, Colonia 5 de Mayo, Colonia ...",Taco Place,Pub,Ice Cream Shop,Pharmacy,Electronics Store
2,0,"Colonia 8 de Julio, Colonia 8 de Julio",Mexican Restaurant,Pharmacy,Taco Place,Burger Joint,Food Truck
3,2,"Colonia Agraria, Colonia Agraria",Athletics & Sports,Yucatecan Restaurant,Event Service,Food,Flower Shop
4,1,Colonia Agustin Yanez,Farm,Yucatecan Restaurant,Cosmetics Shop,Food,Flower Shop
5,0,"Colonia Alcalde Barranquitas, Colonia Alcalde ...",Taco Place,Mexican Restaurant,Food Truck,Chinese Restaurant,Café
6,2,"Colonia Aldrete, Colonia Aldrete",Athletics & Sports,Yucatecan Restaurant,Event Service,Food,Flower Shop
7,0,"Colonia Americana, Colonia Americana",Mexican Restaurant,Café,Coffee Shop,Pizza Place,Breakfast Spot
8,3,Colonia Ampliacion Talpita,Dog Run,Yucatecan Restaurant,Farm,Food,Flower Shop
9,0,"Colonia Arboledas Del Sur, Colonia Arboledas D...",Taco Place,Wings Joint,Seafood Restaurant,Gym,Park


In [433]:
kclusters = 5
# feature matrix: the category frequencies without the label column
# (axis passed by keyword, as in the other drop calls of this notebook)
guad_grouped_clustering = guad_grouped.drop('Neighborhood', axis=1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(guad_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:20]

array([0, 0, 0, 2, 1, 0, 2, 0, 3, 0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 4],
      dtype=int32)

In [434]:
guad_grouped_clustering.head()

Unnamed: 0,Adult Boutique,Advertising Agency,American Restaurant,Argentinian Restaurant,Asian Restaurant,Athletics & Sports,Auto Garage,BBQ Joint,Bakery,Bar,...,Tailor Shop,Tapas Restaurant,Tea Room,Theater,Tourist Information Center,Vegetarian / Vegan Restaurant,Wine Bar,Wings Joint,Yoga Studio,Yucatecan Restaurant
0,0.0,0.0,0.083333,0.0,0.083333,0.0,0.0,0.0,0.0,0.083333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [435]:
# Add each neighborhood's cluster label as the first column; the map cell
# below reads it from guad_merged. neighborhoods_venues_sorted has one row per
# row of guad_grouped, the frame the k-means features were taken from. When
# the column already exists it is overwritten, so this cell can be re-run.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

guad_merged = df

# join the cluster label and the top venues onto df (ZIP code, neighborhood
# names and coordinates), matching on the neighborhood names
guad_merged = guad_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Asentamiento')



# check the last columns!
guad_merged.head()

Unnamed: 0,ZIP Code,Asentamiento,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,44130,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,0.0,Restaurant,Italian Restaurant,Mexican Restaurant,Coffee Shop,Hotel
1,44150,"Colonia Barrera, Colonia Barrera",20.6696,-103.3774,0.0,Mexican Restaurant,Italian Restaurant,Seafood Restaurant,Restaurant,Café
2,44160,"Colonia Americana, Colonia Americana",20.6718,-103.3631,0.0,Mexican Restaurant,Café,Coffee Shop,Pizza Place,Breakfast Spot
3,44200,"Colonia Artesanos, Colonia Artesanos",20.6868,-103.3573,0.0,Mexican Restaurant,Flower Shop,Convenience Store,Boutique,Cosmetics Shop
4,44230,"Fraccionamiento Autocinema, Fraccionamiento Au...",20.7118,-103.3371,0.0,Taco Place,Comfort Food Restaurant,Italian Restaurant,River,Park


In [437]:
# Replace every missing value left by the join with 5; for 'Cluster Labels'
# that is one past the k-means labels 0..4.
guad_merged = guad_merged.fillna(5)

In [438]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters: one color per label value up to the
# largest one present, so the filler label given to rows without a cluster
# gets its own color instead of sharing one with label 0
n_colors = int(guad_merged['Cluster Labels'].max()) + 1
colors_array = cm.rainbow(np.linspace(0, 1, n_colors))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(guad_merged['Latitude'], guad_merged['Longitude'], guad_merged['Asentamiento'], guad_merged['Cluster Labels']):
    # the u'' template keeps accented names intact on Python 2 and Python 3
    label = folium.Popup(u'{} Cluster {}'.format(poi, cluster), parse_html=True)
    # index the palette by the label itself; `cluster-1` sent label 0 to
    # index -1, i.e. the last color
    marker_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [423]:
rainbow

[u'#8000ff', u'#80ffb4', u'#ff0000']

In [366]:
guad_bakery = pd.get_dummies(Guadalajara_venues[['Venue Category']], prefix="", prefix_sep="")

In [285]:
filter1 = Guadalajara_venues['Venue Category'] == 'Bakery'

In [289]:
filter2 = Guadalajara_venues['Venue Category']=='Coffee Shop'

In [290]:
filter3 =  Guadalajara_venues['Venue Category']=='Dessert Shop'

In [286]:
guad = Guadalajara_venues[filter1]

In [291]:
guad1 = Guadalajara_venues[filter2]

In [292]:
guad2 = Guadalajara_venues[filter3]

In [293]:
dfs = [guad, guad1, guad2]

In [297]:
from functools import reduce

In [319]:
guad_bakeries = pd.concat(dfs)

In [321]:
guad_bakeries.head(22)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
150,"Colonia Americana, Colonia Americana",20.6718,-103.3631,Neufeld,20.671399,-103.363664,Bakery
160,"Colonia Americana, Colonia Americana",20.6718,-103.3631,La Panadería,20.672248,-103.36275,Bakery
192,"Colonia Americana, Colonia Americana",20.6718,-103.3631,Pastelería Luvier,20.674897,-103.360582,Bakery
261,"Colonia Barrio Mezquitan, Colonia Barrio Mezqu...",20.6972,-103.3558,Panaderia La Fama de Mezquitán,20.698564,-103.353511,Bakery
273,"Colonia Barrio Mezquitan, Colonia Barrio Mezqu...",20.6972,-103.3558,Pastelería Petit,20.696986,-103.360235,Bakery
397,"Colonia Arcos Sur, Colonia Arcos Sur",20.6701,-103.3941,Pastelería Luvier,20.66657,-103.39586,Bakery
0,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,Palreal,20.672972,-103.379974,Coffee Shop
16,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,Gatopardo Cafetería,20.674654,-103.380905,Coffee Shop
34,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,Comala Barra de Café,20.671414,-103.379936,Coffee Shop
50,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,Café La Flor de Córdoba López Cotilla,20.673221,-103.381414,Coffee Shop


In [311]:
guad_bakeries.shape

(22, 7)

In [315]:
map_guad = folium.Map(location=[latitude, longitude], zoom_start=12)

# add one marker per venue of guad_bakeries, labelled with its category
for lat, lng, neighborhood in zip(guad_bakeries['Venue Latitude'], guad_bakeries['Venue Longitude'], guad_bakeries['Venue Category']):
    # Hand the label text itself to the popup: `label.encode` without a call
    # is a method object, not a string. The u'' template keeps accented text
    # intact on both Python 2 and Python 3.
    label = folium.Popup(u'{}'.format(neighborhood), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_guad)  
    
map_guad

In [325]:
guad_bakeries.head(10)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
150,"Colonia Americana, Colonia Americana",20.6718,-103.3631,Neufeld,20.671399,-103.363664,Bakery
160,"Colonia Americana, Colonia Americana",20.6718,-103.3631,La Panadería,20.672248,-103.36275,Bakery
192,"Colonia Americana, Colonia Americana",20.6718,-103.3631,Pastelería Luvier,20.674897,-103.360582,Bakery
261,"Colonia Barrio Mezquitan, Colonia Barrio Mezqu...",20.6972,-103.3558,Panaderia La Fama de Mezquitán,20.698564,-103.353511,Bakery
273,"Colonia Barrio Mezquitan, Colonia Barrio Mezqu...",20.6972,-103.3558,Pastelería Petit,20.696986,-103.360235,Bakery


In [323]:
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim
import folium
from pandas.io.json import json_normalize

In [329]:
# Encode the category as a number: 1 for 'Bakery', 2 for anything else.
guad_bakeries['Venue Category'] = guad_bakeries['Venue Category'].eq('Bakery').map({True: 1, False: 2})

In [333]:
guad_bakeries.head(22)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
150,"Colonia Americana, Colonia Americana",20.6718,-103.3631,Neufeld,20.671399,-103.363664,1
160,"Colonia Americana, Colonia Americana",20.6718,-103.3631,La Panadería,20.672248,-103.36275,1
192,"Colonia Americana, Colonia Americana",20.6718,-103.3631,Pastelería Luvier,20.674897,-103.360582,1
261,"Colonia Barrio Mezquitan, Colonia Barrio Mezqu...",20.6972,-103.3558,Panaderia La Fama de Mezquitán,20.698564,-103.353511,1
273,"Colonia Barrio Mezquitan, Colonia Barrio Mezqu...",20.6972,-103.3558,Pastelería Petit,20.696986,-103.360235,1
397,"Colonia Arcos Sur, Colonia Arcos Sur",20.6701,-103.3941,Pastelería Luvier,20.66657,-103.39586,1
0,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,Palreal,20.672972,-103.379974,2
16,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,Gatopardo Cafetería,20.674654,-103.380905,2
34,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,Comala Barra de Café,20.671414,-103.379936,2
50,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,Café La Flor de Córdoba López Cotilla,20.673221,-103.381414,2


In [347]:
guad_bakery_grouped=guad_bakeries.drop(['Venue'],axis=1)

In [349]:
guad_bakery_grouped.drop(['Neighborhood'],axis=1,inplace=True)

In [350]:
guad_bakery_grouped.head()

Unnamed: 0,Neighborhood Latitude,Neighborhood Longitude,Venue Latitude,Venue Longitude,Venue Category
150,20.6718,-103.3631,20.671399,-103.363664,1
160,20.6718,-103.3631,20.672248,-103.36275,1
192,20.6718,-103.3631,20.674897,-103.360582,1
261,20.6972,-103.3558,20.698564,-103.353511,1
273,20.6972,-103.3558,20.696986,-103.360235,1


In [351]:
kclusters = 5
# the numeric bakery frame is used as the feature matrix as-is (same object)
guad_grouped_clustering = guad_bakery_grouped

# run k-means clustering; fit() returns the estimator itself, so fitting on a
# separate line leaves `kmeans` bound to the same fitted model
kmeans = KMeans(n_clusters=kclusters, random_state=0)
kmeans.fit(guad_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:20]

array([3, 3, 3, 1, 1, 3, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 4],
      dtype=int32)

In [352]:
guad_bakeries.insert(0, 'Cluster Labels', kmeans.labels_)

In [357]:
guad_bakeries.head(22)

Unnamed: 0,Cluster Labels,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
150,3,"Colonia Americana, Colonia Americana",20.6718,-103.3631,Neufeld,20.671399,-103.363664,1
160,3,"Colonia Americana, Colonia Americana",20.6718,-103.3631,La Panadería,20.672248,-103.36275,1
192,3,"Colonia Americana, Colonia Americana",20.6718,-103.3631,Pastelería Luvier,20.674897,-103.360582,1
261,1,"Colonia Barrio Mezquitan, Colonia Barrio Mezqu...",20.6972,-103.3558,Panaderia La Fama de Mezquitán,20.698564,-103.353511,1
273,1,"Colonia Barrio Mezquitan, Colonia Barrio Mezqu...",20.6972,-103.3558,Pastelería Petit,20.696986,-103.360235,1
397,3,"Colonia Arcos Sur, Colonia Arcos Sur",20.6701,-103.3941,Pastelería Luvier,20.66657,-103.39586,1
0,2,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,Palreal,20.672972,-103.379974,2
16,2,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,Gatopardo Cafetería,20.674654,-103.380905,2
34,2,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,Comala Barra de Café,20.671414,-103.379936,2
50,2,"Fraccionamiento Arcos, Colonia Arcos Vallarta,...",20.6733,-103.3804,Café La Flor de Córdoba López Cotilla,20.673221,-103.381414,2


In [356]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters: one color per cluster label 0..kclusters-1
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(guad_bakeries['Neighborhood Latitude'], guad_bakeries['Neighborhood Longitude'], guad_bakeries['Neighborhood'], guad_bakeries['Cluster Labels']):
    # the u'' template keeps accented names intact on Python 2 and Python 3
    label = folium.Popup(u'{} Cluster {}'.format(poi, cluster), parse_html=True)
    # index the palette by the label itself rather than `cluster-1`, which
    # reached the last color for label 0 through a negative index
    marker_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters