<h2>1. Create dataset</h2>
<p>I've loaded to Github a set of data of all barrios along with their latitudes and longitudes</p>

In [6]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

df = pd.read_csv("https://raw.githubusercontent.com/diecou/ds-training/master/buenos_aires_coor.csv", error_bad_lines=False)

df.head()

Unnamed: 0,Comuna,Barrio,Latitude,Longitude
0,15,AGRONOMIA,-34.5925,-58.4944
1,5,ALMAGRO,-34.6111,-58.4202
2,3,BALVANERA,-34.6101,-58.4059
3,4,BARRACAS,-34.6411,-58.3774
4,13,BELGRANO,-34.5621,-58.4567


<h2>2. Import remaining libraries

In [7]:
import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


<h2>3. Create a map of Buenos Aires with its 'Barrios'</h2>

In [8]:
address = 'Buenos Aires'

geolocator = Nominatim(user_agent="ba_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Buenos Aires are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Buenos Aires are -34.6075616, -58.437076.


<h3>Draw the map

In [9]:
# create map of New York using latitude and longitude values
map_ba = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, comuna, barrio in zip(df['Latitude'], df['Longitude'], df['Comuna'], df['Barrio']):
    label = '{}, Comuna {}'.format(barrio, comuna)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_ba)  
    
map_ba

<h2>4. Analyze Barrios in Buenos Aires</h2>

<h3>Define Foursquare Credentials and Version

In [10]:
CLIENT_ID = 'FTXL5TNGA2NSEP5ANEOE0RXKXXVC31N0ABYXTQ525E0P5Q2N' # your Foursquare ID
CLIENT_SECRET = '1F5YPJTE2MJ3YSCMBD3V5KMZR2E23AYF5NK4SWEBOI1PCROG' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FTXL5TNGA2NSEP5ANEOE0RXKXXVC31N0ABYXTQ525E0P5Q2N
CLIENT_SECRET:1F5YPJTE2MJ3YSCMBD3V5KMZR2E23AYF5NK4SWEBOI1PCROG



<h3>Create a function to explore all the Barrios in Buenos Aires to look for the venues of our interest

In [11]:
# function to repeat the exploring process to all the neighborhoods in Buenos Aires
import urllib
def getNearbyVenues(names, latitudes, longitudes, radius=5000, categoryIds=''):
    try:
        venues_list=[]
        for name, lat, lng in zip(names, latitudes, longitudes):
            #print(name)

            # create the API request URL
            link = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, radius, LIMIT)

            if (categoryIds != ''):
                link = link + '&categoryId={}'
                link = link.format(categoryIds)

            # make the GET request
            respons = requests.get(link).json()
            results = respons["response"]['venues']

            # return only relevant information for each nearby venue
            for v in results:
                success = False
                try:
                    category = v['categories'][0]['name']
                    success = True
                except:
                    pass

                if success:
                    venues_list.append([(
                        name, 
                        lat, 
                        lng, 
                        v['name'], 
                        v['location']['lat'], 
                        v['location']['lng'],
                        v['categories'][0]['name']
                    )])

        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['Localidad', 
                  'Localidad Latitude', 
                  'Localidad Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    except:
        print(link)
        print(results)
        print(nearby_venues)

    return(nearby_venues)

<h3>4a. Run the above function to find the location of existing Cinemas</h3>

In [12]:
LIMIT = 1000

# Use category id 4bf58dd8d48988d16c941735 to only get the burger joints
ba_cinemas = getNearbyVenues(names=df['Barrio'], latitudes=df['Latitude'], longitudes=df['Longitude'], radius=1000, categoryIds='4bf58dd8d48988d17f941735')
ba_cinemas.head()

Unnamed: 0,Localidad,Localidad Latitude,Localidad Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,AGRONOMIA,-34.5925,-58.4944,Cineclub La Pampa,-34.584548,-58.487944,Indie Movie Theater
1,ALMAGRO,-34.6111,-58.4202,Cinemark Caballito,-34.616343,-58.429011,Multiplex
2,BALVANERA,-34.6101,-58.4059,Gaumont,-34.605704,-58.40242,Multiplex
3,BALVANERA,-34.6101,-58.4059,Teatro IFT,-34.603752,-58.406408,Indie Theater
4,BALVANERA,-34.6101,-58.4059,UIP - United International Pictures Argentina,-34.602885,-58.395653,Movie Theater


<h3>4b. Run the same function to find the location of Restaurants</h3>

In [13]:
LIMIT = 1000

# Use category id 4bf58dd8d48988d16c941735 to only get the burger joints
ba_restaurants = getNearbyVenues(names=df['Barrio'], latitudes=df['Latitude'], longitudes=df['Longitude'], radius=1000, categoryIds='4d4b7105d754a06374d81259')
ba_restaurants.head()

Unnamed: 0,Localidad,Localidad Latitude,Localidad Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,AGRONOMIA,-34.5925,-58.4944,La Floreada,-34.594121,-58.492288,Restaurant
1,AGRONOMIA,-34.5925,-58.4944,Bonafide,-34.590722,-58.498184,Coffee Shop
2,AGRONOMIA,-34.5925,-58.4944,Al Piatto,-34.590089,-58.497244,Pizza Place
3,AGRONOMIA,-34.5925,-58.4944,Viejo Almacén de Quesos y Fiambres,-34.592346,-58.50059,Deli / Bodega
4,AGRONOMIA,-34.5925,-58.4944,Bellagamba Bodegón,-34.596853,-58.496777,Argentinian Restaurant


<h3>4c. Run the same function to find the location of Theaters</h3>

In [14]:
# Use category id 4bf58dd8d48988d16c941735 to only get the burger joints
ba_theaters = getNearbyVenues(names=df['Barrio'], latitudes=df['Latitude'], longitudes=df['Longitude'], radius=1000, categoryIds='4bf58dd8d48988d137941735')
ba_theaters.head()

Unnamed: 0,Localidad,Localidad Latitude,Localidad Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,AGRONOMIA,-34.5925,-58.4944,Instituto Sudamericano de Ilusionismo,-34.593473,-58.502755,Theater
1,AGRONOMIA,-34.5925,-58.4944,Gargantúa,-34.599755,-58.501886,Theater
2,ALMAGRO,-34.6111,-58.4202,Actors Studio Teatro ™ y Estudio de Carlos Gan...,-34.608654,-58.420578,Theater
3,ALMAGRO,-34.6111,-58.4202,TEATRO DEL PASILLO,-34.611836,-58.419897,Theater
4,ALMAGRO,-34.6111,-58.4202,La Huella,-34.601076,-58.417911,Vegetarian / Vegan Restaurant


<h2>5. Group Data

<h3>Let's define a function to group all the venues found before

In [17]:
def addColumn(initDf, title, groupedDf):
    grouped = groupedDf.groupby('Localidad').count()
    
    for n in initDf['Localidad']:
        try:
            initDf.loc[initDf['Localidad'] == n,title] = grouped.loc[n, 'Venue']
        except:
            initDf.loc[initDf['Localidad'] == n,title] = 0

<h3>Add venue count to each Barrio

In [18]:
df_venues = df.copy()
df_venues.rename(columns={'Barrio':'Localidad'}, inplace=True)

addColumn(df_venues, 'Cinemas', ba_cinemas)
addColumn(df_venues, 'Restaurants', ba_restaurants)
addColumn(df_venues, 'Theaters', ba_theaters)

df_venues.rename(columns={'Localidad':'Barrio'}, inplace=True)

df_venues.head()

Unnamed: 0,Comuna,Barrio,Latitude,Longitude,Cinemas,Restaurants,Theaters
0,15,AGRONOMIA,-34.5925,-58.4944,1.0,50.0,2.0
1,5,ALMAGRO,-34.6111,-58.4202,1.0,50.0,20.0
2,3,BALVANERA,-34.6101,-58.4059,12.0,50.0,27.0
3,4,BARRACAS,-34.6411,-58.3774,0.0,49.0,1.0
4,13,BELGRANO,-34.5621,-58.4567,12.0,50.0,2.0


<h3>Assign weight according to the customer's needs

In [19]:
# the client wants to open the cinema where there isn't one close by to attract the most number of clients. Therefore we assign a high negative value
weight_cinema = -10

# places with several restaurants are desirable to attract people that are already looking to spend time out of home
weight_restaurant = 1

# theaters are complementary to cinemas. They attract a large number of people which makes those places suitable for opening new entertainment businesses
weight_theater = 2

In [21]:
df_weighted = df_venues[['Barrio']].copy()
df_weighted.head()

Unnamed: 0,Barrio
0,AGRONOMIA
1,ALMAGRO
2,BALVANERA
3,BARRACAS
4,BELGRANO


<h3>Calculate the score of each Barrio using the determined weights

In [23]:
df_weighted['Score'] = df_venues['Cinemas'] * weight_cinema + df_venues['Restaurants'] * weight_restaurant + df_venues['Theaters'] * weight_theater
df_weighted = df_weighted.sort_values(by=['Score'], ascending=False)
df_weighted

Unnamed: 0,Barrio,Score
1,ALMAGRO,80.0
5,BOCA,62.0
38,VILLA GRAL. MITRE,53.0
3,BARRACAS,51.0
14,LINIERS,51.0
37,VILLA DEVOTO,50.0
34,VERSALLES,50.0
25,PATERNAL,49.0
11,CONSTITUCION,46.0
31,SAN NICOLAS,46.0


<h2>6. Show to most suitable Barrio along with the venues under analysis</h2>

<h3>Create a function to add the analyzed venues to the map

In [25]:
# function to add markers for given venues to map
def addToMap(df, color, existingMap):
    for lat, lng, local, venue, venueCat in zip(df['Venue Latitude'], df['Venue Longitude'], df['Localidad'], df['Venue'], df['Venue Category']):
        label = '{} ({}) - {}'.format(venue, venueCat, local)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7).add_to(existingMap)

<h3>Show Map

In [26]:
map_result = folium.Map(location=[latitude, longitude], zoom_start=15)

winner = df[df['Barrio'] == 'ALMAGRO']

for lat, lng, local in zip(winner['Latitude'], winner['Longitude'], winner['Barrio']):
    label = '{}'.format(local)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.7).add_to(map_result) 

addToMap(ba_cinemas[ba_cinemas['Localidad'] == 'ALMAGRO'], 'red', map_result)
addToMap(ba_restaurants[ba_restaurants['Localidad'] == 'ALMAGRO'], 'green', map_result)
addToMap(ba_theaters[ba_theaters['Localidad'] == 'ALMAGRO'], 'gold', map_result)

map_result