## Importing necessary libraries

In [101]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

import numpy as np # library to handle data in a vectorized manner

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

#!conda install -c conda-forge geopy --yes
#!conda install -c anaconda beautiful-soup --yes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes
#=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


## Importing data from Wikipedia

In [102]:
from bs4 import BeautifulSoup
import requests

# Download the Spanish Wikipedia page that lists the localities of Bogotá.
wiki_response = requests.get(
    "https://es.wikipedia.org/wiki/Anexo:Localidades_de_Bogot%C3%A1",
    timeout=30,  # do not hang the notebook forever on a stalled connection
)
wiki_response.raise_for_status()  # stop here on an HTTP error instead of parsing an error page
website_url = wiki_response.text  # HTML of the page (variable name kept from the original cell)
soup = BeautifulSoup(website_url,'lxml')
#print(soup.prettify())


In [106]:
# Locate the sortable wiki table and collect every link it contains.
My_table = soup.find('table', {'class': 'sortable wikitable'})
if My_table is None:
    # soup.find returns None when nothing matches; fail with a readable message
    # instead of an AttributeError on the next line.
    raise ValueError("No table with class 'sortable wikitable' was found in the page")
links = My_table.find_all('a')  # find_all is the current name of the legacy findAll alias

In [107]:
# The 'title' attribute of each link in the table is used as the locality name.
Division = [link.get('title') for link in links]

print(Division)

# Links without a title attribute yield None. Drop them by value rather than by
# position, so the cell does not depend on them being exactly the first two entries.
Division = [title for title in Division if title is not None]

[None, None, 'Usaquén', 'Chapinero', 'Santa Fe (Bogotá)', 'San Cristóbal (Bogotá)', 'Usme', 'Tunjuelito', 'Bosa (Bogotá)', 'Kennedy (Bogotá)', 'Fontibón', 'Engativá', 'Suba', 'Barrios Unidos (Bogotá)', 'Teusaquillo', 'Los Mártires', 'Antonio Nariño (Bogotá)', 'Puente Aranda', 'La Candelaria', 'Rafael Uribe Uribe (Bogotá)', 'Ciudad Bolívar (Bogotá)', 'Sumapaz (Bogotá)']


In [108]:
# Base table: one row per locality, held in a single 'Division' column.
df = pd.DataFrame({'Division': Division})
df

Unnamed: 0,Division
0,Usaquén
1,Chapinero
2,Santa Fe (Bogotá)
3,San Cristóbal (Bogotá)
4,Usme
5,Tunjuelito
6,Bosa (Bogotá)
7,Kennedy (Bogotá)
8,Fontibón
9,Engativá


In [172]:
def get_coords_local(localidad, output_as='center'):
    """
    Look up a locality of Bogotá on Nominatim (OpenStreetMap) and return its
    coordinates in WGS84, taken from the first search result.

    Parameters
    ----------
    localidad : str
        name of the locality (e.g. 'Usaquén'); the suffix
        ', Bogota, Bogota Capital District' is appended to build the query
    output_as : str
        choose from 'boundingbox' or 'center'.
         - 'boundingbox' for [latmin, latmax, lonmin, lonmax]
         - 'center' for [latcenter, loncenter]

    Returns
    -------
    output : list
        list with the coordinates as float

    Raises
    ------
    ValueError
        if output_as is neither 'boundingbox' nor 'center'
    IndexError
        if Nominatim returns no result for the query
    """
    # Validate before going to the network; previously an unknown value only
    # failed at the very end with an unbound local variable.
    if output_as not in ('boundingbox', 'center'):
        raise ValueError(
            "output_as must be 'boundingbox' or 'center', got {!r}".format(output_as))

    query = localidad + ', Bogota, Bogota Capital District'
    reply = requests.get(
        'https://nominatim.openstreetmap.org/search.php',
        # let requests percent-encode accents, spaces and parentheses in the query
        params={'q': query, 'format': 'json', 'polygon': 0},
        # identify the application, using the same agent name as the geopy geocoder in this notebook
        headers={'User-Agent': 'capstoneProject'},
        timeout=60,
    )
    reply.raise_for_status()

    matches = reply.json()
    if not matches:
        raise IndexError('Nominatim returned no result for {!r}'.format(query))
    best_match = matches[0]

    # parse response to list of floats
    if output_as == 'boundingbox':
        return [float(value) for value in best_match['boundingbox']]
    return [float(best_match[key]) for key in ('lat', 'lon')]

In [110]:
import time

df2 = df.copy() #creating a copy of the dataframe to perform operation

# Geocode every locality by name and collect the centre coordinates.
latitudeCln = []
longitudeCln = []
for localidad in df2['Division']:  # select by column label; row[0] relied on positional access
    print(localidad)
    lat, long = get_coords_local(localidad=localidad, output_as='center')
    latitudeCln.append(lat)
    longitudeCln.append(long)
    time.sleep(1)  # throttle: the public Nominatim service asks for at most one request per second

df2['Latitude'] = latitudeCln
df2['Longitude'] = longitudeCln

df2.shape # 20 divisions in Bogota

Usaquén
Chapinero
Santa Fe (Bogotá)
San Cristóbal (Bogotá)
Usme
Tunjuelito
Bosa (Bogotá)
Kennedy (Bogotá)
Fontibón
Engativá
Suba
Barrios Unidos (Bogotá)
Teusaquillo
Los Mártires
Antonio Nariño (Bogotá)
Puente Aranda
La Candelaria
Rafael Uribe Uribe (Bogotá)
Ciudad Bolívar (Bogotá)
Sumapaz (Bogotá)


(20, 3)

In [111]:
df2

Unnamed: 0,Division,Latitude,Longitude
0,Usaquén,4.694969,-74.031093
1,Chapinero,4.645377,-74.061943
2,Santa Fe (Bogotá),4.602204,-74.078837
3,San Cristóbal (Bogotá),4.549094,-74.047819
4,Usme,4.508155,-74.114328
5,Tunjuelito,4.562204,-74.127647
6,Bosa (Bogotá),4.609913,-74.18471
7,Kennedy (Bogotá),4.631582,-74.151319
8,Fontibón,4.673327,-74.144732
9,Engativá,4.708695,-74.109643


In [112]:
address = 'Bogotá, Colombia' #assigning coordinates to Bogota

geolocator = Nominatim(user_agent="capstoneProject")
location = geolocator.geocode(address, timeout=60, exactly_one=True)
if location is None:
    # geocode returns None when nothing matches; fail with a readable message
    # instead of an AttributeError on the next line.
    raise ValueError('Could not geocode {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The decimal coordinates of Bogotá are {}, {}.'.format(latitude, longitude))

The decimal coordinates of Bogotá are 4.59808, -74.0760439.


In [113]:
# make a copy of the dataframe to get it simply back if needed
df3 = df2.copy()
df3

Unnamed: 0,Division,Latitude,Longitude
0,Usaquén,4.694969,-74.031093
1,Chapinero,4.645377,-74.061943
2,Santa Fe (Bogotá),4.602204,-74.078837
3,San Cristóbal (Bogotá),4.549094,-74.047819
4,Usme,4.508155,-74.114328
5,Tunjuelito,4.562204,-74.127647
6,Bosa (Bogotá),4.609913,-74.18471
7,Kennedy (Bogotá),4.631582,-74.151319
8,Fontibón,4.673327,-74.144732
9,Engativá,4.708695,-74.109643


In [192]:
# Overview map centred on the geocoded coordinates of Bogotá
map_bogota = folium.Map(location=[latitude, longitude], zoom_start=15)

# One blue circle per locality; the popup shows the locality name
locality_marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
)
for lat, lng, local in zip(df3['Latitude'], df3['Longitude'], df3['Division']):
    label = folium.Popup('{}'.format(local), parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **locality_marker_style).add_to(map_bogota)

map_bogota

## Now we will use Foursquare to search for mobile shops in Bogotá

In [115]:
import os

limit = 500 # limit of number of venues requested from the Foursquare API per call
# NOTE(review): the per-division counts further down never exceed 50, so the API
# presumably caps results well below this value - confirm against the API docs.
radius = 5000 # define radius

# Credentials are read from the environment so that they are never stored in the
# notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting Jupyter.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will be rejected.')
VERSION = '20181020'
# getNearbyVenues (defined below) repeats the venue search for every locality of Bogotá
import urllib
def getNearbyVenues(names, latitudes, longitudes, radius=5000, categoryIds=''):
    """
    Search Foursquare for venues around each given point and return them as one table.

    Parameters
    ----------
    names : iterable of str
        label of each search centre (here: the locality name)
    latitudes, longitudes : iterable of float
        coordinates of each search centre, aligned with `names`
    radius : int
        value sent as the `radius` parameter of the search
    categoryIds : str
        Foursquare category id(s) sent as `categoryId`; '' sends no category filter

    Returns
    -------
    pandas.DataFrame
        one row per venue with the columns Division, Latitude, Longitude, Venue,
        Venue Latitude, Venue Longitude, Venue Category. The columns are present
        even when no venue was found.

    Notes
    -----
    Uses the module-level CLIENT_ID, CLIENT_SECRET, VERSION and limit.
    A search centre whose request fails is reported and skipped; venues without
    a category (or with missing fields) are skipped as well.
    """
    columns = ['Division',
               'Latitude',
               'Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    rows = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        # query parameters; requests takes care of the URL encoding
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit,
        }
        if categoryIds != '':
            params['categoryId'] = categoryIds

        try:
            reply = requests.get('https://api.foursquare.com/v2/venues/search',
                                 params=params, timeout=30)
            venues = reply.json()['response']['venues']
        except (requests.RequestException, ValueError, KeyError, TypeError) as err:
            # Best effort: report and carry on with the next search centre. The URL
            # is deliberately not printed because it contains the client secret.
            print('Skipping {}: Foursquare request failed ({!r})'.format(name, err))
            continue

        # keep only the relevant information of each venue
        for v in venues:
            try:
                rows.append((
                    name,
                    lat,
                    lng,
                    v['name'],
                    v['location']['lat'],
                    v['location']['lng'],
                    v['categories'][0]['name'],
                ))
            except (KeyError, IndexError, TypeError):
                # venue without a category or with missing fields: not usable
                continue

    return pd.DataFrame(rows, columns=columns)

In [116]:
# Use category id 4f04afc02fb6e1c99f3db0bc to only get the mobile shops
bogota_mobile_shop = getNearbyVenues(names=df3['Division'], latitudes=df3['Latitude'], longitudes=df3['Longitude'], radius=5000, categoryIds='4f04afc02fb6e1c99f3db0bc')
bogota_mobile_shop.head()

Unnamed: 0,Division,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Usaquén,4.694969,-74.031093,Claro,4.692292,-74.031972,Mobile Phone Shop
1,Usaquén,4.694969,-74.031093,Claro Unicentro,4.701076,-74.04161,Mobile Phone Shop
2,Usaquén,4.694969,-74.031093,Centro de Experiencia Movistar Calle 103,4.688108,-74.047348,Mobile Phone Shop
3,Usaquén,4.694969,-74.031093,Claro,4.678395,-74.046498,Mobile Phone Shop
4,Usaquén,4.694969,-74.031093,Centro de Experiencia Movistar,4.719739,-74.068207,Mobile Phone Shop


In [117]:
bogota_mobile_shop.shape

(671, 7)

## Plotting the Data using Folium

In [119]:
# function to add markers for given venues to map
def addToMap(df, color, existingMap):
    """
    Draw one circle marker per venue of `df` on `existingMap`.

    `df` must provide the columns 'Venue Latitude', 'Venue Longitude', 'Division',
    'Venue' and 'Venue Category'. Every marker uses `color` for outline and fill,
    and its popup reads '<venue> (<category>) - <division>'.
    """
    marker_style = dict(radius=5, color=color, fill=True, fill_color=color, fill_opacity=0.7)

    venue_rows = zip(df['Venue Latitude'], df['Venue Longitude'],
                     df['Division'], df['Venue'], df['Venue Category'])
    for venue_lat, venue_lng, division, venue_name, category in venue_rows:
        popup_text = '{} ({}) - {}'.format(venue_name, category, division)
        popup = folium.Popup(popup_text, parse_html=True)
        marker = folium.CircleMarker([venue_lat, venue_lng], popup=popup, **marker_style)
        marker.add_to(existingMap)

In [194]:
map_bogota_mobile = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(bogota_mobile_shop, 'red', map_bogota_mobile)
map_bogota_mobile

## After plotting the mobile shops, we will now choose Foursquare categories that tend to have a good number of mobile shops in their vicinity

### In this project I have identified 6 categories
### Namely :
### 1. High Schools
### 2. Universities
### 3. Offices
### 4. Entertainment related 
### 5. Electronics Shops
### 6. Hotels

(Note: this list of categories is not exhaustive; the categories were identified through trial and error and some common sense.)

In [121]:
bogota_venues_highschools = getNearbyVenues(names=df3['Division'], latitudes=df3['Latitude'], longitudes=df3['Longitude'], radius=1000, categoryIds='4bf58dd8d48988d13d941735')
bogota_venues_highschools.head()

Unnamed: 0,Division,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Usaquén,4.694969,-74.031093,Gimnasio Los Cerros,4.694464,-74.026556,School
1,Chapinero,4.645377,-74.061943,Colegio Nuestra Señora de Chiquinquira,4.6498,-74.065643,High School
2,Chapinero,4.645377,-74.061943,Colegio del Sagrado Corazón de Jesus Bethlemitas,4.649182,-74.058036,High School
3,Chapinero,4.645377,-74.061943,Colegio Distrital Simón Rodriguez,4.653948,-74.060732,High School
4,Chapinero,4.645377,-74.061943,Colegio Ervit,4.642634,-74.066469,High School


In [122]:
bogota_venues_highschools.shape

(40, 7)

In [190]:
map_bogota_highschools = folium.Map(location=[latitude, longitude], zoom_start=15)
addToMap(bogota_venues_highschools, 'green', map_bogota_highschools)
map_bogota_highschools

In [124]:
bogota_venues_uni = getNearbyVenues(names=df3['Division'], latitudes=df3['Latitude'], longitudes=df3['Longitude'], radius=1000, categoryIds='4bf58dd8d48988d1ae941735')
bogota_venues_uni.head()

Unnamed: 0,Division,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Usaquén,4.694969,-74.031093,Tecnologico de Monterry,4.6921,-74.034916,University
1,Usaquén,4.694969,-74.031093,Centrum Católica,4.690426,-74.038657,University
2,Usaquén,4.694969,-74.031093,UTadeo M7a,4.70539,-74.032827,University
3,Chapinero,4.645377,-74.061943,Universidad de La Salle,4.644576,-74.059379,University
4,Chapinero,4.645377,-74.061943,Universidad Santo Tomás - Edificio Sto. Doming...,4.638281,-74.06355,College Academic Building


In [125]:
bogota_venues_uni.shape

(117, 7)

In [189]:
map_bogota_uni = folium.Map(location=[latitude, longitude], zoom_start=15)
addToMap(bogota_venues_uni, 'gold', map_bogota_uni)
map_bogota_uni

In [88]:
bogota_venues_office = getNearbyVenues(names=df3['Division'], latitudes=df3['Latitude'], longitudes=df3['Longitude'], radius=1000, categoryIds='4d4b7105d754a06375d81259')
bogota_venues_office.head()

Unnamed: 0,Division,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Usaquén,4.694969,-74.031093,Bistronomy,4.695679,-74.030267,French Restaurant
1,Usaquén,4.694969,-74.031093,Centro Médico de la Sabana,4.696026,-74.031596,Doctor's Office
2,Usaquén,4.694969,-74.031093,WeWork Usaquén,4.694304,-74.032745,Coworking Space
3,Usaquén,4.694969,-74.031093,Planet IFE,4.690749,-74.038713,Advertising Agency
4,Usaquén,4.694969,-74.031093,Cisco Systems Colombia,4.692935,-74.034419,Office


In [89]:
bogota_venues_office.shape

(798, 7)

In [188]:
map_bogota_office = folium.Map(location=[latitude, longitude], zoom_start=15)
addToMap(bogota_venues_office, 'fuchsia', map_bogota_office)
map_bogota_office

In [138]:
bogota_entertainment = getNearbyVenues(names=df3['Division'], latitudes=df3['Latitude'], longitudes=df3['Longitude'], radius=1000, categoryIds='4d4b7104d754a06370d81259')
bogota_entertainment.head()

Unnamed: 0,Division,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Usaquén,4.694969,-74.031093,Mercado de las Pulgas Usaquen,4.695534,-74.030991,Flea Market
1,Usaquén,4.694969,-74.031093,Tienda de Café,4.695467,-74.030644,Breakfast Spot
2,Usaquén,4.694969,-74.031093,Petipa Escuela de Danza,4.70298,-74.03082,Dance Studio
3,Usaquén,4.694969,-74.031093,Feria Artesanal de Usaquén,4.696245,-74.030463,Street Art
4,Usaquén,4.694969,-74.031093,Monkis Place,4.694061,-74.030076,General Entertainment


In [139]:
bogota_entertainment.shape

(429, 7)

In [187]:
map_bogota_entertainment = folium.Map(location=[latitude, longitude], zoom_start=15)
addToMap(bogota_entertainment, 'darkgreen', map_bogota_entertainment)
map_bogota_entertainment

In [142]:
bogota_electronics = getNearbyVenues(names=df3['Division'], latitudes=df3['Latitude'], longitudes=df3['Longitude'], radius=1000, categoryIds='4bf58dd8d48988d122951735')
bogota_electronics.head()

Unnamed: 0,Division,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Usaquén,4.694969,-74.031093,Mac Center,4.696648,-74.031222,Electronics Store
1,Usaquén,4.694969,-74.031093,Teknopolis,4.692994,-74.032299,Electronics Store
2,Usaquén,4.694969,-74.031093,Mac Center Unicentro,4.701345,-74.038277,Electronics Store
3,Usaquén,4.694969,-74.031093,Centro de Eventos Espaceo's Usaquen,4.700094,-74.037331,Electronics Store
4,Usaquén,4.694969,-74.031093,K-tronix 124,4.700652,-74.028819,Electronics Store


In [143]:
bogota_electronics.shape

(144, 7)

In [186]:
map_bogota_electronics = folium.Map(location=[latitude, longitude], zoom_start=15)
addToMap(bogota_electronics, 'cadetblue', map_bogota_electronics)
map_bogota_electronics

In [158]:
bogota_hotel = getNearbyVenues(names=df3['Division'], latitudes=df3['Latitude'], longitudes=df3['Longitude'], radius=1000, categoryIds='4bf58dd8d48988d1fa931735')
bogota_hotel.head()

Unnamed: 0,Division,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Usaquén,4.694969,-74.031093,Hotel Casona,4.695335,-74.031067,Bed & Breakfast
1,Usaquén,4.694969,-74.031093,Hampton Inn by Hilton,4.69567,-74.031986,Hotel
2,Usaquén,4.694969,-74.031093,Hotel NH Collection Bogotá Hacienda Royal,4.691981,-74.031946,Hotel
3,Usaquén,4.694969,-74.031093,W Hotel,4.695469,-74.038275,Hotel
4,Usaquén,4.694969,-74.031093,Hotel Black,4.703193,-74.032172,Hotel


In [159]:
bogota_hotel.shape

(237, 7)

In [185]:
map_bogota_hotel = folium.Map(location=[latitude, longitude], zoom_start=15)
addToMap(bogota_hotel, 'orange', map_bogota_hotel)
map_bogota_hotel

## Phew...
## Now let's build a dataframe in which all of the categories are aggregated

In [91]:
def addColumn(startDf, columnTitle, dataDf):
    """
    Add to `startDf` a column holding the number of venues per division.

    Parameters
    ----------
    startDf : pandas.DataFrame
        table with a 'Division' column; it is modified in place
    columnTitle : str
        name of the column to create (or overwrite) in `startDf`
    dataDf : pandas.DataFrame
        venue table with the columns 'Division' and 'Venue'

    Returns
    -------
    None

    Notes
    -----
    The count is the number of non-null 'Venue' entries per division. Divisions
    without any venue in `dataDf` get 0. The column is stored as float, as before.
    """
    venue_counts = dataDf.groupby('Division')['Venue'].count()
    # Look every division up in one step; a division missing from the counts maps
    # to NaN and is then set to 0, so no exception handling is needed.
    startDf[columnTitle] = startDf['Division'].map(venue_counts).fillna(0).astype(float)

In [163]:
df_data = df3.copy()

# One count column per venue table: (column title, table of venues found for that category)
venue_tables = [
    ('Mobile Shop', bogota_mobile_shop),
    ('High Schools', bogota_venues_highschools),
    ('Universities', bogota_venues_uni),
    ('Offices', bogota_venues_office),
    ('Entertainment', bogota_entertainment),
    ('Electronics', bogota_electronics),
    ('Hotel', bogota_hotel),
]
for column_title, venue_table in venue_tables:
    addColumn(df_data, column_title, venue_table)
df_data

Unnamed: 0,Division,Latitude,Longitude,Mobile Shop,High Schools,Universities,Offices,Entertainment,Electronics,Hotel
0,Usaquén,4.694969,-74.031093,46.0,1.0,3.0,50.0,47.0,8.0,44.0
1,Chapinero,4.645377,-74.061943,46.0,4.0,35.0,50.0,50.0,18.0,50.0
2,Santa Fe (Bogotá),4.602204,-74.078837,48.0,2.0,22.0,50.0,49.0,42.0,50.0
3,San Cristóbal (Bogotá),4.549094,-74.047819,6.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Usme,4.508155,-74.114328,0.0,3.0,0.0,31.0,2.0,0.0,0.0
5,Tunjuelito,4.562204,-74.127647,11.0,2.0,0.0,43.0,2.0,1.0,1.0
6,Bosa (Bogotá),4.609913,-74.18471,14.0,1.0,0.0,39.0,6.0,0.0,0.0
7,Kennedy (Bogotá),4.631582,-74.151319,46.0,1.0,1.0,46.0,7.0,0.0,0.0
8,Fontibón,4.673327,-74.144732,45.0,3.0,2.0,48.0,14.0,2.0,10.0
9,Engativá,4.708695,-74.109643,46.0,2.0,1.0,47.0,27.0,3.0,0.0


## From this dataframe we can easily see which categories have more mobile shops in their vicinity
## Now we give every Division in Bogotá a score based on these categories, which will determine which one is the best location for David's Mobile Shop

In [168]:
# negative weight: David wants to open a mobile shop, so existing mobile shops are competition to be avoided
weight_mobile = -1

# positive weight: high-school students are good customers
# higher positive weight: university students are better customers
# highest positive weight: office employees are even better customers
# the remaining categories (entertainment, electronics, hotels) are weighted likewise

weight_schools = 1
weight_uni = 1.5
weight_offices = 2
weight_entertainment = 2
weight_electronics = 1.5
weight_hotel = 2

In [165]:
df_weighted = df_data[['Division']].copy()

In [169]:
# Score of a division = sum over all categories of (venue count * category weight).
# The terms are accumulated in the order listed here.
weighted_columns = [
    ('Mobile Shop', weight_mobile),
    ('High Schools', weight_schools),
    ('Universities', weight_uni),
    ('Offices', weight_offices),
    ('Entertainment', weight_entertainment),
    ('Electronics', weight_electronics),
    ('Hotel', weight_hotel),
]
first_column, first_weight = weighted_columns[0]
division_score = df_data[first_column] * first_weight
for score_column, score_weight in weighted_columns[1:]:
    division_score = division_score + df_data[score_column] * score_weight

df_weighted['Score'] = division_score
# best division first
df_weighted = df_weighted.sort_values(by=['Score'], ascending=False)
df_weighted

Unnamed: 0,Division,Score
16,La Candelaria,355.5
2,Santa Fe (Bogotá),348.0
1,Chapinero,337.5
0,Usaquén,253.5
12,Teusaquillo,184.0
11,Barrios Unidos (Bogotá),169.0
13,Los Mártires,129.0
9,Engativá,110.0
8,Fontibón,108.0
14,Antonio Nariño (Bogotá),105.5


## La Candelaria Locality is the best option for David in order to open his Mobile Shop followed by Santa Fe and Chapinero
### We end our project by plotting all of these categories on a single map

In [184]:
map_bog_result = folium.Map(location=[latitude, longitude], zoom_start=15)

# Take the winner from the ranking (df_weighted is sorted by Score, best first)
# instead of repeating its name in every line below.
best_division = df_weighted.iloc[0]['Division']

bog_win = df3[df3['Division'] == best_division]

# blue marker for the centre of the winning division
for lat, lng, local in zip(bog_win['Latitude'], bog_win['Longitude'], bog_win['Division']):
    label = '{}'.format(local)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.7).add_to(map_bog_result)

# venues found for the winning division, one colour per category (same colours as the maps above)
result_layers = [
    (bogota_mobile_shop, 'red'),
    (bogota_venues_highschools, 'green'),
    (bogota_venues_uni, 'gold'),
    (bogota_venues_office, 'fuchsia'),
    (bogota_entertainment, 'darkgreen'),
    (bogota_electronics, 'cadetblue'),
    (bogota_hotel, 'orange'),
]
for layer_venues, layer_color in result_layers:
    addToMap(layer_venues[layer_venues['Division'] == best_division], layer_color, map_bog_result)

map_bog_result

# Thanks :)