## Aux 4 Madrid venues -Foursquare API-

In [1]:
# Import libraries
import pandas as pd
import json
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import requests
import types
from botocore.client import Config
import ibm_boto3

#### Create a DataFrame (df_madrid_geo) including the coordinates for each Madrid district.
#### Cell is hidden as it contains credentials.

In [2]:
# The code was removed by Watson Studio for sharing.

#### Coordinates for Madrid districts

In [3]:
# Display the district coordinates DataFrame created in the hidden cell above
df_madrid_geo

Unnamed: 0,DISTRICT,LATITUDE,LONGITUDE
0,ARGANZUELA,40.400211,-3.69618
1,BARAJAS,40.473659,-3.57777
2,CARABANCHEL,40.375215,-3.744876
3,CENTRO,40.411516,-3.707644
4,CHAMARTIN,40.462059,-3.6766
5,CHAMBERI,40.43404,-3.70379
6,CIUDAD LINEAL,40.445668,-3.654384
7,FUENCARRAL-EL PARDO,40.498402,-3.7314
8,HORTALEZA,40.474441,-3.6411
9,LATINA,40.400211,-3.726519


#### Define Foursquare Credentials and Version
#### Cell is hidden as it contains credentials.

In [4]:
# The code was removed by Watson Studio for sharing.

#### Define a function to obtain the top 100 venues (the maximum the API returns) in the 'food' category within a 400-meter radius of each district

In [5]:
def getNearbyVenues(names, latitudes, longitudes, radius=400, limit=500, categoryId="4d4b7105d754a06374d81259"):
    """Collect the Foursquare venues found around each location.

    For every (name, latitude, longitude) triple the Foursquare
    ``venues/explore`` endpoint is queried once, and one row is produced per
    venue in the response. The name of each location is printed as it is
    processed.

    Relies on the module-level ``CLIENT_ID``, ``CLIENT_SECRET`` and
    ``VERSION`` values defined in an earlier cell.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Parallel iterables describing the locations to query; they are
        consumed together with ``zip``.
    radius : int, default 400
        Sent to the API as the ``radius`` query parameter.
    limit : int, default 500
        Sent to the API as the ``limit`` query parameter.
        NOTE(review): the notebook text states the API returns at most 100
        venues per request, so the effective cap is presumably lower than
        this default -- confirm against the Foursquare documentation.
    categoryId : str
        Sent to the API as the ``categoryId`` filter (described in the
        notebook as the 'food' category).

    Returns
    -------
    pandas.DataFrame
        Columns: DISTRICT, LATITUDE, LONGITUDE, VENUE, VENUE LATITUDE,
        VENUE LONGITUDE, VENUE CATEGORY. Locations for which no venue is
        returned contribute no rows; if nothing is returned at all the frame
        is empty but still has these columns.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    columns = ['DISTRICT',
               'LATITUDE',
               'LONGITUDE',
               'VENUE',
               'VENUE LATITUDE',
               'VENUE LONGITUDE',
               'VENUE CATEGORY']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and URL-encode the query string
        params = {
            'categoryId': categoryId,
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit,
        }

        # A timeout keeps the notebook from hanging forever on a stalled
        # connection; raise_for_status surfaces API errors explicitly instead
        # of failing later with an opaque KeyError.
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params,
                                timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # Keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            # A venue without categories yields a missing value rather than
            # an IndexError.
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the columns up front keeps the schema even when no rows exist
    return pd.DataFrame(rows, columns=columns)

#### Include the venues information in the DataFrame

In [6]:
# Fetch the venues around every district's coordinates (one request per district)
df_venues = getNearbyVenues(
    names=df_madrid_geo['DISTRICT'],
    latitudes=df_madrid_geo['LATITUDE'],
    longitudes=df_madrid_geo['LONGITUDE'],
)

ARGANZUELA
BARAJAS
CARABANCHEL
CENTRO
CHAMARTIN
CHAMBERI
CIUDAD LINEAL
FUENCARRAL-EL PARDO
HORTALEZA
LATINA
MONCLOA-ARAVACA
MORATALAZ
PUENTE DE VALLECAS
RETIRO
SALAMANCA
SAN BLAS-CANILLEJAS
TETUAN
USERA
VICALVARO
VILLA DE VALLECAS
VILLAVERDE


In [7]:
# Check the size of the df as (rows, columns); there is one row per venue returned
df_venues.shape

(410, 7)

#### Venues information

In [8]:
# Preview the first rows of the venues DataFrame
df_venues.head()

Unnamed: 0,DISTRICT,LATITUDE,LONGITUDE,VENUE,VENUE LATITUDE,VENUE LONGITUDE,VENUE CATEGORY
0,ARGANZUELA,40.400211,-3.69618,La Pequeña Graná,40.399574,-3.69855,Tapas Restaurant
1,ARGANZUELA,40.400211,-3.69618,Havana Blues,40.40205,-3.698488,Cuban Restaurant
2,ARGANZUELA,40.400211,-3.69618,El Quinto Pecado,40.400028,-3.694446,Gastropub
3,ARGANZUELA,40.400211,-3.69618,Tres Cerditos,40.397316,-3.694184,Chinese Restaurant
4,ARGANZUELA,40.400211,-3.69618,Restaurante Buen Gusto,40.401766,-3.698961,Chinese Restaurant


#### Explore venue 'food' categories

In [9]:
# Count the venues in each category, most frequent first
df_venues['VENUE CATEGORY'].value_counts()

Spanish Restaurant                 86
Tapas Restaurant                   49
Restaurant                         44
Café                               26
Pizza Place                        17
Bakery                             15
Diner                              12
Mediterranean Restaurant           11
Burger Joint                       10
Sandwich Place                     10
Breakfast Spot                      9
Mexican Restaurant                  9
Italian Restaurant                  9
Chinese Restaurant                  9
Snack Place                         9
Gastropub                           7
Sushi Restaurant                    7
Fast Food Restaurant                7
Bistro                              6
Vegetarian / Vegan Restaurant       6
Asian Restaurant                    6
Japanese Restaurant                 5
Seafood Restaurant                  5
Paella Restaurant                   4
Falafel Restaurant                  2
American Restaurant                 2
Latin Americ

#### Explore the number of venues by district

In [10]:
# Count the venues per district; districts with no venues do not appear in the result
df_venues['DISTRICT'].value_counts()

CENTRO                 100
CHAMBERI                85
SALAMANCA               34
ARGANZUELA              31
CIUDAD LINEAL           24
TETUAN                  24
BARAJAS                 20
PUENTE DE VALLECAS      18
SAN BLAS-CANILLEJAS     16
HORTALEZA                9
USERA                    8
CHAMARTIN                8
MORATALAZ                7
VILLAVERDE               5
VILLA DE VALLECAS        5
LATINA                   5
RETIRO                   5
CARABANCHEL              4
MONCLOA-ARAVACA          2
Name: DISTRICT, dtype: int64

In [11]:
# Count the missing values in each column (all zeros means nothing is missing)
df_venues.isna().sum()

DISTRICT           0
LATITUDE           0
LONGITUDE          0
VENUE              0
VENUE LATITUDE     0
VENUE LONGITUDE    0
VENUE CATEGORY     0
dtype: int64

#### We save the DataFrame into a CSV file (Venues.csv). Cell is hidden as it contains credentials.

In [12]:
# The code was removed by Watson Studio for sharing.

{'file_name': 'Venues.csv',
 'message': 'File saved to project storage.',
 'bucket_name': 'capstoneprojectnotebook-donotdelete-pr-o8lsietovhyq9h',
 'asset_id': 'b0844513-8519-418c-bb04-8acd38bb95aa'}

#### Venues grouped by district

In [13]:
# Create a new df with the number of venues per district, largest first.
# size() counts the rows of each group directly, so there is no need to
# rename an arbitrary counted column and drop the others by position
# (which would break silently if the columns of df_venues changed).
df_venues_summary = (
    df_venues
    .groupby('DISTRICT')
    .size()
    .reset_index(name='NO VENUES')
    .sort_values(by='NO VENUES', ascending=False)
    .reset_index(drop=True)
)
df_venues_summary

Unnamed: 0,DISTRICT,NO VENUES
0,CENTRO,100
1,CHAMBERI,85
2,SALAMANCA,34
3,ARGANZUELA,31
4,CIUDAD LINEAL,24
5,TETUAN,24
6,BARAJAS,20
7,PUENTE DE VALLECAS,18
8,SAN BLAS-CANILLEJAS,16
9,HORTALEZA,9


In [14]:
# Check the size of the summary df: one row per district that has at least one venue
df_venues_summary.shape

(19, 2)

In [15]:
# Add new rows for districts with 0 venues.
# The missing districts are derived from df_madrid_geo instead of being
# hard-coded, which also makes the cell safe to re-run (nothing is added once
# every district is present). pd.concat replaces DataFrame.append, which was
# removed in pandas 2.0.
districts_present = set(df_venues_summary['DISTRICT'])
districts_missing = [district for district in df_madrid_geo['DISTRICT']
                     if district not in districts_present]
if districts_missing:
    zero_rows = pd.DataFrame({'DISTRICT': districts_missing, 'NO VENUES': 0})
    df_venues_summary = pd.concat([df_venues_summary, zero_rows], ignore_index=True)

In [16]:
# Display the summary, now including the districts with 0 venues
df_venues_summary

Unnamed: 0,DISTRICT,NO VENUES
0,CENTRO,100
1,CHAMBERI,85
2,SALAMANCA,34
3,ARGANZUELA,31
4,CIUDAD LINEAL,24
5,TETUAN,24
6,BARAJAS,20
7,PUENTE DE VALLECAS,18
8,SAN BLAS-CANILLEJAS,16
9,HORTALEZA,9


#### We save the DataFrame into a CSV file (Venues_Summary.csv). Cell is hidden as it contains credentials.

In [17]:
# The code was removed by Watson Studio for sharing.

{'file_name': 'Venues_Summary.csv',
 'message': 'File saved to project storage.',
 'bucket_name': 'capstoneprojectnotebook-donotdelete-pr-o8lsietovhyq9h',
 'asset_id': '02db3a64-f699-41df-aa04-15a8accf75f3'}