# Collecting Data from Foursquare

### Importing libraries

In [1]:
import requests # library to handle requests
import pandas as pd # library for data analysis
import json

# Library for saving and reading data from the project
#from project_lib import Project

print('Importing ready!')

Importing ready!


### Retrieve the Neighbourhood Data

#### Create a list with the cities we want to explore

In [2]:
# Cities whose neighbourhood files are processed in this notebook
city_list = [
    'Amsterdam',
    'Berlin',
    'Paris',
    'Vienna',
    'Madrid',
    'Rome',
]

#### Create a list with the Neighbourhood files of those cities

In [3]:
# Derive the neighbourhood CSV file name of every city, in city_list order
files = ['Neighbourhoods_of_' + city + '.csv' for city in city_list]

files

['Neighbourhoods_of_Amsterdam.csv',
 'Neighbourhoods_of_Berlin.csv',
 'Neighbourhoods_of_Paris.csv',
 'Neighbourhoods_of_Vienna.csv',
 'Neighbourhoods_of_Madrid.csv',
 'Neighbourhoods_of_Rome.csv']

#### Retrieve the geographical data of the Neighbourhoods

In [4]:
column_names = ["City", "Neighbourhood", "latitude", "longitude"]

# Read every neighbourhood CSV file, keeping only the columns of interest.
# The frames are collected first and concatenated once: growing a DataFrame
# inside the loop re-copies all earlier rows on every pass, and concatenating
# onto an empty, untyped frame is deprecated in recent pandas releases.
frames = [pd.read_csv(file)[column_names] for file in files]

# Merge the data from the different files together in 1 dataframe
if frames:
    city_data = pd.concat(frames, ignore_index=True)
else:
    # No input files: fall back to an empty frame with the expected columns
    city_data = pd.DataFrame(columns=column_names)

#### Show some dataframe information

In [5]:
# Shape of the dataframe as (number of rows, number of columns)
city_data.shape

(561, 4)

In [6]:
# Per-city summary of the neighbourhood data: smallest value, largest value,
# number of values and number of distinct values for every remaining column
city_groupby = (
    city_data
    .groupby('City')
    .agg(['min', 'max', 'count', 'nunique'])
    .reset_index()
)
city_groupby

Unnamed: 0_level_0,City,Neighbourhood,Neighbourhood,Neighbourhood,Neighbourhood,latitude,latitude,latitude,latitude,longitude,longitude,longitude,longitude
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,count,nunique,min,max,count,nunique,min,max,count,nunique
0,Amsterdam,Amsteldorp,Zuidas,91,91,52.093622,52.419487,91,91,4.749422,5.11969,91,91
1,Berlin,Adlershof,Stadtrandsiedlung Malchow,97,97,52.375665,52.636672,97,97,13.140052,13.701802,97,97
2,Madrid,Abrantes,Zofío,128,128,40.153387,40.557883,128,128,-3.828783,-3.362433,128,128
3,Paris,Amérique,Épinettes,79,79,48.821991,48.894058,79,79,2.266738,2.406736,79,79
4,Rome,Alessandrino,Tuscolano,56,56,41.684213,42.048083,56,56,12.22306,12.903213,56,56
5,Vienna,Albern,Zwischenbrücken,110,107,48.128042,48.301908,110,107,16.222748,16.522979,110,107


## Explore the Neighbourhoods with Foursquare

#### Define Foursquare Credentials and Version

In [7]:
# @hidden_cell
# The Foursquare credentials are read from the environment so that the secrets
# are not stored in (or shared together with) the notebook itself.
# NOTE(review): the key pair that used to be written out in this cell should
# be treated as leaked and rotated in the Foursquare developer console.
import os

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
if not (CLIENT_ID and CLIENT_SECRET):
    # Warn right away instead of failing later with an obscure API error
    print('WARNING: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before calling the Foursquare API.')
VERSION = '20180604' # value sent as the "v" parameter of every request
LIMIT = 100 # A default Foursquare API limit value

#### Create a function to repeat the same process to all the neighbourhoods

In [8]:
def getNearbyVenues(cities, names, latitudes, longitudes, radius=500):
    """Collect the Foursquare venues around every neighbourhood.

    The four iterables are walked in lockstep; for each neighbourhood one
    ``venues/explore`` request is made around its coordinates and every
    returned venue becomes one row of the result.

    Parameters
    ----------
    cities, names, latitudes, longitudes : iterable
        City name, neighbourhood name, latitude and longitude of each
        neighbourhood, aligned by position.
    radius : int, optional
        Value passed as the ``radius`` parameter of the request (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'City', 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue Id',
        'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category Id'
        and 'Venue Category Name'. The frame is empty (but has these
        columns) when no venue was found. The two category columns are
        ``None`` for a venue without any category.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers a request with an error status code.
    """
    columns = ['City',
               'Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue Id',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category Id',
               'Venue Category Name']

    rows = []
    for city, name, lat, lng in zip(cities, names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string instead of
        # formatting the URL by hand
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,  # never hang forever on a stalled connection
        )
        # Fail with a clear HTTP error rather than a KeyError further down
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups') or []
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information of each nearby venue
        for item in items:
            venue = item['venue']
            # A venue may come without categories; use None for its columns
            categories = venue.get('categories') or [{}]
            main_category = categories[0]
            rows.append((
                city,
                name,
                lat,
                lng,
                venue['id'],
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                main_category.get('id'),
                main_category.get('name')))

    # Passing the columns to the constructor also works when rows is empty
    return pd.DataFrame(rows, columns=columns)

#### Create a function to get the rating of a venue

In [9]:
def getRatingOfVenue(venue_id):
    """Return the Foursquare rating of a venue, or 0 when none is available.

    Parameters
    ----------
    venue_id : str
        Foursquare id of the venue, inserted into the request URL.

    Returns
    -------
    The value found under ``response.venue.rating`` in the JSON answer, or
    ``0`` when the answer does not contain that entry (for example when the
    venue has no rating or the API reported an error).
    """
    response = requests.get(
        'https://api.foursquare.com/v2/venues/{}'.format(venue_id),
        params={
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
        },
        timeout=30,  # never hang forever on a stalled connection
    )
    result = response.json()

    try:
        venue_rating = result['response']['venue']['rating']
    except (KeyError, TypeError):
        # Only a missing or malformed rating entry falls back to 0; any other
        # problem is no longer silently swallowed
        venue_rating = 0

    return venue_rating

#### Process all neighbourhoods and save them in a CSV file per city

In [10]:
# Collect the venues of every city and store them in one CSV file per city
for city in city_list:
    print(city)

    # Rows of the neighbourhood table that belong to the current city
    select_city = city_data[city_data['City'] == city]

    # Query Foursquare for the venues around each neighbourhood of this city
    city_venues = getNearbyVenues(
        select_city['City'],
        select_city['Neighbourhood'],
        select_city['latitude'],
        select_city['longitude'],
    )

    # Write the result to a city specific CSV file, leaving out the index
    file = 'Venues_of_' + city + '.csv'
    city_venues.to_csv(file, index=False)
    print('The FourSquare venues of {} are saved in {}.'.format(city, file))


Amsterdam
Amsteldorp
Amsterdam Oud-West
Amsterdam Oud-Zuid
Amsterdam Science Park
Apollobuurt
Betondorp
Bijlmermeer
Binnenstad 
Bos en Lommer
Buiksloot
Buikslotermeer
Buitenveldert
Bullewijk
Burgwallen Nieuwe Zijde
Burgwallen Oude Zijde
Cruquiuseiland
Czaar Peterbuurt
Dapperbuurt
De Aker
De Pijp
De Wallen
Diamantbuurt 
Duivelseiland 
Eastern Docklands
Eendracht 
Floradorp
Frederik Hendrikbuurt
Gaasperdam
Geuzenveld
Gouden Reael
Grachtengordel
Haarlemmerbuurt 
Houthaven
IJburg
Indische Buurt
Java-eiland
Jordaan
Kadijken
Kadoelen
Kinkerbuurt
KNSM Island
Landelijk Noord
Landlust
Lastage
Molenwijk 
Museumkwartier 
NDSM
Negen Straatjes
Nieuw Sloten
Nieuwe Pijp
Nieuwendam
Olympisch Kwartier
Omval
Oostelijke Eilanden
Oosterdokseiland
Oosterparkbuurt 
Oostoever
Oostpoort
Oostzanerwerf
Osdorp
Oud Osdorp
Oud-Oost
Oude Pijp
Overhoeks
Overtoomse Veld
Plantage
Ransdorp
Rapenburg 
Rivierenbuurt 
Ruigoord
Schellingwoude
Schinkelbuurt
Slotermeer
Slotervaart 
Spaarndammerbuurt
Staatsliedenbuurt 
Stadio

### Merging all city CSV files into one

In [11]:
# Read the venue CSV file of every city and merge them into 1 dataframe.
# All files are concatenated in a single call, so no "first file" special
# case is needed and earlier rows are not copied again for every city.
venues = pd.concat(
    [pd.read_csv('Venues_of_' + city + '.csv') for city in city_list],
    ignore_index=True,
)

# Save the combined venues, leaving out the index column
file = 'Venues_of_Cities.csv'
venues.to_csv(file, index=False)
print('The FourSquare venues of all cities are saved in {}.'.format(file))

The FourSquare venues of all cities are saved in Venues_of_Cities.csv.


# Collect Foursquare categories

#### Create a function to join the parent category with the child category

In [12]:
def getParentChild(level, parent, child, name):
    """Return one category record as the list [level, parent, child, name]."""
    return [level, parent, child, name]

#### Create a function to collect all Foursquare categories in a parent/child dataframe

In [13]:
def getCategories():
    """Download the Foursquare category tree as a parent/child dataframe.

    The nested categories of the API answer are walked depth-first; every
    category is recorded right before its own sub-categories, whatever the
    nesting depth.

    Returns
    -------
    pandas.DataFrame
        One row per category with the columns 'level' (1 for the top
        level), 'parent_id' (None for the top level), 'child_id' (the id of
        the category itself) and 'name'. The frame is empty (but has these
        columns) when the answer contains no categories.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers the request with an error status code.
    """
    # NOTE(review): the OAuth token is embedded in this URL; consider moving
    # it out of the source and rotating it.
    url = 'https://api.foursquare.com/v2/venues/categories?v=20170211&oauth_token=QEJ4AQPTMMNB413HGNZ5YDMJSHTOHZHMLZCAQCCLXIX41OMP&includeSupportedCC=true'

    response = requests.get(url, timeout=30)
    # Fail with a clear HTTP error rather than a KeyError further down
    response.raise_for_status()
    todos = response.json()

    cat_lst = []

    def collect(categories, level, parent_id):
        # Record each category, then descend into its own sub-categories
        for category in categories:
            cat_lst.append(
                getParentChild(level, parent_id, category['id'], category['name']))
            collect(category.get('categories', []), level + 1, category['id'])

    collect(todos['response']['categories'], 1, None)

    # Passing the columns to the constructor also works when cat_lst is empty
    return pd.DataFrame(cat_lst, columns=['level', 'parent_id', 'child_id', 'name'])

In [14]:
# Build the category table and show the rows found at nesting level 6
cat = getCategories()
cat.loc[cat['level'] == 6]

Unnamed: 0,level,parent_id,child_id,name


####  Parent/child level Statistics

In [15]:
# Per-level summary of the category table: smallest value, largest value,
# number of values and number of distinct values for every remaining column
(
    cat
    .groupby('level')
    .agg(['min', 'max', 'count', 'nunique'])
    .reset_index()
)

Unnamed: 0_level_0,level,parent_id,parent_id,parent_id,parent_id,child_id,child_id,child_id,child_id,name,name,name,name
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,count,nunique,min,max,count,nunique,min,max,count,nunique
0,1,,,0,0,4d4b7104d754a06370d81259,4e67e38e036454776db1fb3a,10,10,Arts & Entertainment,Travel & Transport,10,10
1,2,4d4b7104d754a06370d81259,4e67e38e036454776db1fb3a,470,10,4bf58dd8d48988d100951735,5fac018b99ce226e27fe7573,470,470,ATM,Zoo,470,470
2,3,4bf58dd8d48988d103951735,5744ccdfe4b0c0459246b4c7,382,53,4bf58dd8d48988d100941735,5f2c5de85b4c177b9a6de29c,382,382,Abruzzo Restaurant,Çöp Şiş Place,382,382
3,4,4bf58dd8d48988d111941735,52e81612bcbc57f1066b79f7,94,10,4bf58dd8d48988d101941735,5f2c3f6b5b4c177b9a6dc388,94,94,Acehnese Restaurant,Zhejiang Restaurant,94,94
4,5,4bf58dd8d48988d16b941735,4eb1bfa43b7b52c0e1adc2e8,14,2,52939a643cf9994f4e043a33,5f2c1c31b6d05514c704334c,14,14,Acai House,Tapiocaria,14,14


#### Save the categories in a CSV file for later use

In [16]:
# Write the category table to a CSV file, leaving out the index column
file = 'Categories.csv'
cat.to_csv(file, index=False)
print('The FourSquare categories are saved in {}.'.format(file))

The FourSquare categories are saved in Categories.csv.
